Add support for regex filters on video names

Update views.py
Update tests.py
Update source.html
Update tasks.py
Update signals.py
Update 0001_initial.py
Update models.py
Update models.py
Update tests.py
This commit is contained in:
locke4 2023-10-21 20:46:59 +01:00
parent 5cac374486
commit 0f65a4027a
7 changed files with 71 additions and 28 deletions

View File

@ -24,6 +24,7 @@ class Migration(migrations.Migration):
('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')), ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')), ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')), ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')), ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')), ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')), ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),

View File

@ -287,6 +287,13 @@ class Source(models.Model):
help_text=_('If "delete old media" is ticked, the number of days after which ' help_text=_('If "delete old media" is ticked, the number of days after which '
'to automatically delete media') 'to automatically delete media')
) )
# Optional regex used to filter media by video title; the default '.*'
# matches every title, so no media is filtered out unless this is changed.
# NOTE(review): the migration hunk in this commit declares this column with
# db_index=True and verbose_name 'filter text' -- confirm model and migration agree.
filter_text = models.CharField(
    _('filter string'),
    max_length=100,
    default='.*',
    blank=True,
    help_text=_('Regex compatible filter string for video titles')
)
delete_removed_media = models.BooleanField( delete_removed_media = models.BooleanField(
_('delete removed media'), _('delete removed media'),
default=False, default=False,

View File

@ -1,4 +1,5 @@
import os import os
import re
from django.conf import settings from django.conf import settings
from django.db.models.signals import pre_save, post_save, pre_delete, post_delete from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
from django.dispatch import receiver from django.dispatch import receiver
@ -104,36 +105,57 @@ def media_post_save(sender, instance, created, **kwargs):
# already been downloaded # already been downloaded
if not instance.downloaded: if not instance.downloaded:
max_cap_age = instance.source.download_cap_date max_cap_age = instance.source.download_cap_date
filter_text = instance.source.filter_text
published = instance.published published = instance.published
if not published:
if not instance.skip: if instance.skip:
log.warn(f'Media: {instance.source} / {instance} has no published date ' #currently marked to be skipped, check if skip conditions still apply
f'set, marking to be skipped') if not published:
log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
else:
if max_cap_age and filter_text:
if (published > max_cap_age) and (re.search(filter_text,instance.title)):
# Media was published after the cap date but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False
cap_changed = True
else:
print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
elif max_cap_age:
if published > max_cap_age:
# Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
elif filter_text:
if re.search(filter_text,instance.title):
# Media title matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
instance.skip = True instance.skip = True
cap_changed = True cap_changed = True
else: else:
log.debug(f'Media: {instance.source} / {instance} has no published date ' if max_cap_age:
f'set but is already marked to be skipped') if published <= max_cap_age:
else: log.info(f'Media: {instance.source} / {instance} is too old for '
if max_cap_age: f'the download cap date, marking to be skipped')
if published > max_cap_age and instance.skip: instance.skip = True
# Media was published after the cap date but is set to be skipped cap_changed = True
log.info(f'Media: {instance.source} / {instance} has a valid ' if filter_text:
f'publishing date, marking to be unskipped') if not re.search(filter_text,instance.title):
instance.skip = False #media doesn't match the filter text but is not marked to be skipped
cap_changed = True log.info(f'Media: {instance.source} / {instance} does not match the filter text')
elif published <= max_cap_age and not instance.skip: instance.skip = True
log.info(f'Media: {instance.source} / {instance} is too old for ' cap_changed = True
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
else:
if instance.skip:
# Media marked to be skipped but source download cap removed
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
# Recalculate the "can_download" flag, this may # Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed # need to change if the source specifications have been changed
if instance.metadata: if instance.metadata:

View File

@ -8,6 +8,7 @@ import os
import json import json
import math import math
import uuid import uuid
import re
from io import BytesIO from io import BytesIO
from hashlib import sha1 from hashlib import sha1
from datetime import timedelta, datetime from datetime import timedelta, datetime
@ -254,6 +255,11 @@ def download_media_metadata(media_id):
log.warn(f'Media: {source} / {media} is older than cap age ' log.warn(f'Media: {source} / {media} is older than cap age '
f'{max_cap_age}, skipping') f'{max_cap_age}, skipping')
media.skip = True media.skip = True
# If the source has a search filter, check the video title matches the filter
if not re.search(source.filter_text,media.title):
# The filter is a regex searched against the media title; a blank filter matches every title, so nothing is skipped for it
log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping')
media.skip = True
# If the source has a cut-off check the upload date is within the allowed delta # If the source has a cut-off check the upload date is within the allowed delta
if source.delete_old_media and source.days_to_keep > 0: if source.delete_old_media and source.days_to_keep > 0:
if not isinstance(media.published, datetime): if not isinstance(media.published, datetime):

View File

@ -43,6 +43,10 @@
<td class="hide-on-small-only">Directory</td> <td class="hide-on-small-only">Directory</td>
<td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td> <td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td>
</tr> </tr>
<tr title="Filter text">
<td class="hide-on-small-only">Filter text</td>
<td><span class="hide-on-med-and-up">Filter text<br></span><strong>{{ source.filter_text }}</strong></td>
</tr>
<tr title="Media file name format to use for saving files"> <tr title="Media file name format to use for saving files">
<td class="hide-on-small-only">Media format</td> <td class="hide-on-small-only">Media format</td>
<td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td> <td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td>

View File

@ -175,6 +175,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': 3600, 'index_schedule': 3600,
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,
@ -217,6 +218,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_HOUR, 'index_schedule': Source.IndexSchedule.EVERY_HOUR,
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,
@ -247,6 +249,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed 'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,

View File

@ -294,7 +294,7 @@ class ValidateSourceView(FormView):
class EditSourceMixin: class EditSourceMixin:
model = Source model = Source
fields = ('source_type', 'key', 'name', 'directory', 'media_format', fields = ('source_type', 'key', 'name', 'directory', 'filter_text', 'media_format',
'index_schedule', 'download_media', 'download_cap', 'delete_old_media', 'index_schedule', 'download_media', 'download_cap', 'delete_old_media',
'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec', 'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec',
'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails', 'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails',