From 0f65a4027a1abfbaf7d8eb3f47efc87d88bd6531 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sat, 21 Oct 2023 20:46:59 +0100
Subject: [PATCH] Add support for regex filters on video names

Update views.py

Update tests.py

Update source.html

Update tasks.py

Update signals.py

Update 0001_initial.py

Update models.py

Update models.py

Update tests.py
---
 tubesync/sync/migrations/0001_initial.py |  1 +
 tubesync/sync/models.py                  |  7 +++
 tubesync/sync/signals.py                 | 76 +++++++++++++++---------
 tubesync/sync/tasks.py                   |  6 ++
 tubesync/sync/templates/sync/source.html |  4 ++
 tubesync/sync/tests.py                   |  3 +
 tubesync/sync/views.py                   |  2 +-
 7 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index aa267a9..cabd403 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,6 +24,7 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
+                ('filter_text', models.CharField(blank=True, db_index=True, default='.*', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index bb8c723..c4f1f78 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -287,6 +287,13 @@ class Source(models.Model):
         help_text=_('If "delete old media" is ticked, the number of days after which '
                     'to automatically delete media')
     )
+    filter_text = models.CharField(
+        _('filter text'),
+        max_length=100,
+        default='.*',
+        blank=True,
+        help_text=_('Regex compatible filter string for video titles')
+    )
     delete_removed_media = models.BooleanField(
         _('delete removed media'),
         default=False,
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index f27b452..e9fdc40 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,4 +1,5 @@
 import os
+import re
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -104,36 +105,57 @@ def media_post_save(sender, instance, created, **kwargs):
     # already been downloaded
     if not instance.downloaded:
         max_cap_age = instance.source.download_cap_date
-        published = instance.published
-        if not published:
-            if not instance.skip:
-                log.warn(f'Media: {instance.source} / {instance} has no published date '
-                         f'set, marking to be skipped')
+        filter_text = instance.source.filter_text
+        published = instance.published
+
+        if instance.skip:
+            # Currently marked to be skipped, check whether the skip conditions still apply
+            if not published:
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                          f'set but is already marked to be skipped')
+            else:
+                if max_cap_age and filter_text:
+                    if (published > max_cap_age) and (re.search(filter_text, instance.title)):
+                        # Media was published after the cap date and matches the filter, unskip it
+                        log.info(f'Media: {instance.source} / {instance} has a valid publishing date and matches the filter, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                    else:
+                        # Media is already skipped and still fails the cap date or filter check
+                        log.debug(f'Media: {instance.source} / {instance} still fails the download '
+                                  f'cap or filter check, leaving it marked to be skipped')
+                elif max_cap_age:
+                    if published > max_cap_age:
+                        # Media was published after the cap date but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid '
+                                 f'publishing date, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                elif filter_text:
+                    if re.search(filter_text, instance.title):
+                        # Media matches the filter text but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+        else:
+            if not published:
+                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
                 instance.skip = True
                 cap_changed = True
             else:
-                log.debug(f'Media: {instance.source} / {instance} has no published date '
-                          f'set but is already marked to be skipped')
-        else:
-            if max_cap_age:
-                if published > max_cap_age and instance.skip:
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
-                elif published <= max_cap_age and not instance.skip:
-                    log.info(f'Media: {instance.source} / {instance} is too old for '
-                             f'the download cap date, marking to be skipped')
-                    instance.skip = True
-                    cap_changed = True
-            else:
-                if instance.skip:
-                    # Media marked to be skipped but source download cap removed
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
+                if max_cap_age:
+                    if published <= max_cap_age:
+                        log.info(f'Media: {instance.source} / {instance} is too old for '
+                                 f'the download cap date, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+                if filter_text:
+                    if not re.search(filter_text, instance.title):
+                        # Media does not match the filter text but is not marked to be skipped
+                        log.info(f'Media: {instance.source} / {instance} does not match the filter text, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+
     # Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
     if instance.metadata:
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 2f94621..cbb54cc 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,6 +8,7 @@ import os
 import json
 import math
 import uuid
+import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -254,6 +255,11 @@ def download_media_metadata(media_id):
             log.warn(f'Media: {source} / {media} is older than cap age '
                      f'{max_cap_age}, skipping')
             media.skip = True
+    # If the source has a search filter, check that the video title matches the filter
+    if not re.search(source.filter_text, media.title):
+        # The filter is a regex string; a blank filter matches every title, so nothing is skipped
+        log.warn(f'Media: {source} / {media} does not match filter {source.filter_text}, skipping')
+        media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
         if not isinstance(media.published, datetime):
diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html
index 22122e2..c5812b2 100644
--- a/tubesync/sync/templates/sync/source.html
+++ b/tubesync/sync/templates/sync/source.html
@@ -43,6 +43,10 @@
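
Reviewer note: the behaviour the new filter_text field is meant to add can be sketched outside of Django. This is a minimal illustration, not code from the patch; the helper title_matches_filter is hypothetical and only mirrors the re.search(filter_text, title) checks added in signals.py and tasks.py. A blank filter, like the default '.*', matches every title, so the filter skips nothing.

import re

def title_matches_filter(filter_text, title):
    # Hypothetical helper mirroring the patch's re.search(filter_text, title) checks.
    # An empty pattern or the default '.*' matches any title, so no media is skipped.
    return bool(re.search(filter_text, title))

# Example: keep only titles containing "podcast", case-insensitively.
assert title_matches_filter(r'(?i)podcast', 'Weekly Podcast #12')
assert not title_matches_filter(r'(?i)podcast', 'Live stream highlights')
assert title_matches_filter('', 'Any title at all')  # blank filter keeps everything

Media that fails the check is only marked skip = True; nothing already downloaded is deleted, which matches how the download cap is handled in signals.py above.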