diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index af629ae..a2b1225 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,6 +4,7 @@ env: IMAGE_NAME: tubesync on: + workflow_dispatch: push: branches: - main diff --git a/tubesync/common/templates/pagination.html b/tubesync/common/templates/pagination.html index 0e378a8..e48b24d 100644 --- a/tubesync/common/templates/pagination.html +++ b/tubesync/common/templates/pagination.html @@ -3,7 +3,7 @@
diff --git a/tubesync/sync/migrations/0020_auto_20231024_1825.py b/tubesync/sync/migrations/0020_auto_20231024_1825.py
new file mode 100644
index 0000000..295339a
--- /dev/null
+++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.22 on 2023-10-24 17:25
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('sync', '0019_add_delete_removed_media'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='source',
+            name='filter_text',
+            field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='auto_subtitles',
+            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='sub_langs',
+            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
+        ),
+    ]
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index bb8c723..729e21a 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1,6 +1,7 @@
 import os
 import uuid
 import json
+import re
 from xml.etree import ElementTree
 from collections import OrderedDict
 from datetime import datetime, timedelta
@@ -287,6 +288,13 @@ class Source(models.Model):
         help_text=_('If "delete old media" is ticked, the number of days after which '
                     'to automatically delete media')
     )
+    filter_text = models.CharField(
+        _('filter string'),
+        max_length=100,
+        default='',
+        blank=True,
+        help_text=_('Regex compatible filter string for video titles')
+    )
     delete_removed_media = models.BooleanField(
         _('delete removed media'),
         default=False,
@@ -538,6 +546,13 @@ class Source(models.Model):
         except Exception as e:
             return ''
 
+    def is_regex_match(self, media_item_title):
+        '''
+            Return True if this source's filter_text regex is found anywhere
+            in media_item_title (re.search semantics), otherwise False.
+        '''
+        return bool(re.search(self.filter_text, media_item_title))
+
     def index_media(self):
         '''
             Index the media source returning a list of media metadata as dicts.
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index f27b452..d1f3d03 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -104,36 +104,57 @@ def media_post_save(sender, instance, created, **kwargs):
     # already been downloaded
     if not instance.downloaded:
         max_cap_age = instance.source.download_cap_date
-        published = instance.published
-        if not published:
-            if not instance.skip:
-                log.warn(f'Media: {instance.source} / {instance} has no published date '
-                         f'set, marking to be skipped')
+        filter_text = instance.source.filter_text
+        published = instance.published
+
+        if instance.skip:
+            # Currently marked to be skipped, check if the skip conditions still apply
+            if not published:
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                          f'set but is already marked to be skipped')
+            else:
+                if max_cap_age and filter_text:
+                    if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)):
+                        # Media was published after the cap date and matches the filter text, but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid publishing date and matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                    else:
+                        # Media still fails the cap date and/or the filter text, so it stays skipped
+                        log.info(f'Media: {instance.source} / {instance} is too old for the download cap date '
+                                 f'or does not match the filter text, already marked to be skipped')
+                elif max_cap_age:
+                    if published > max_cap_age:
+                        # Media was published after the cap date but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid '
+                                 f'publishing date, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                elif filter_text:
+                    if instance.source.is_regex_match(instance.title):
+                        # Media matches the filter text but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+        else:
+            if not published:
+                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
                 instance.skip = True
                 cap_changed = True
             else:
-                log.debug(f'Media: {instance.source} / {instance} has no published date '
-                          f'set but is already marked to be skipped')
-        else:
-            if max_cap_age:
-                if published > max_cap_age and instance.skip:
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
-                elif published <= max_cap_age and not instance.skip:
-                    log.info(f'Media: {instance.source} / {instance} is too old for '
-                             f'the download cap date, marking to be skipped')
-                    instance.skip = True
-                    cap_changed = True
-            else:
-                if instance.skip:
-                    # Media marked to be skipped but source download cap removed
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
+                if max_cap_age:
+                    if published <= max_cap_age:
+                        log.info(f'Media: {instance.source} / {instance} is too old for '
+                                 f'the download cap date, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+                if filter_text:
+                    if not instance.source.is_regex_match(instance.title):
+                        # Media doesn't match the filter text but is not yet marked to be skipped
+                        log.info(f'Media: {instance.source} / {instance} does not match the filter text')
+                        instance.skip = True
+                        cap_changed = True
+
     # Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
     if instance.metadata:
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 2f94621..7e79530 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -254,6 +254,11 @@ def download_media_metadata(media_id):
             log.warn(f'Media: {source} / {media} is older than cap age '
                      f'{max_cap_age}, skipping')
             media.skip = True
+    # If the source has a search filter, check the video title matches the filter
+    if source.filter_text and not source.is_regex_match(media.title):
+        # The filter text (a regex) was not found in the media title; a blank filter never reaches this branch
+        log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
+        media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
         if not isinstance(media.published, datetime):
diff --git a/tubesync/sync/templates/sync/media.html b/tubesync/sync/templates/sync/media.html
index 420b15b..d2d4e63 100644
--- a/tubesync/sync/templates/sync/media.html
+++ b/tubesync/sync/templates/sync/media.html
@@ -64,5 +64,5 @@
   {% endfor %}
-{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped %}
+{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped only_skipped=only_skipped %}
 {% endblock %}
diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html
index 22122e2..c5812b2 100644
--- a/tubesync/sync/templates/sync/source.html
+++ b/tubesync/sync/templates/sync/source.html
@@ -43,6 +43,10 @@