diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index cabd403..aa267a9 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,7 +24,6 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
-                ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/migrations/0020_auto_20231024_1825.py b/tubesync/sync/migrations/0020_auto_20231024_1825.py
new file mode 100644
index 0000000..295339a
--- /dev/null
+++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.22 on 2023-10-24 17:25
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('sync', '0019_add_delete_removed_media'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='source',
+            name='filter_text',
+            field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='auto_subtitles',
+            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='sub_langs',
+            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
+        ),
+    ]
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index af281c8..729e21a 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1,6 +1,7 @@
 import os
 import uuid
 import json
+import re
 from xml.etree import ElementTree
 from collections import OrderedDict
 from datetime import datetime, timedelta
@@ -290,7 +291,7 @@ class Source(models.Model):
     filter_text = models.CharField(
         _('filter string'),
         max_length=100,
-        default='.*',
+        default='',
         blank=True,
         help_text=_('Regex compatible filter string for video titles')
     )
@@ -545,6 +546,19 @@ class Source(models.Model):
         except Exception as e:
             return ''
 
+    def is_regex_match(self, media_item_title):
+        '''
+            Returns True if media_item_title matches this source's filter_text
+            regex (searched anywhere in the title). An empty filter matches
+            every title. An invalid pattern is treated as matching nothing.
+        '''
+        if not self.filter_text:
+            return True
+        try:
+            return bool(re.search(self.filter_text, media_item_title))
+        except re.error:
+            return False
+
     def index_media(self):
         '''
             Index the media source returning a list of media metadata as dicts.
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index e9fdc40..d1f3d03 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,5 +1,4 @@
 import os
-import re
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -109,34 +108,34 @@ def media_post_save(sender, instance, created, **kwargs):
     published = instance.published
 
     if instance.skip:
-        #currently marked to be skipped, check if skip conditions still apply
-        if not published:
-            log.debug(f'Media: {instance.source} / {instance} has no published date '
-                      f'set but is already marked to be skipped')
-        else:
-            if max_cap_age and filter_text:
-                if (published > max_cap_age) and (re.search(filter_text,instance.title)):
-                    # Media was published after the cap date but is set to be skipped
-                    print('Has a valid publishing date and matches filter, marking unskipped')
-                    instance.skip = False
-                    cap_changed = True
-                else:
-                    print('does not have a valid publishing date or filter string, already marked skipped')
-                    log.info(f'Media: {instance.source} / {instance} has no published date '
-                             f'set but is already marked to be skipped')
-            elif max_cap_age:
-                if published > max_cap_age:
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
-            elif filter_text:
-                if re.search(filter_text,instance.title):
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
+        # Currently marked to be skipped, check if the skip conditions still apply
+        if not published:
+            log.debug(f'Media: {instance.source} / {instance} has no published date '
+                      f'set but is already marked to be skipped')
+        else:
+            if max_cap_age and filter_text:
+                if (published > max_cap_age) and (source.is_regex_match(instance.title)):
+                    # Media was published after the cap date and matches the filter text, but is set to be skipped
+                    log.info(f'Media: {instance.source} / {instance} has a valid publishing date '
+                             f'and matches the filter text, marking to be unskipped')
+                    instance.skip = False
+                    cap_changed = True
+                else:
+                    # A published date exists here, so the cap date or the filter is what keeps it skipped
+                    log.info(f'Media: {instance.source} / {instance} is outside the download cap '
+                             f'or does not match the filter text, leaving it skipped')
+            elif max_cap_age:
+                if published > max_cap_age:
+                    # Media was published after the cap date but is set to be skipped
+                    log.info(f'Media: {instance.source} / {instance} has a valid '
+                             f'publishing date, marking to be unskipped')
+                    instance.skip = False
+                    cap_changed = True
+            elif filter_text:
+                if source.is_regex_match(instance.title):
+                    # Media matches the filter text but is set to be skipped
+                    log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                    instance.skip = False
+                    cap_changed = True
     else:
         if not published:
             log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index cbb54cc..7e79530 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,7 +8,6 @@ import os
 import json
 import math
 import uuid
-import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -256,9 +255,9 @@ def download_media_metadata(media_id):
                      f'{max_cap_age}, skipping')
             media.skip = True
     # If the source has a search filter, check the video title matches the filter
-    if not re.search(source.filter_text,media.title):
+    if source.filter_text and not source.is_regex_match(media.title):
         # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
-        log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping')
+        log.warning(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
         media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index a4963db..1ca2643 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -1471,6 +1471,29 @@ class FormatMatchingTestCase(TestCase):
         self.media.get_best_video_format()
         self.media.get_best_audio_format()
 
+    def test_is_regex_match(self):
+        # Each filter string maps to whether it should match the 'boring' test title
+        self.media.metadata = all_test_metadata['boring']
+        expected_matches = {
+            '.*': True,
+            'no fancy stuff': True,
+            'No fancy stuff': False,
+            '(?i)No fancy stuff': True,  # set case-insensitive flag
+            'no': True,
+            'Foo': False,
+            '^(?!.*fancy).*$': False,
+            '^(?!.*funny).*$': True,
+            '(?=.*f.*)(?=.{0,2}|.{4,})': True,
+            'f{4,}': False,
+            '^[^A-Z]*$': True,
+            '^[^a-z]*$': False,
+            '^[^\\s]*$': False,
+        }
+
+        for filter_text, expected in expected_matches.items():
+            # subTest reports which filter string failed instead of stopping at the first one
+            with self.subTest(filter_text=filter_text):
+                self.source.filter_text = filter_text
+                self.assertEqual(self.source.is_regex_match(self.media.title), expected)
 
 class TasksTestCase(TestCase):
     def setUp(self):