From 0523f481d2f0ea3b04f6213128e47776e28932d9 Mon Sep 17 00:00:00 2001 From: locke4 <65832338+locke4@users.noreply.github.com> Date: Mon, 23 Oct 2023 19:38:28 +0100 Subject: [PATCH] Updated according to comments on PR Fixed whitespace Update tests.py Ran makemigrations Update models.py Update tests.py Update models.py Update tests.py Update models.py Update models.py Update tests.py Update models.py Update tests.py Update tests.py Update tests.py Update models.py Update models.py Update tests.py Update models.py Update models.py Update tests.py Update tests.py Update signals.py Update tasks.py Update signals.py Update models.py Update tasks.py Update signals.py Update tasks.py Update models.py --- tubesync/sync/migrations/0001_initial.py | 1 - .../migrations/0020_auto_20231024_1825.py | 29 ++++++++++ tubesync/sync/models.py | 6 +- tubesync/sync/signals.py | 57 +++++++++---------- tubesync/sync/tasks.py | 5 +- tubesync/sync/tests.py | 23 ++++++++ 6 files changed, 87 insertions(+), 34 deletions(-) create mode 100644 tubesync/sync/migrations/0020_auto_20231024_1825.py diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py index cabd403..aa267a9 100644 --- a/tubesync/sync/migrations/0001_initial.py +++ b/tubesync/sync/migrations/0001_initial.py @@ -24,7 +24,6 @@ class Migration(migrations.Migration): ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')), ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')), ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')), - ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')), ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')), ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')), ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')), diff --git a/tubesync/sync/migrations/0020_auto_20231024_1825.py b/tubesync/sync/migrations/0020_auto_20231024_1825.py new file mode 100644 index 0000000..295339a --- /dev/null +++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.22 on 2023-10-24 17:25 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0019_add_delete_removed_media'), + ] + + operations = [ + migrations.AddField( + model_name='source', + name='filter_text', + field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'), + ), + migrations.AlterField( + model_name='source', + name='auto_subtitles', + field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'), + ), + migrations.AlterField( + model_name='source', + name='sub_langs', + field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'), + ), + ] diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index af281c8..729e21a 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1,6 +1,7 @@ import os import uuid import json +import re from xml.etree import ElementTree from collections import OrderedDict from datetime import datetime, timedelta @@ -290,7 +291,7 @@ class Source(models.Model): filter_text = models.CharField( _('filter string'), max_length=100, - default='.*', + default='', blank=True, help_text=_('Regex compatible filter string for video titles') ) @@ -545,6 +546,9 @@ class Source(models.Model): except Exception as e: return '' + def is_regex_match(self, media_item_title): + return bool(re.search(self.filter_text,media_item_title)) + def index_media(self): ''' Index the media source returning a list of media metadata as dicts. diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index e9fdc40..d1f3d03 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -1,5 +1,4 @@ import os -import re from django.conf import settings from django.db.models.signals import pre_save, post_save, pre_delete, post_delete from django.dispatch import receiver @@ -109,34 +108,34 @@ def media_post_save(sender, instance, created, **kwargs): published = instance.published if instance.skip: - #currently marked to be skipped, check if skip conditions still apply - if not published: - log.debug(f'Media: {instance.source} / {instance} has no published date ' - f'set but is already marked to be skipped') - else: - if max_cap_age and filter_text: - if (published > max_cap_age) and (re.search(filter_text,instance.title)): - # Media was published after the cap date but is set to be skipped - print('Has a valid publishing date and matches filter, marking unskipped') - instance.skip = False - cap_changed = True - else: - print('does not have a valid publishing date or filter string, already marked skipped') - log.info(f'Media: {instance.source} / {instance} has no published date ' - f'set but is already marked to be skipped') - elif max_cap_age: - if published > max_cap_age: - # Media was published after the cap date but is set to be skipped - log.info(f'Media: {instance.source} / {instance} has a valid ' - f'publishing date, marking to be unskipped') - instance.skip = False - cap_changed = True - elif filter_text: - if re.search(filter_text,instance.title): - # Media was published after the cap date but is set to be skipped - log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped') - instance.skip = False - cap_changed = True + #currently marked to be skipped, check if skip conditions still apply + if not published: + log.debug(f'Media: {instance.source} / {instance} has no published date ' + f'set but is already marked to be skipped') + else: + if max_cap_age and filter_text: + if (published > max_cap_age) and (source.is_regex_match(instance.title)): + # Media was published after the cap date and matches the filter text, but is set to be skipped + print('Has a valid publishing date and matches filter, marking unskipped') + instance.skip = False + cap_changed = True + else: + print('does not have a valid publishing date or filter string, already marked skipped') + log.info(f'Media: {instance.source} / {instance} has no published date ' + f'set but is already marked to be skipped') + elif max_cap_age: + if published > max_cap_age: + # Media was published after the cap date but is set to be skipped + log.info(f'Media: {instance.source} / {instance} has a valid ' + f'publishing date, marking to be unskipped') + instance.skip = False + cap_changed = True + elif filter_text: + if source.is_regex_match(instance.title): + # Media matches the filter text but is set to be skipped + log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped') + instance.skip = False + cap_changed = True else: if not published: log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped') diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index cbb54cc..7e79530 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -8,7 +8,6 @@ import os import json import math import uuid -import re from io import BytesIO from hashlib import sha1 from datetime import timedelta, datetime @@ -256,9 +255,9 @@ def download_media_metadata(media_id): f'{max_cap_age}, skipping') media.skip = True # If the source has a search filter, check the video title matches the filter - if not re.search(source.filter_text,media.title): + if source.filter_text and not source.is_regex_match(media.title): # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false - log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping') + log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping') media.skip = True # If the source has a cut-off check the upload date is within the allowed delta if source.delete_old_media and source.days_to_keep > 0: diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index a4963db..1ca2643 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -1471,6 +1471,29 @@ class FormatMatchingTestCase(TestCase): self.media.get_best_video_format() self.media.get_best_audio_format() + def test_is_regex_match(self): + + self.media.metadata = all_test_metadata['boring'] + expected_matches = { + ('.*'): (True), + ('no fancy stuff'): (True), + ('No fancy stuff'): (False), + ('(?i)No fancy stuff'): (True), #set case insensitive flag + ('no'): (True), + ('Foo'): (False), + ('^(?!.*fancy).*$'): (False), + ('^(?!.*funny).*$'): (True), + ('(?=.*f.*)(?=.{0,2}|.{4,})'): (True), + ('f{4,}'): (False), + ('^[^A-Z]*$'): (True), + ('^[^a-z]*$'): (False), + ('^[^\\s]*$'): (False) + } + + for params, expected in expected_matches.items(): + self.source.filter_text = params + expected_match_result = expected + self.assertEqual(self.source.is_regex_match(self.media.title), expected_match_result) class TasksTestCase(TestCase): def setUp(self):