Merge pull request #7 from locke4/locke4-patch-2

Updated according to comments on PR
2023-10-24 18:38:17 +01:00
parent 96d9ee93ef 0523f481d2
commit cf06f4cbc2
6 changed files with 87 additions and 34 deletions
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,7 +24,6 @@ class Migration(migrations.Migration):
                ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
-                ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
--- a/tubesync/sync/migrations/0020_auto_20231024_1825.py
+++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.22 on 2023-10-24 17:25
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Schema changes for the 'source' model: adds filter_text, re-declares auto_subtitles and sub_langs
+    dependencies = [
+        ('sync', '0019_add_delete_removed_media'),
+    ]
+    # One AddField and two AlterField operations, all targeting model_name='source'
+    operations = [
+        migrations.AddField(  # new field; blank is allowed and the default is an empty string
+            model_name='source',
+            name='filter_text',
+            field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
+        ),
+        migrations.AlterField(  # re-declare auto_subtitles as a BooleanField defaulting to False
+            model_name='source',
+            name='auto_subtitles',
+            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
+        ),
+        migrations.AlterField(  # re-declare sub_langs together with its RegexValidator
+            model_name='source',
+            name='sub_langs',
+            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
+        ),
+    ]
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1,6 +1,7 @@
 import os
 import uuid
 import json
+import re
 from xml.etree import ElementTree
 from collections import OrderedDict
 from datetime import datetime, timedelta
@@ -290,7 +291,7 @@ class Source(models.Model):
    filter_text = models.CharField(
        _('filter string'),
        max_length=100,
-        default='.*',
+        default='',
        blank=True,
        help_text=_('Regex compatible filter string for video titles')
    )
@@ -545,6 +546,16 @@ class Source(models.Model):
        except Exception as e:
            return ''

+    def is_regex_match(self, media_item_title):
+        '''
+            Report whether media_item_title satisfies this source's filter_text.
+            The filter is applied with re.search(), so the pattern may match
+            anywhere in the title and an empty filter_text matches every title.
+            Always returns a bool, never the match object itself.
+        '''
+        found = re.search(self.filter_text, media_item_title)
+        return found is not None
+
    def index_media(self):
        '''
            Index the media source returning a list of media metadata as dicts.
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,5 +1,4 @@
 import os
-import re
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -109,34 +108,34 @@ def media_post_save(sender, instance, created, **kwargs):
        published = instance.published 

        if instance.skip:
-                #currently marked to be skipped, check if skip conditions still apply
-                if not published:
-                    log.debug(f'Media: {instance.source} / {instance} has no published date '
-                                  f'set but is already marked to be skipped')
-                else:            
-                    if max_cap_age and filter_text:
-                        if (published > max_cap_age) and (re.search(filter_text,instance.title)):
-                            # Media was published after the cap date but is set to be skipped
-                            print('Has a valid publishing date and matches filter, marking unskipped')
-                            instance.skip = False
-                            cap_changed = True
-                        else:
-                            print('does not have a valid publishing date or filter string, already marked skipped')
-                            log.info(f'Media: {instance.source} / {instance} has no published date '
-                                  f'set but is already marked to be skipped')
-                    elif max_cap_age:
-                        if published > max_cap_age:
-                            # Media was published after the cap date but is set to be skipped
-                            log.info(f'Media: {instance.source} / {instance} has a valid '
-                                    f'publishing date, marking to be unskipped')
-                            instance.skip = False
-                            cap_changed = True
-                    elif filter_text:
-                        if re.search(filter_text,instance.title):
-                            # Media was published after the cap date but is set to be skipped
-                            log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
-                            instance.skip = False
-                            cap_changed = True
+            # Currently marked to be skipped, check if the skip conditions still apply
+            if not published:
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                        f'set but is already marked to be skipped')
+            else:
+                if max_cap_age and filter_text:
+                    if (published > max_cap_age) and (source.is_regex_match(instance.title)):
+                        # Media was published after the cap date and matches the filter text, but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid publishing date and matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                    else:
+                        # A published date exists here, so the media stays skipped because of the cap date or the filter
+                        log.info(f'Media: {instance.source} / {instance} was not published after the cap date '
+                                f'or does not match the filter text, leaving it marked to be skipped')
+                elif max_cap_age:
+                    if published > max_cap_age:
+                        # Media was published after the cap date but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid '
+                                f'publishing date, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                elif filter_text:
+                    if source.is_regex_match(instance.title):
+                        # Media matches the filter text but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
        else:
            if not published:
                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,7 +8,6 @@ import os
 import json
 import math
 import uuid
-import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -256,9 +255,9 @@ def download_media_metadata(media_id):
                     f'{max_cap_age}, skipping')
            media.skip = True
    # If the source has a search filter, check the video title matches the filter
-    if not re.search(source.filter_text,media.title):
+    if source.filter_text and not source.is_regex_match(media.title):
        # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
-        log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping')
+        log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
        media.skip = True
    # If the source has a cut-off check the upload date is within the allowed delta
    if source.delete_old_media and source.days_to_keep > 0:
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -1471,6 +1471,29 @@ class FormatMatchingTestCase(TestCase):
            self.media.get_best_video_format()
            self.media.get_best_audio_format()

+    def test_is_regex_match(self):
+        '''Check is_regex_match() for each filter pattern against media using the 'boring' metadata.'''
+        self.media.metadata = all_test_metadata['boring']
+        expected_matches = {
+            '.*': True,
+            'no fancy stuff': True,
+            'No fancy stuff': False,
+            '(?i)No fancy stuff': True,  # inline case-insensitive flag
+            'no': True,
+            'Foo': False,
+            '^(?!.*fancy).*$': False,
+            '^(?!.*funny).*$': True,
+            '(?=.*f.*)(?=.{0,2}|.{4,})': True,
+            'f{4,}': False,
+            '^[^A-Z]*$': True,
+            '^[^a-z]*$': False,
+            '^[^\\s]*$': False,
+        }
+        for filter_text, expected in expected_matches.items():
+            # subTest names the failing pattern and lets the remaining patterns still run
+            with self.subTest(filter_text=filter_text):
+                self.source.filter_text = filter_text
+                self.assertEqual(self.source.is_regex_match(self.media.title), expected)

 class TasksTestCase(TestCase):
    def setUp(self):