From 0f65a4027a1abfbaf7d8eb3f47efc87d88bd6531 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sat, 21 Oct 2023 20:46:59 +0100
Subject: [PATCH] Add support for regex filters on video names

Update views.py

Update tests.py

Update source.html

Update tasks.py

Update signals.py

Update 0001_initial.py

Update models.py

Update models.py

Update tests.py
---
 tubesync/sync/migrations/0001_initial.py |  1 +
 tubesync/sync/models.py                  |  7 +++
 tubesync/sync/signals.py                 | 76 +++++++++++++++---------
 tubesync/sync/tasks.py                   |  6 ++
 tubesync/sync/templates/sync/source.html |  4 ++
 tubesync/sync/tests.py                   |  3 +
 tubesync/sync/views.py                   |  2 +-
 7 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index aa267a9..cabd403 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,6 +24,7 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
+                ('filter_text', models.CharField(blank=True, db_index=True, default='.*', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index bb8c723..c4f1f78 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -287,6 +287,13 @@ class Source(models.Model):
         help_text=_('If "delete old media" is ticked, the number of days after which '
                     'to automatically delete media')
     )
+    filter_text = models.CharField(
+        _('filter text'),
+        max_length=100,
+        default='.*',
+        blank=True,
+        help_text=_('Regex compatible filter string for video titles')
+    )
     delete_removed_media = models.BooleanField(
         _('delete removed media'),
         default=False,
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index f27b452..e9fdc40 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,4 +1,5 @@
 import os
+import re
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -104,36 +105,57 @@ def media_post_save(sender, instance, created, **kwargs):
     # already been downloaded
     if not instance.downloaded:
         max_cap_age = instance.source.download_cap_date
-        published = instance.published
-        if not published:
-            if not instance.skip:
-                log.warn(f'Media: {instance.source} / {instance} has no published date '
-                         f'set, marking to be skipped')
+        filter_text = instance.source.filter_text
+        published = instance.published
+
+        if instance.skip:
+            # Currently marked to be skipped, check whether the skip conditions still apply
+            if not published:
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                          f'set but is already marked to be skipped')
+            else:
+                if max_cap_age and filter_text:
+                    if (published > max_cap_age) and (re.search(filter_text, instance.title)):
+                        # Media was published after the cap date and matches the filter, unskip it
+                        log.info(f'Media: {instance.source} / {instance} has a valid publishing date and matches the filter, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                    else:
+                        # Media is already skipped and still fails the cap date or filter check
+                        log.debug(f'Media: {instance.source} / {instance} still fails the download '
+                                  f'cap or filter check, leaving it marked to be skipped')
+                elif max_cap_age:
+                    if published > max_cap_age:
+                        # Media was published after the cap date but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} has a valid '
+                                 f'publishing date, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+                elif filter_text:
+                    if re.search(filter_text, instance.title):
+                        # Media matches the filter text but is set to be skipped
+                        log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                        instance.skip = False
+                        cap_changed = True
+        else:
+            if not published:
+                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
                 instance.skip = True
                 cap_changed = True
             else:
-                log.debug(f'Media: {instance.source} / {instance} has no published date '
-                          f'set but is already marked to be skipped')
-        else:
-            if max_cap_age:
-                if published > max_cap_age and instance.skip:
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
-                elif published <= max_cap_age and not instance.skip:
-                    log.info(f'Media: {instance.source} / {instance} is too old for '
-                             f'the download cap date, marking to be skipped')
-                    instance.skip = True
-                    cap_changed = True
-            else:
-                if instance.skip:
-                    # Media marked to be skipped but source download cap removed
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                             f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
+                if max_cap_age:
+                    if published <= max_cap_age:
+                        log.info(f'Media: {instance.source} / {instance} is too old for '
+                                 f'the download cap date, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+                if filter_text:
+                    if not re.search(filter_text, instance.title):
+                        # Media does not match the filter text but is not marked to be skipped
+                        log.info(f'Media: {instance.source} / {instance} does not match the filter text, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+
     # Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
     if instance.metadata:
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 2f94621..cbb54cc 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,6 +8,7 @@ import os
 import json
 import math
 import uuid
+import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -254,6 +255,11 @@ def download_media_metadata(media_id):
             log.warn(f'Media: {source} / {media} is older than cap age '
                      f'{max_cap_age}, skipping')
             media.skip = True
+    # If the source has a search filter, check that the video title matches the filter
+    if not re.search(source.filter_text, media.title):
+        # The filter is a regex string; a blank filter matches every title, so nothing is skipped
+        log.warn(f'Media: {source} / {media} does not match filter {source.filter_text}, skipping')
+        media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
         if not isinstance(media.published, datetime):
diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html
index 22122e2..c5812b2 100644
--- a/tubesync/sync/templates/sync/source.html
+++ b/tubesync/sync/templates/sync/source.html
@@ -43,6 +43,10 @@
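
Reviewer note: the behaviour the new filter_text field is meant to add can be sketched outside of Django. This is a minimal illustration, not code from the patch; the helper title_matches_filter is hypothetical and only mirrors the re.search(filter_text, title) checks added in signals.py and tasks.py. A blank filter, like the default '.*', matches every title, so the filter skips nothing.

import re

def title_matches_filter(filter_text, title):
    # Hypothetical helper mirroring the patch's re.search(filter_text, title) checks.
    # An empty pattern or the default '.*' matches any title, so no media is skipped.
    return bool(re.search(filter_text, title))

# Example: keep only titles containing "podcast", case-insensitively.
assert title_matches_filter(r'(?i)podcast', 'Weekly Podcast #12')
assert not title_matches_filter(r'(?i)podcast', 'Live stream highlights')
assert title_matches_filter('', 'Any title at all')  # blank filter keeps everything

Media that fails the check is only marked skip = True; nothing already downloaded is deleted, which matches how the download cap is handled in signals.py above.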