From 0f65a4027a1abfbaf7d8eb3f47efc87d88bd6531 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sat, 21 Oct 2023 20:46:59 +0100
Subject: [PATCH 1/8] Add support for regex filters on video names

Update views.py
Update tests.py
Update source.html
Update tasks.py
Update signals.py
Update 0001_initial.py
Update models.py
Update models.py
Update tests.py
---
 tubesync/sync/migrations/0001_initial.py |  1 +
 tubesync/sync/models.py                  |  7 +++
 tubesync/sync/signals.py                 | 76 +++++++++++++++---------
 tubesync/sync/tasks.py                   |  6 ++
 tubesync/sync/templates/sync/source.html |  4 ++
 tubesync/sync/tests.py                   |  3 +
 tubesync/sync/views.py                   |  2 +-
 7 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index aa267a9..cabd403 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,6 +24,7 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
+                ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index bb8c723..c4f1f78 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -287,6 +287,13 @@ class Source(models.Model):
         help_text=_('If "delete old media" is ticked, the number of days after which '
                     'to automatically delete media')
     )
+    filter_text = models.CharField(
+        _('filter string'),
+        max_length=100,
+        default='.*',
+        blank=True
+        help_text=_('Regex compatible filter string for video titles')
+    )
     delete_removed_media = models.BooleanField(
         _('delete removed media'),
         default=False,
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index f27b452..e9fdc40 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,4 +1,5 @@
 import os
+import re
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -104,36 +105,57 @@ def media_post_save(sender, instance, created, **kwargs):
     # already been downloaded
     if not instance.downloaded:
         max_cap_age = instance.source.download_cap_date
-        published = instance.published
-        if not published:
-            if not instance.skip:
-                log.warn(f'Media: {instance.source} / {instance} has no published date '
-                         f'set, marking to be skipped')
+        filter_text = instance.source.filter_text
+        published = instance.published 
+
+        if instance.skip:
+                #currently marked to be skipped, check if skip conditions still apply
+                if not published:
+                    log.debug(f'Media: {instance.source} / {instance} has no published date '
+                                  f'set but is already marked to be skipped')
+                else:            
+                    if max_cap_age and filter_text:
+                        if (published > max_cap_age) and (re.search(filter_text,instance.title)):
+                            # Media was published after the cap date but is set to be skipped
+                            print('Has a valid publishing date and matches filter, marking unskipped')
+                            instance.skip = False
+                            cap_changed = True
+                        else:
+                            print('does not have a valid publishing date or filter string, already marked skipped')
+                            log.info(f'Media: {instance.source} / {instance} has no published date '
+                                  f'set but is already marked to be skipped')
+                    elif max_cap_age:
+                        if published > max_cap_age:
+                            # Media was published after the cap date but is set to be skipped
+                            log.info(f'Media: {instance.source} / {instance} has a valid '
+                                    f'publishing date, marking to be unskipped')
+                            instance.skip = False
+                            cap_changed = True
+                    elif filter_text:
+                        if re.search(filter_text,instance.title):
+                            # Media was published after the cap date but is set to be skipped
+                            log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
+                            instance.skip = False
+                            cap_changed = True
+        else:
+            if not published:
+                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
                 instance.skip = True
                 cap_changed = True
             else:
-                log.debug(f'Media: {instance.source} / {instance} has no published date '
-                          f'set but is already marked to be skipped')
-        else:
-            if max_cap_age:
-                if published > max_cap_age and instance.skip:
-                    # Media was published after the cap date but is set to be skipped
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                            f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
-                elif published <= max_cap_age and not instance.skip:
-                    log.info(f'Media: {instance.source} / {instance} is too old for '
-                            f'the download cap date, marking to be skipped')
-                    instance.skip = True
-                    cap_changed = True
-            else:
-                if instance.skip:
-                    # Media marked to be skipped but source download cap removed
-                    log.info(f'Media: {instance.source} / {instance} has a valid '
-                            f'publishing date, marking to be unskipped')
-                    instance.skip = False
-                    cap_changed = True
+                if max_cap_age:
+                    if published <= max_cap_age:            
+                        log.info(f'Media: {instance.source} / {instance} is too old for '
+                                f'the download cap date, marking to be skipped')
+                        instance.skip = True
+                        cap_changed = True
+                if filter_text:
+                    if not re.search(filter_text,instance.title):
+                        #media doesn't match the filter text but is not marked to be skipped
+                        log.info(f'Media: {instance.source} / {instance} does not match the filter text')
+                        instance.skip = True
+                        cap_changed = True
+      
     # Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
     if instance.metadata:
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 2f94621..cbb54cc 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,6 +8,7 @@ import os
 import json
 import math
 import uuid
+import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -254,6 +255,11 @@ def download_media_metadata(media_id):
             log.warn(f'Media: {source} / {media} is older than cap age '
                      f'{max_cap_age}, skipping')
             media.skip = True
+    # If the source has a search filter, check the video title matches the filter
+    if not re.search(source.filter_text,media.title):
+        # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
+        log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping')
+        media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
         if not isinstance(media.published, datetime):
diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html
index 22122e2..c5812b2 100644
--- a/tubesync/sync/templates/sync/source.html
+++ b/tubesync/sync/templates/sync/source.html
@@ -43,6 +43,10 @@
         <td class="hide-on-small-only">Directory</td>
         <td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td>
       </tr>
+      <tr title="Filter text">
+        <td class="hide-on-small-only">Filter text</td>
+        <td><span class="hide-on-med-and-up">Filter text<br></span><strong>{{ source.filter_text }}</strong></td>
+      </tr>
       <tr title="Media file name format to use for saving files">
         <td class="hide-on-small-only">Media format</td>
         <td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td>
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index f303c88..a4963db 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -175,6 +175,7 @@ class FrontEndTestCase(TestCase):
             'directory': 'testdirectory',
             'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
             'download_cap': 0,
+            'filter_text':'.*',
             'index_schedule': 3600,
             'delete_old_media': False,
             'days_to_keep': 14,
@@ -217,6 +218,7 @@ class FrontEndTestCase(TestCase):
             'directory': 'testdirectory',
             'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
             'download_cap': 0,
+            'filter_text':'.*',
             'index_schedule': Source.IndexSchedule.EVERY_HOUR,
             'delete_old_media': False,
             'days_to_keep': 14,
@@ -247,6 +249,7 @@ class FrontEndTestCase(TestCase):
             'directory': 'testdirectory',
             'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
             'download_cap': 0,
+            'filter_text':'.*',
             'index_schedule': Source.IndexSchedule.EVERY_2_HOURS,  # changed
             'delete_old_media': False,
             'days_to_keep': 14,
diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index e187cd8..0b808eb 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -294,7 +294,7 @@ class ValidateSourceView(FormView):
 
 class EditSourceMixin:
     model = Source
-    fields = ('source_type', 'key', 'name', 'directory', 'media_format',
+    fields = ('source_type', 'key', 'name', 'directory', 'filter_text', 'media_format',
               'index_schedule', 'download_media', 'download_cap', 'delete_old_media',
               'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec',
               'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails',

From fea0bb191e1f441f31f99fdb52ea5a151b8c6d5f Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sat, 21 Oct 2023 21:23:57 +0100
Subject: [PATCH 2/8] Fix typo

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index c4f1f78..af281c8 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -291,7 +291,7 @@ class Source(models.Model):
         _('filter string'),
         max_length=100,
         default='.*',
-        blank=True
+        blank=True,
         help_text=_('Regex compatible filter string for video titles')
     )
     delete_removed_media = models.BooleanField(

From 22edd1bbda959c60b319ed400c0e028e71e905d0 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sun, 22 Oct 2023 02:25:19 +0100
Subject: [PATCH 3/8] Update pagination.html

---
 tubesync/common/templates/pagination.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/common/templates/pagination.html b/tubesync/common/templates/pagination.html
index 0e378a8..e48b24d 100644
--- a/tubesync/common/templates/pagination.html
+++ b/tubesync/common/templates/pagination.html
@@ -3,7 +3,7 @@
   <div class="col s12">
     <div class="pagination">
       {% for i in paginator.page_range %}
-        <a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}">{{ i }}</a>
+        <a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}{% if only_skipped %}&only_skipped=yes{% endif %}">{{ i }}</a>
       {% endfor %}
     </div>
   </div>

From 0c5e3d38183fad953ef4c85ede3025de6597cfad Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sun, 22 Oct 2023 02:30:24 +0100
Subject: [PATCH 4/8] Update media.html

---
 tubesync/sync/templates/sync/media.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/media.html b/tubesync/sync/templates/sync/media.html
index 420b15b..d2d4e63 100644
--- a/tubesync/sync/templates/sync/media.html
+++ b/tubesync/sync/templates/sync/media.html
@@ -64,5 +64,5 @@
   </div>
   {% endfor %}
 </div>
-{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped %}
+{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped only_skipped=only_skipped %}
 {% endblock %}

From 8240c49d5cee6bae0383007e32dc850c2a46d739 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Sun, 22 Oct 2023 02:42:57 +0100
Subject: [PATCH 5/8] Update ci.yaml

---
 .github/workflows/ci.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index af629ae..a2b1225 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -4,6 +4,7 @@ env:
   IMAGE_NAME: tubesync
 
 on:
+  workflow_dispatch:
   push:
     branches:
       - main

From aa4bd4ec26350f829e145b471105fa18a999aa07 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Tue, 24 Oct 2023 18:17:56 +0100
Subject: [PATCH 6/8] Ran makemigrations

---
 tubesync/sync/migrations/0001_initial.py      |  1 -
 .../migrations/0020_auto_20231024_1812.py     | 29 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 tubesync/sync/migrations/0020_auto_20231024_1812.py

diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index cabd403..aa267a9 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,7 +24,6 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
-                ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/migrations/0020_auto_20231024_1812.py b/tubesync/sync/migrations/0020_auto_20231024_1812.py
new file mode 100644
index 0000000..1fb49a9
--- /dev/null
+++ b/tubesync/sync/migrations/0020_auto_20231024_1812.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.22 on 2023-10-24 17:12
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('sync', '0019_add_delete_removed_media'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='source',
+            name='filter_text',
+            field=models.CharField(blank=True, default='.*', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='auto_subtitles',
+            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='sub_langs',
+            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
+        ),
+    ]

From 0523f481d2f0ea3b04f6213128e47776e28932d9 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Mon, 23 Oct 2023 19:38:28 +0100
Subject: [PATCH 7/8] Updated according to comments on PR

Fixed whitespace

Update tests.py

Ran makemigrations

Update models.py

Update tests.py

Update models.py

Update tests.py

Update models.py

Update models.py

Update tests.py

Update models.py

Update tests.py

Update tests.py

Update tests.py

Update models.py

Update models.py

Update tests.py

Update models.py

Update models.py

Update tests.py

Update tests.py

Update signals.py

Update tasks.py

Update signals.py

Update models.py

Update tasks.py

Update signals.py

Update tasks.py

Update models.py
---
 tubesync/sync/migrations/0001_initial.py      |  1 -
 .../migrations/0020_auto_20231024_1825.py     | 29 ++++++++++
 tubesync/sync/models.py                       |  6 +-
 tubesync/sync/signals.py                      | 57 +++++++++----------
 tubesync/sync/tasks.py                        |  5 +-
 tubesync/sync/tests.py                        | 23 ++++++++
 6 files changed, 87 insertions(+), 34 deletions(-)
 create mode 100644 tubesync/sync/migrations/0020_auto_20231024_1825.py

diff --git a/tubesync/sync/migrations/0001_initial.py b/tubesync/sync/migrations/0001_initial.py
index cabd403..aa267a9 100644
--- a/tubesync/sync/migrations/0001_initial.py
+++ b/tubesync/sync/migrations/0001_initial.py
@@ -24,7 +24,6 @@ class Migration(migrations.Migration):
                 ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
                 ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
                 ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
-                ('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
                 ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
                 ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
                 ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),
diff --git a/tubesync/sync/migrations/0020_auto_20231024_1825.py b/tubesync/sync/migrations/0020_auto_20231024_1825.py
new file mode 100644
index 0000000..295339a
--- /dev/null
+++ b/tubesync/sync/migrations/0020_auto_20231024_1825.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.22 on 2023-10-24 17:25
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('sync', '0019_add_delete_removed_media'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='source',
+            name='filter_text',
+            field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='auto_subtitles',
+            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
+        ),
+        migrations.AlterField(
+            model_name='source',
+            name='sub_langs',
+            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
+        ),
+    ]
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index af281c8..729e21a 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1,6 +1,7 @@
 import os
 import uuid
 import json
+import re
 from xml.etree import ElementTree
 from collections import OrderedDict
 from datetime import datetime, timedelta
@@ -290,7 +291,7 @@ class Source(models.Model):
     filter_text = models.CharField(
         _('filter string'),
         max_length=100,
-        default='.*',
+        default='',
         blank=True,
         help_text=_('Regex compatible filter string for video titles')
     )
@@ -545,6 +546,14 @@ class Source(models.Model):
         except Exception as e:
             return ''
 
+    def is_regex_match(self, media_item_title):
+        '''
+            Returns True if the filter_text regular expression of this source
+            is found anywhere in media_item_title, otherwise False. An empty
+            filter_text matches every title. An invalid pattern raises re.error.
+        '''
+        return bool(re.search(self.filter_text, media_item_title))
+
     def index_media(self):
         '''
             Index the media source returning a list of media metadata as dicts.
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index e9fdc40..d1f3d03 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -109,34 +109,24 @@ def media_post_save(sender, instance, created, **kwargs):
         published = instance.published 
 
         if instance.skip:
-                #currently marked to be skipped, check if skip conditions still apply
-                if not published:
-                    log.debug(f'Media: {instance.source} / {instance} has no published date '
-                                  f'set but is already marked to be skipped')
-                else:            
-                    if max_cap_age and filter_text:
-                        if (published > max_cap_age) and (re.search(filter_text,instance.title)):
-                            # Media was published after the cap date but is set to be skipped
-                            print('Has a valid publishing date and matches filter, marking unskipped')
-                            instance.skip = False
-                            cap_changed = True
-                        else:
-                            print('does not have a valid publishing date or filter string, already marked skipped')
-                            log.info(f'Media: {instance.source} / {instance} has no published date '
-                                  f'set but is already marked to be skipped')
-                    elif max_cap_age:
-                        if published > max_cap_age:
-                            # Media was published after the cap date but is set to be skipped
-                            log.info(f'Media: {instance.source} / {instance} has a valid '
-                                    f'publishing date, marking to be unskipped')
-                            instance.skip = False
-                            cap_changed = True
-                    elif filter_text:
-                        if re.search(filter_text,instance.title):
-                            # Media was published after the cap date but is set to be skipped
-                            log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
-                            instance.skip = False
-                            cap_changed = True
+            # Currently marked to be skipped, check if skip conditions still apply
+            if not published:
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                        f'set but is already marked to be skipped')
+            else:
+                # A source without a download cap date or without filter text
+                # places no restriction of that kind on the media
+                within_cap = (not max_cap_age) or (published > max_cap_age)
+                matches_filter = (not filter_text) or instance.source.is_regex_match(instance.title)
+                if within_cap and matches_filter:
+                    # No skip condition applies any more but the media is set to be skipped
+                    log.info(f'Media: {instance.source} / {instance} no longer meets any '
+                            f'skip condition, marking to be unskipped')
+                    instance.skip = False
+                    cap_changed = True
+                else:
+                    log.debug(f'Media: {instance.source} / {instance} is too old for the download '
+                            f'cap date or does not match the filter text, leaving it skipped')
         else:
             if not published:
                 log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index cbb54cc..7e79530 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,7 +8,6 @@ import os
 import json
 import math
 import uuid
-import re
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -256,9 +255,9 @@ def download_media_metadata(media_id):
                      f'{max_cap_age}, skipping')
             media.skip = True
     # If the source has a search filter, check the video title matches the filter
-    if not re.search(source.filter_text,media.title):
+    if source.filter_text and not source.is_regex_match(media.title):
         # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
-        log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping')
+        log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
         media.skip = True
     # If the source has a cut-off check the upload date is within the allowed delta
     if source.delete_old_media and source.days_to_keep > 0:
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index a4963db..1ca2643 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -1471,6 +1471,30 @@ class FormatMatchingTestCase(TestCase):
             self.media.get_best_video_format()
             self.media.get_best_audio_format()
 
+    def test_is_regex_match(self):
+        # Maps each filter_text pattern to whether Source.is_regex_match()
+        # is expected to find it in the title of the "boring" test metadata
+        self.media.metadata = all_test_metadata['boring']
+        expected_matches = {
+            r'.*': True,
+            r'no fancy stuff': True,
+            r'No fancy stuff': False,
+            r'(?i)No fancy stuff': True,  # inline case insensitive flag
+            r'no': True,
+            r'Foo': False,
+            r'^(?!.*fancy).*$': False,
+            r'^(?!.*funny).*$': True,
+            r'(?=.*f.*)(?=.{0,2}|.{4,})': True,
+            r'f{4,}': False,
+            r'^[^A-Z]*$': True,
+            r'^[^a-z]*$': False,
+            r'^[^\s]*$': False,
+        }
+        for filter_text, expected in expected_matches.items():
+            # subTest reports which pattern failed instead of stopping at the first
+            with self.subTest(filter_text=filter_text):
+                self.source.filter_text = filter_text
+                self.assertEqual(self.source.is_regex_match(self.media.title), expected)
+
 
 class TasksTestCase(TestCase):
     def setUp(self):

From d1cb7ef76c8be1ffe80d1dce1acf12b69cbc9757 Mon Sep 17 00:00:00 2001
From: locke4 <65832338+locke4@users.noreply.github.com>
Date: Tue, 24 Oct 2023 19:26:50 +0100
Subject: [PATCH 8/8] Delete
 tubesync/sync/migrations/0020_auto_20231024_1812.py

---
 .../migrations/0020_auto_20231024_1812.py     | 29 -------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 tubesync/sync/migrations/0020_auto_20231024_1812.py

diff --git a/tubesync/sync/migrations/0020_auto_20231024_1812.py b/tubesync/sync/migrations/0020_auto_20231024_1812.py
deleted file mode 100644
index 1fb49a9..0000000
--- a/tubesync/sync/migrations/0020_auto_20231024_1812.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Generated by Django 3.2.22 on 2023-10-24 17:12
-
-import django.core.validators
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('sync', '0019_add_delete_removed_media'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='source',
-            name='filter_text',
-            field=models.CharField(blank=True, default='.*', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
-        ),
-        migrations.AlterField(
-            model_name='source',
-            name='auto_subtitles',
-            field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
-        ),
-        migrations.AlterField(
-            model_name='source',
-            name='sub_langs',
-            field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
-        ),
-    ]