From e54a762a7bd93d8070a2b1057f16c131cb509755 Mon Sep 17 00:00:00 2001
From: meeb
Date: Thu, 30 Nov 2023 18:52:32 +1100
Subject: [PATCH] rework skip logic check, prevent race condition between
 metadata downloading and upload date being checked, resolves #440, #183,
 related to #438

---
 tubesync/sync/models.py  |  4 +-
 tubesync/sync/signals.py | 96 ++++++++++++++++++++--------------------
 tubesync/sync/tasks.py   |  2 -
 3 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 729e21a..a5b8d68 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -547,7 +547,9 @@ class Source(models.Model):
         return ''
 
     def is_regex_match(self, media_item_title):
-        return bool(re.search(self.filter_text,media_item_title))
+        if not self.filter_text:
+            return True
+        return bool(re.search(self.filter_text, media_item_title))
 
     def index_media(self):
         '''
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index c15f15e..b92390e 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -96,65 +96,63 @@ def media_post_save(sender, instance, created, **kwargs):
     # If the media is skipped manually, bail.
     if instance.manual_skip:
         return
-    # Triggered after media is saved
     cap_changed = False
     can_download_changed = False
     # Reset the skip flag if the download cap has changed if the media has not
     # already been downloaded
-    if not instance.downloaded:
+    if not instance.downloaded and instance.metadata:
         max_cap_age = instance.source.download_cap_date
-        filter_text = instance.source.filter_text
-        published = instance.published
-
-        if instance.skip:
-            #currently marked to be skipped, check if skip conditions still apply
-            if not published:
-                log.debug(f'Media: {instance.source} / {instance} has no published date '
-                          f'set but is already marked to be skipped')
-            else:
-                if max_cap_age and filter_text:
-                    if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)):
-                        # Media was published after the cap date and matches the filter text, but is set to be skipped
-                        print('Has a valid publishing date and matches filter, marking unskipped')
-                        instance.skip = False
-                        cap_changed = True
-                    else:
-                        print('does not have a valid publishing date or filter string, already marked skipped')
-                        log.info(f'Media: {instance.source} / {instance} has no published date '
-                                 f'set but is already marked to be skipped')
-                elif max_cap_age:
-                    if published > max_cap_age:
-                        # Media was published after the cap date but is set to be skipped
-                        log.info(f'Media: {instance.source} / {instance} has a valid '
-                                 f'publishing date, marking to be unskipped')
-                        instance.skip = False
-                        cap_changed = True
-                elif filter_text:
-                    if instance.source.is_regex_match(instance.title):
-                        # Media matches the filter text but is set to be skipped
-                        log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
-                        instance.skip = False
-                        cap_changed = True
-        else:
-            if not published:
-                log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
+        filter_text = instance.source.filter_text.strip()
+        published = instance.published
+        if not published:
+            if not instance.skip:
+                log.warn(f'Media: {instance.source} / {instance} has no published date '
+                         f'set, marking to be skipped')
                 instance.skip = True
                 cap_changed = True
             else:
-                if max_cap_age:
-                    if published <= max_cap_age:
-                        log.info(f'Media: {instance.source} / {instance} is too old for '
-                                 f'the download cap date, marking to be skipped')
-                        instance.skip = True
+                log.debug(f'Media: {instance.source} / {instance} has no published date '
+                          f'set but is already marked to be skipped')
+        else:
+            if max_cap_age:
+                if published > max_cap_age and instance.skip:
+                    if filter_text:
+                        if instance.source.is_regex_match(instance.title):
+                            log.info(f'Media: {instance.source} / {instance} has a valid '
+                                     f'publishing date and title filter, marking to be unskipped')
+                            instance.skip = False
+                            cap_changed = True
+                        else:
+                            log.debug(f'Media: {instance.source} / {instance} has a valid publishing date '
+                                      f'but failed the title filter match, already marked skipped')
+                    else:
+                        log.info(f'Media: {instance.source} / {instance} has a valid '
+                                 f'publishing date, marking to be unskipped')
+                        instance.skip = False
                         cap_changed = True
-                if filter_text:
-                    if not instance.source.is_regex_match(instance.title):
-                        #media doesn't match the filter text but is not marked to be skipped
-                        log.info(f'Media: {instance.source} / {instance} does not match the filter text')
-                        instance.skip = True
-                        cap_changed = True
-
+                elif published <= max_cap_age and not instance.skip:
+                    log.info(f'Media: {instance.source} / {instance} is too old for '
+                             f'the download cap date, marking to be skipped')
+                    instance.skip = True
+                    cap_changed = True
+            else:
+                if instance.skip:
+                    # Media marked to be skipped but source download cap removed
+                    if filter_text:
+                        if instance.source.is_regex_match(instance.title):
+                            log.info(f'Media: {instance.source} / {instance} has a valid '
+                                     f'publishing date and title filter, marking to be unskipped')
+                            instance.skip = False
+                            cap_changed = True
+                        else:
+                            log.info(f'Media: {instance.source} / {instance} has a valid publishing date '
+                                     f'but failed the title filter match, already marked skipped')
+                    else:
+                        log.debug(f'Media: {instance.source} / {instance} has a valid publishing date and '
+                                  f'is already marked as not to be skipped')
+
+
     cap_changed = False
     # Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
     if instance.metadata:
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 7e79530..5ecfd5e 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -231,11 +231,9 @@ def download_media_metadata(media_id):
         log.error(f'Task download_media_metadata(pk={media_id}) called but no '
                   f'media exists with ID: {media_id}')
         return
-
     if media.manual_skip:
         log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
         return
-
     source = media.source
     metadata = media.index_metadata()
     media.metadata = json.dumps(metadata, default=json_serial)
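Reviewer note, not part of the patch: below is a minimal standalone sketch of the behaviour the reworked is_regex_match() is expected to have after this change. It uses a hypothetical FakeSource stand-in rather than the real sync.models.Source Django model, and the example filter patterns and titles are invented for illustration. The key point is that an empty or unset filter_text now matches every title, so sources with no filter configured are no longer skipped by the title filter check.

import re


class FakeSource:
    """Hypothetical stand-in for sync.models.Source, mirroring the patched method."""

    def __init__(self, filter_text=''):
        self.filter_text = filter_text

    def is_regex_match(self, media_item_title):
        # Patched behaviour: an empty filter matches every title
        if not self.filter_text:
            return True
        return bool(re.search(self.filter_text, media_item_title))


# No filter configured: every title matches, so the filter never marks media to be skipped
assert FakeSource().is_regex_match('Any upload title')

# Filter configured: only titles matching the regex pass
assert FakeSource(r'(?i)highlights').is_regex_match('Match Highlights - Week 3')
assert not FakeSource(r'(?i)highlights').is_regex_match('Full game replay')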