Rework the skip logic check to prevent a race condition between metadata downloading and the upload date being checked; resolves #440 and #183, related to #438

This commit is contained in:
meeb 2023-11-30 18:52:32 +11:00
parent 512b70adad
commit e54a762a7b
3 changed files with 50 additions and 52 deletions

View File

@@ -547,7 +547,9 @@ class Source(models.Model):
return '' return ''
def is_regex_match(self, media_item_title): def is_regex_match(self, media_item_title):
return bool(re.search(self.filter_text,media_item_title)) if not self.filter_text:
return True
return bool(re.search(self.filter_text, media_item_title))
def index_media(self): def index_media(self):
''' '''

View File

@@ -96,65 +96,63 @@ def media_post_save(sender, instance, created, **kwargs):
# If the media is skipped manually, bail. # If the media is skipped manually, bail.
if instance.manual_skip: if instance.manual_skip:
return return
# Triggered after media is saved # Triggered after media is saved
cap_changed = False cap_changed = False
can_download_changed = False can_download_changed = False
# Reset the skip flag if the download cap has changed if the media has not # Reset the skip flag if the download cap has changed if the media has not
# already been downloaded # already been downloaded
if not instance.downloaded: if not instance.downloaded and instance.metadata:
max_cap_age = instance.source.download_cap_date max_cap_age = instance.source.download_cap_date
filter_text = instance.source.filter_text filter_text = instance.source.filter_text.strip()
published = instance.published published = instance.published
if not published:
if instance.skip: if not instance.skip:
#currently marked to be skipped, check if skip conditions still apply log.warn(f'Media: {instance.source} / {instance} has no published date '
if not published: f'set, marking to be skipped')
log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
else:
if max_cap_age and filter_text:
if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)):
# Media was published after the cap date and matches the filter text, but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False
cap_changed = True
else:
print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
elif max_cap_age:
if published > max_cap_age:
# Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True
elif filter_text:
if instance.source.is_regex_match(instance.title):
# Media matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
instance.skip = True instance.skip = True
cap_changed = True cap_changed = True
else: else:
if max_cap_age: log.debug(f'Media: {instance.source} / {instance} has no published date '
if published <= max_cap_age: f'set but is already marked to be skipped')
log.info(f'Media: {instance.source} / {instance} is too old for ' else:
f'the download cap date, marking to be skipped') if max_cap_age:
instance.skip = True if published > max_cap_age and instance.skip:
cap_changed = True if filter_text:
if filter_text: if instance.source.is_regex_match(instance.title):
if not instance.source.is_regex_match(instance.title): log.info(f'Media: {instance.source} / {instance} has a valid '
#media doesn't match the filter text but is not marked to be skipped f'publishing date and title filter, marking to be unskipped')
log.info(f'Media: {instance.source} / {instance} does not match the filter text') instance.skip = False
instance.skip = True cap_changed = True
else:
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date '
f'but failed the title filter match, already marked skipped')
else:
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped')
instance.skip = False
cap_changed = True cap_changed = True
elif published <= max_cap_age and not instance.skip:
log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped')
instance.skip = True
cap_changed = True
else:
if instance.skip:
# Media marked to be skipped but source download cap removed
if filter_text:
if instance.source.is_regex_match(instance.title):
log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date and title filter, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
log.info(f'Media: {instance.source} / {instance} has a valid publishing date '
f'but failed the title filter match, already marked skipped')
else:
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date and '
f'is already marked as not to be skipped')
cap_changed = False
# Recalculate the "can_download" flag, this may # Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed # need to change if the source specifications have been changed
if instance.metadata: if instance.metadata:

View File

@@ -231,11 +231,9 @@ def download_media_metadata(media_id):
log.error(f'Task download_media_metadata(pk={media_id}) called but no ' log.error(f'Task download_media_metadata(pk={media_id}) called but no '
f'media exists with ID: {media_id}') f'media exists with ID: {media_id}')
return return
if media.manual_skip: if media.manual_skip:
log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.') log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
return return
source = media.source source = media.source
metadata = media.index_metadata() metadata = media.index_metadata()
media.metadata = json.dumps(metadata, default=json_serial) media.metadata = json.dumps(metadata, default=json_serial)