Rework the skip-logic check to prevent a race condition between metadata downloading and the upload-date check. Resolves #440 and #183; related to #438.
This commit is contained in:
parent
512b70adad
commit
e54a762a7b
|
@ -547,6 +547,8 @@ class Source(models.Model):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def is_regex_match(self, media_item_title):
|
def is_regex_match(self, media_item_title):
|
||||||
|
if not self.filter_text:
|
||||||
|
return True
|
||||||
return bool(re.search(self.filter_text, media_item_title))
|
return bool(re.search(self.filter_text, media_item_title))
|
||||||
|
|
||||||
def index_media(self):
|
def index_media(self):
|
||||||
|
|
|
@ -96,65 +96,63 @@ def media_post_save(sender, instance, created, **kwargs):
|
||||||
# If the media is skipped manually, bail.
|
# If the media is skipped manually, bail.
|
||||||
if instance.manual_skip:
|
if instance.manual_skip:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Triggered after media is saved
|
# Triggered after media is saved
|
||||||
cap_changed = False
|
cap_changed = False
|
||||||
can_download_changed = False
|
can_download_changed = False
|
||||||
# Reset the skip flag if the download cap has changed if the media has not
|
# Reset the skip flag if the download cap has changed if the media has not
|
||||||
# already been downloaded
|
# already been downloaded
|
||||||
if not instance.downloaded:
|
if not instance.downloaded and instance.metadata:
|
||||||
max_cap_age = instance.source.download_cap_date
|
max_cap_age = instance.source.download_cap_date
|
||||||
filter_text = instance.source.filter_text
|
filter_text = instance.source.filter_text.strip()
|
||||||
published = instance.published
|
published = instance.published
|
||||||
|
|
||||||
if instance.skip:
|
|
||||||
#currently marked to be skipped, check if skip conditions still apply
|
|
||||||
if not published:
|
if not published:
|
||||||
|
if not instance.skip:
|
||||||
|
log.warn(f'Media: {instance.source} / {instance} has no published date '
|
||||||
|
f'set, marking to be skipped')
|
||||||
|
instance.skip = True
|
||||||
|
cap_changed = True
|
||||||
|
else:
|
||||||
log.debug(f'Media: {instance.source} / {instance} has no published date '
|
log.debug(f'Media: {instance.source} / {instance} has no published date '
|
||||||
f'set but is already marked to be skipped')
|
f'set but is already marked to be skipped')
|
||||||
else:
|
else:
|
||||||
if max_cap_age and filter_text:
|
if max_cap_age:
|
||||||
if (published > max_cap_age) and (instance.source.is_regex_match(instance.title)):
|
if published > max_cap_age and instance.skip:
|
||||||
# Media was published after the cap date and matches the filter text, but is set to be skipped
|
if filter_text:
|
||||||
print('Has a valid publishing date and matches filter, marking unskipped')
|
if instance.source.is_regex_match(instance.title):
|
||||||
|
log.info(f'Media: {instance.source} / {instance} has a valid '
|
||||||
|
f'publishing date and title filter, marking to be unskipped')
|
||||||
instance.skip = False
|
instance.skip = False
|
||||||
cap_changed = True
|
cap_changed = True
|
||||||
else:
|
else:
|
||||||
print('does not have a valid publishing date or filter string, already marked skipped')
|
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date '
|
||||||
log.info(f'Media: {instance.source} / {instance} has no published date '
|
f'but failed the title filter match, already marked skipped')
|
||||||
f'set but is already marked to be skipped')
|
else:
|
||||||
elif max_cap_age:
|
|
||||||
if published > max_cap_age:
|
|
||||||
# Media was published after the cap date but is set to be skipped
|
|
||||||
log.info(f'Media: {instance.source} / {instance} has a valid '
|
log.info(f'Media: {instance.source} / {instance} has a valid '
|
||||||
f'publishing date, marking to be unskipped')
|
f'publishing date, marking to be unskipped')
|
||||||
instance.skip = False
|
instance.skip = False
|
||||||
cap_changed = True
|
cap_changed = True
|
||||||
elif filter_text:
|
elif published <= max_cap_age and not instance.skip:
|
||||||
if instance.source.is_regex_match(instance.title):
|
|
||||||
# Media matches the filter text but is set to be skipped
|
|
||||||
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
|
|
||||||
instance.skip = False
|
|
||||||
cap_changed = True
|
|
||||||
else:
|
|
||||||
if not published:
|
|
||||||
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
|
|
||||||
instance.skip = True
|
|
||||||
cap_changed = True
|
|
||||||
else:
|
|
||||||
if max_cap_age:
|
|
||||||
if published <= max_cap_age:
|
|
||||||
log.info(f'Media: {instance.source} / {instance} is too old for '
|
log.info(f'Media: {instance.source} / {instance} is too old for '
|
||||||
f'the download cap date, marking to be skipped')
|
f'the download cap date, marking to be skipped')
|
||||||
instance.skip = True
|
instance.skip = True
|
||||||
cap_changed = True
|
cap_changed = True
|
||||||
|
else:
|
||||||
|
if instance.skip:
|
||||||
|
# Media marked to be skipped but source download cap removed
|
||||||
if filter_text:
|
if filter_text:
|
||||||
if not instance.source.is_regex_match(instance.title):
|
if instance.source.is_regex_match(instance.title):
|
||||||
#media doesn't match the filter text but is not marked to be skipped
|
log.info(f'Media: {instance.source} / {instance} has a valid '
|
||||||
log.info(f'Media: {instance.source} / {instance} does not match the filter text')
|
f'publishing date and title filter, marking to be unskipped')
|
||||||
instance.skip = True
|
instance.skip = False
|
||||||
cap_changed = True
|
cap_changed = True
|
||||||
|
else:
|
||||||
|
log.info(f'Media: {instance.source} / {instance} has a valid publishing date '
|
||||||
|
f'but failed the title filter match, already marked skipped')
|
||||||
|
else:
|
||||||
|
log.debug(f'Media: {instance.source} / {instance} has a valid publishing date and '
|
||||||
|
f'is already marked as not to be skipped')
|
||||||
|
|
||||||
|
cap_changed = False
|
||||||
# Recalculate the "can_download" flag, this may
|
# Recalculate the "can_download" flag, this may
|
||||||
# need to change if the source specifications have been changed
|
# need to change if the source specifications have been changed
|
||||||
if instance.metadata:
|
if instance.metadata:
|
||||||
|
|
|
@ -231,11 +231,9 @@ def download_media_metadata(media_id):
|
||||||
log.error(f'Task download_media_metadata(pk={media_id}) called but no '
|
log.error(f'Task download_media_metadata(pk={media_id}) called but no '
|
||||||
f'media exists with ID: {media_id}')
|
f'media exists with ID: {media_id}')
|
||||||
return
|
return
|
||||||
|
|
||||||
if media.manual_skip:
|
if media.manual_skip:
|
||||||
log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
|
log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
|
||||||
return
|
return
|
||||||
|
|
||||||
source = media.source
|
source = media.source
|
||||||
metadata = media.index_metadata()
|
metadata = media.index_metadata()
|
||||||
media.metadata = json.dumps(metadata, default=json_serial)
|
media.metadata = json.dumps(metadata, default=json_serial)
|
||||||
|
|
Loading…
Reference in New Issue