refactoring

This commit is contained in:
meeb 2020-12-08 16:56:43 +11:00
parent fa25e162b9
commit 195a1bef4d
7 changed files with 315 additions and 247 deletions

245
app/sync/matching.py Normal file
View File

@ -0,0 +1,245 @@
'''
Match functions take a single Media object instance as their only argument and
return a tuple of two values. The first value is a boolean which is True if the
match was exact and False if it was a "best fit" fallback (or no match), the
second value is the ID of the format that was matched, or False if nothing matched.
'''
from django.conf import settings
# Lowest video height that is still accepted when falling back to a lower
# resolution than the source asked for. Read from settings.VIDEO_HEIGHT_CUTOFF,
# defaults to 360 when the setting is not defined.
min_height = getattr(settings, 'VIDEO_HEIGHT_CUTOFF', 360)
# Minimum height a fallback video format must have to be accepted when the
# source fallback is FALLBACK_NEXT_BEST_HD. Read from settings.VIDEO_HEIGHT_IS_HD,
# defaults to 500 when the setting is not defined.
fallback_hd_cutoff = getattr(settings, 'VIDEO_HEIGHT_IS_HD', 500)
def get_best_combined_format(media):
    '''
    Looks for a single format that carries both audio and video and satisfies
    every requirement of the media's source. A combined format is preferred
    over separate audio and video formats when one is available. There is no
    fallback here, a combined format must match the source profile exactly to
    be valid.

    Returns (True, format_id) for the first matching format yielded by
    media.iter_formats(), or (False, False) when no format matches.
    '''
    for candidate in media.iter_formats():
        wanted = media.source
        satisfies_source = (
            # Resolution name, video codec and audio codec must all be equal
            wanted.source_resolution.strip().upper() == candidate['format']
            and wanted.source_vcodec == candidate['vcodec']
            and wanted.source_acodec == candidate['acodec']
            # 60fps and HDR are only required when the source prefers them
            and (not wanted.prefer_60fps or candidate['is_60fps'])
            and (not wanted.prefer_hdr or candidate['is_hdr'])
        )
        if satisfies_source:
            # Everything lined up, this is a combined match
            return True, candidate['id']
    return False, False
def get_best_audio_format(media):
    '''
    Finds the best match for the source required audio format. If the source
    has a 'fallback' of fail this can return no match.

    Only audio-only formats (formats with no video codec) are considered and
    they are ranked by audio bitrate, highest first.

    Returns one of:
        (True, format_id)   the highest bitrate format with the source's codec
        (False, format_id)  no codec matched but the source can fall back, this
                            is the highest bitrate audio-only format
        (False, False)      there are no audio-only formats, or no codec
                            matched and the source can not fall back
    '''
    # Keep audio-only formats, anything with a video stream is skipped
    audio_formats = [fmt for fmt in media.iter_formats() if not fmt['vcodec']]
    if not audio_formats:
        # Media has no audio formats at all
        return False, False
    # Highest bitrate first. Sort ascending then reverse (rather than using
    # reverse=True) so formats with equal bitrates keep the same relative
    # order this function has always produced.
    audio_formats.sort(key=lambda k: k['abr'])
    audio_formats.reverse()
    # Find the highest bitrate audio format with a matching codec
    wanted_acodec = media.source.source_acodec
    for fmt in audio_formats:
        if wanted_acodec == fmt['acodec']:
            # Matched!
            return True, fmt['id']
    # No codecs matched
    if media.source.can_fallback:
        # Can fallback, use the highest bitrate non-matching codec. Return the
        # format ID, not the format dict, so the result has the same shape as
        # the matched case above.
        return False, audio_formats[0]['id']
    # Can't fallback
    return False, False
def _first_matching_format(formats, predicate):
    '''
    Returns the first format in formats that predicate accepts, or None.
    '''
    return next((fmt for fmt in formats if predicate(fmt)), None)


def _video_match_tiers(vcodec, want_60fps, want_hdr):
    '''
    Returns the ordered list of checks used to pick a video format, strictest
    first. The first check is also the non-resolution part of an exact match.
    Each later check drops one more requirement.
    '''
    if want_60fps and want_hdr:
        return [
            # Codec, HDR and 60fps
            lambda fmt: vcodec == fmt['vcodec'] and fmt['is_hdr'] and fmt['is_60fps'],
            # HDR and 60fps, any codec
            lambda fmt: fmt['is_hdr'] and fmt['is_60fps'],
            # 60fps only, any codec and no HDR requirement
            lambda fmt: fmt['is_60fps'],
        ]
    if want_60fps:
        return [
            # Codec and 60fps
            lambda fmt: vcodec == fmt['vcodec'] and fmt['is_60fps'],
            # 60fps, any codec
            lambda fmt: fmt['is_60fps'],
        ]
    if want_hdr:
        return [
            # Codec and HDR
            lambda fmt: vcodec == fmt['vcodec'] and fmt['is_hdr'],
            # HDR, any codec
            lambda fmt: fmt['is_hdr'],
        ]
    return [
        # Codec without 60fps
        lambda fmt: vcodec == fmt['vcodec'] and not fmt['is_60fps'],
        # Codec, any frame rate
        lambda fmt: vcodec == fmt['vcodec'],
    ]


def get_best_video_format(media):
    '''
    Finds the best match for the source required video format. If the source
    has a 'fallback' of fail this can return no match. Resolution is treated
    as the most important factor to match, prefer_hdr and prefer_60fps are
    'soft' requirements that are progressively dropped when falling back.

    Only video-only formats (formats with no audio codec) are considered.

    Returns one of:
        (True, format_id)   a format matching resolution, codec and the
                            source's 60fps / HDR preferences
        (False, format_id)  a fallback format permitted by the source's
                            fallback setting
        (False, False)      the source is audio only, or nothing acceptable
                            was found
    '''
    source = media.source
    # Check if the source wants audio only, fast path to return
    if source.is_audio:
        return False, False
    wanted_format = source.source_resolution.strip().upper()
    # Filter video-only formats by resolution that matches the source
    video_formats = [
        fmt for fmt in media.iter_formats()
        if not fmt['acodec'] and wanted_format == fmt['format']
    ]
    if not video_formats:
        # No streams match the requested resolution, see if we can fallback
        if not source.can_fallback:
            # Can't fallback
            return False, False
        # Find the next-best formats by height: no taller than the source
        # asked for and no shorter than the configured minimum height
        video_formats = [
            fmt for fmt in media.iter_formats()
            if not fmt['acodec']
            and fmt['height'] <= source.source_resolution_height
            and fmt['height'] >= min_height
        ]
    if not video_formats:
        # Still no matches
        return False, False
    # Tallest first. Sort ascending then reverse (rather than using
    # reverse=True) so formats of equal height keep the same relative order
    # this function has always produced.
    video_formats.sort(key=lambda k: k['height'])
    video_formats.reverse()
    tiers = _video_match_tiers(source.source_vcodec, source.prefer_60fps,
                               source.prefer_hdr)
    strictest = tiers[0]
    # An exact match is the strictest tier plus the requested resolution
    best_match = _first_matching_format(
        video_formats,
        lambda fmt: wanted_format == fmt['format'] and strictest(fmt)
    )
    exact_match = best_match is not None
    if best_match is None and source.can_fallback:
        # Walk the tiers from strictest to loosest, first hit wins. Anything
        # found here is a fallback and is never reported as an exact match.
        for predicate in tiers:
            best_match = _first_matching_format(video_formats, predicate)
            if best_match is not None:
                break
        else:
            # Nothing matched any tier, use the highest resolution
            best_match = video_formats[0]
    if best_match is None:
        # Nope, failed to find match
        return False, False
    if exact_match:
        return True, best_match['id']
    # Final check to see if the fallback we found is good enough
    if source.can_fallback:
        if (source.fallback == source.FALLBACK_NEXT_BEST_HD and
                best_match['height'] >= fallback_hd_cutoff):
            return False, best_match['id']
        elif source.fallback == source.FALLBACK_NEXT_BEST:
            return False, best_match['id']
    # Nope, failed to find match
    return False, False

View File

@ -8,6 +8,8 @@ from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from .youtube import get_media_info as get_youtube_media_info
from .utils import seconds_to_timestr, parse_media_format
from .matching import (get_best_combined_format, get_best_audio_format,
get_best_video_format)
class Source(models.Model):
@ -259,7 +261,7 @@ class Source(models.Model):
depending on audio codec.
'''
if self.is_audio:
if self.source_acodec == self.SOURCE_ACODEC_M4A:
if self.source_acodec == self.SOURCE_ACODEC_MP4A:
return 'm4a'
elif self.source_acodec == self.SOURCE_ACODEC_OPUS:
return 'ogg'
@ -284,9 +286,9 @@ class Source(models.Model):
else:
vc = self.source_vcodec
ac = self.source_acodec
f = '60FPS' if self.prefer_60fps else ''
h = 'HDR' if self.prefer_hdr else ''
return f'{self.source_resolution} (video:{vc}, audio:{ac}) {f} {h}'.strip()
f = ' 60FPS' if self.is_video and self.prefer_60fps else ''
h = ' HDR' if self.is_video and self.prefer_hdr else ''
return f'{self.source_resolution} (video:{vc}, audio:{ac}){f}{h}'.strip()
@property
def directory_path(self):
@ -506,251 +508,47 @@ class Media(models.Model):
yield parse_media_format(fmt)
def get_best_combined_format(self):
'''
Attempts to see if there is a single, combined audio and video format that
exactly matches the source requirements. This is used over separate audio
and video formats if possible. Combined formats are the easiest to check
for as they must exactly match the source profile be be valid.
'''
for fmt in self.iter_formats():
# Check height matches
if self.source.source_resolution.strip().upper() != fmt['format']:
continue
# Check the video codec matches
if self.source.source_vcodec != fmt['vcodec']:
continue
# Check the audio codec matches
if self.source.source_acodec != fmt['acodec']:
continue
# if the source prefers 60fps, check for it
if self.source.prefer_60fps:
if not fmt['is_60fps']:
continue
# If the source prefers HDR, check for it
if self.source.prefer_hdr:
if not fmt['is_hdr']:
continue
# If we reach here, we have a combined match!
return True, fmt['id']
return False, False
return get_best_combined_format(self)
def get_best_audio_format(self):
'''
Finds the best match for the source required audio format. If the source
has a 'fallback' of fail this can return no match.
'''
# Order all audio-only formats by bitrate
audio_formats = []
for fmt in self.iter_formats():
# If the format has a video stream, skip it
if fmt['vcodec']:
continue
audio_formats.append(fmt)
audio_formats = list(reversed(sorted(audio_formats, key=lambda k: k['abr'])))
if not audio_formats:
# Media has no audio formats at all
return False, False
# Find the highest bitrate audio format with a matching codec
for fmt in audio_formats:
if self.source.source_acodec == fmt['acodec']:
# Matched!
return True, fmt['id']
# No codecs matched
if self.source.can_fallback:
# Can fallback, find the next highest bitrate non-matching codec
return False, audio_formats[0]
else:
# Can't fallback
return False, False
return get_best_audio_format(self)
def get_best_video_format(self):
'''
Finds the best match for the source required video format. If the source
has a 'fallback' of fail this can return no match. Resolution is treated
as the most important factor to match.
'''
min_height = getattr(settings, 'VIDEO_HEIGHT_CUTOFF', 360)
fallback_hd_cutoff = getattr(settings, 'VIDEO_HEIGHT_IS_HD', 500)
# Filter video-only formats by resolution that matches the source
video_formats = []
for fmt in self.iter_formats():
# If the format has an audio stream, skip it
if fmt['acodec']:
continue
if self.source.source_resolution.strip().upper() == fmt['format']:
video_formats.append(fmt)
# Check we matched some streams
if not video_formats:
# No streams match the requested resolution, see if we can fallback
if self.source.can_fallback:
# Find the next-best format matches by height
for fmt in self.iter_formats():
# If the format has an audio stream, skip it
if fmt['acodec']:
continue
if (fmt['height'] <= self.source.source_resolution_height and
fmt['height'] >= min_height):
video_formats.append(fmt)
else:
# Can't fallback
return False, False
video_formats = list(reversed(sorted(video_formats, key=lambda k: k['height'])))
if not video_formats:
# Still no matches
return False, False
exact_match, best_match = None, None
# Of our filtered video formats, check for resolution + codec + hdr + fps match
if self.source.prefer_60fps and self.source.prefer_hdr:
for fmt in video_formats:
# Check for an exact match
if (self.source.source_resolution.strip().upper() == fmt['format'] and
self.source.source_vcodec == fmt['vcodec'] and
fmt['is_hdr'] and
fmt['is_60fps']):
# Exact match
exact_match, best_match = True, fmt
break
if self.source.can_fallback:
if not best_match:
for fmt in video_formats:
# Check for a codec, hdr and fps match but drop the resolution
if (self.source.source_vcodec == fmt['vcodec'] and
fmt['is_hdr'] and fmt['is_60fps']):
# Close match
exact_match, best_match = False, fmt
break
if not best_match:
for fmt in video_formats:
# Check for hdr and fps match but drop the resolution and codec
if fmt['is_hdr'] and fmt['is_60fps']:
exact_match, best_match = False, fmt
break
if not best_match:
for fmt in video_formats:
# Check for fps match but drop the resolution and codec and hdr
if fmt['is_hdr'] and fmt['is_60fps']:
exact_match, best_match = False, fmt
break
if not best_match:
# Match the highest resolution
exact_match, best_match = False, video_formats[0]
# Check for resolution + codec + fps match
if self.source.prefer_60fps and not self.source.prefer_hdr:
for fmt in video_formats:
# Check for an exact match
if (self.source.source_resolution.strip().upper() == fmt['format'] and
self.source.source_vcodec == fmt['vcodec'] and
fmt['is_60fps']):
# Exact match
exact_match, best_match = True, fmt
break
if self.source.can_fallback:
if not best_match:
for fmt in video_formats:
# Check for a codec and fps match but drop the resolution
if (self.source.source_vcodec == fmt['vcodec'] and
fmt['is_60fps']):
exact_match, best_match = False, fmt
break
if not best_match:
for fmt in video_formats:
# Check for an fps match but drop the resolution and codec
if fmt['is_60fps']:
exact_match, best_match = False, fmt
break
if not best_match:
# Match the highest resolution
exact_match, best_match = False, video_formats[0]
# Check for resolution + codec + hdr
if self.source.prefer_hdr and not self.source.prefer_60fps:
for fmt in video_formats:
# Check for an exact match
if (self.source.source_resolution.strip().upper() == fmt['format'] and
self.source.source_vcodec == fmt['vcodec'] and
fmt['is_hdr']):
# Exact match
exact_match, best_match = True, fmt
break
if self.source.can_fallback:
if not best_match:
for fmt in video_formats:
# Check for a codec and hdr match but drop the resolution
if (self.source.source_vcodec == fmt['vcodec'] and
fmt['is_hdr']):
exact_match, best_match = True, fmt
break
if not best_match:
for fmt in video_formats:
# Check for an hdr match but drop the resolution and codec
if fmt['is_hdr']:
exact_match, best_match = False, fmt
break
if not best_match:
# Match the highest resolution
exact_match, best_match = False, video_formats[0]
# check for resolution + codec
if not self.source.prefer_hdr and not self.source.prefer_60fps:
for fmt in video_formats:
# Check for an exact match
if (self.source.source_resolution.strip().upper() == fmt['format'] and
self.source.source_vcodec == fmt['vcodec'] and
not fmt['is_60fps']):
# Exact match
exact_match, best_match = True, fmt
break
if self.source.can_fallback:
if not best_match:
for fmt in video_formats:
# Check for a codec match without 60fps and drop the resolution
if (self.source.source_vcodec == fmt['vcodec'] and
not fmt['is_60fps']):
exact_match, best_match = False, fmt
break
if not best_match:
for fmt in video_formats:
# Check for a codec match but drop the resolution
if self.source.source_vcodec == fmt['vcodec']:
# Close match
exact_match, best_match = False, fmt
break
if not best_match:
# Match the highest resolution
exact_match, best_match = False, video_formats[0]
# See if we found a match
if best_match:
# Final check to see if the match we found was good enough
if exact_match:
return True, best_match['id']
elif self.source.can_fallback:
# Allow the fallback if it meets requirements
if (self.source.fallback == self.source.FALLBACK_NEXT_BEST_HD and
best_match['height'] >= fallback_hd_cutoff):
return False, best_match['id']
elif self.source.fallback == self.source.FALLBACK_NEXT_BEST:
return False, best_match['id']
# Nope, failed to find match
return False, False
return get_best_video_format(self)
def get_format_str(self):
'''
Returns a youtube-dl compatible format string for the best matches
combination of source requirements and available audio and video formats.
Returns boolean False if there is no valid downloadable combo.
'''
if self.source.is_audio:
audio_format = self.get_best_audio_format()
return 'a'
else:
combined_format = self.get_best_combined_format()
if combined_format:
return 'c'
audio_match, audio_format = self.get_best_audio_format()
if audio_format:
return str(audio_format)
else:
audio_format = self.get_best_audio_format()
video_format = self.get_best_video_format()
return 'a+v'
return False
else:
combined_match, combined_format = self.get_best_combined_format()
if combined_format:
return str(combined_format)
else:
audio_match, audio_format = self.get_best_audio_format()
video_match, video_format = self.get_best_video_format()
if audio_format and video_format:
return f'{audio_format}+{video_format}'
else:
return False
return False
@property
def can_download(self):
'''
Returns boolean True if the media can be downloaded, that is, the media
has stored formats which are compatible with the source requirements.
'''
return self.get_format_str() is not False
@property
def loaded_metadata(self):
if self.pk in _metadata_cache:

View File

@ -49,7 +49,7 @@ def source_post_save(sender, instance, created, **kwargs):
@receiver(pre_delete, sender=Source)
def source_post_delete(sender, instance, **kwargs):
def source_pre_delete(sender, instance, **kwargs):
# Triggered before a source is deleted, delete all media objects to trigger
# the Media models post_delete signal
for media in Media.objects.filter(source=instance):
@ -66,7 +66,7 @@ def source_post_delete(sender, instance, **kwargs):
@receiver(task_failed, sender=Task)
def task_task_failed(sender, task_id, completed_task, **kwargs):
# Triggered after a source fails by reaching its max retry attempts
# Triggered after a task fails by reaching its max retry attempts
obj, url = map_task_to_instance(completed_task)
if isinstance(obj, Source):
log.error(f'Permanent failure for source: {obj} task: {completed_task}')
@ -78,7 +78,7 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
def media_post_save(sender, instance, created, **kwargs):
# Triggered after media is saved
if created:
# If the media is newly created fire a task off to download its thumbnail
# If the media is newly created start a task to download its thumbnail
metadata = instance.loaded_metadata
thumbnail_url = metadata.get('thumbnail', '')
if thumbnail_url:

View File

@ -33,7 +33,7 @@ def get_hash(task_name, pk):
def map_task_to_instance(task):
'''
Reverse-maps an scheduled backgrond task to an instance. Requires the task name
Reverse-maps a scheduled background task to an instance. Requires the task name
to be a known task function and the first argument to be a UUID. This is used
because UUID's are incompatible with background_task's "creator" feature.
'''
@ -45,6 +45,17 @@ def map_task_to_instance(task):
Source: 'sync:source',
Media: 'sync:media-item',
}
# If the task has a UUID set in its .queue it's probably a link to a Source
if task.queue:
try:
queue_uuid = uuid.UUID(task.queue)
try:
return Source.objects.get(pk=task.queue)
except Source.DoesNotExist:
pass
except (TypeError, ValueError, AttributeError):
pass
# Unpack
task_func, task_args_str = task.task_name, task.task_params
model = TASK_MAP.get(task_func, None)
if not model:
@ -75,7 +86,8 @@ def map_task_to_instance(task):
def get_error_message(task):
'''
Extract an error message from a failed task.
Extract an error message from a failed task. This is the last line of the
last_error field with the method name removed.
'''
if not task.has_error():
return ''
@ -92,8 +104,7 @@ def get_source_completed_tasks(source_id, only_errors=False):
'''
Returns a queryset of CompletedTask objects for a source by source ID.
'''
source_hash = get_hash('sync.tasks.index_source_task', source_id)
q = {'task_hash': source_hash}
q = {'queue': source_id}
if only_errors:
q['failed_at__isnull'] = False
return CompletedTask.objects.filter(**q).order_by('-failed_at')
@ -163,7 +174,7 @@ def index_source_task(source_id):
@background(schedule=0)
def download_media_thumbnail(media_id, url):
'''
Downloads an image from a URL and saves it as a local thumbnail attached to a
Downloads an image from a URL and saves it as a local thumbnail attached to a
Media object.
'''
try:

View File

@ -41,6 +41,10 @@
<td class="hide-on-small-only">Downloaded</td>
<td><span class="hide-on-med-and-up">Downloaded<br></span><strong>{% if media.downloaded %}<i class="fas fa-check"></i>{% else %}<i class="fas fa-times"></i>{% endif %}</strong></td>
</tr>
<tr title="Can the media be downloaded">
<td class="hide-on-small-only">Can download</td>
<td><span class="hide-on-med-and-up">Can download<br></span><strong>{% if youtube_dl_format %}<i class="fas fa-check"></i>{% else %}<i class="fas fa-times"></i>{% endif %}</strong></td>
</tr>
<tr title="The available media formats">
<td class="hide-on-small-only">Available formats</td>
<td><span class="hide-on-med-and-up">Available formats<br></span>
@ -59,6 +63,10 @@
Video: <strong>{% if video_format %}{{ video_format }} {% if video_exact %}(exact match){% else %}(fallback){% endif %}{% else %}No match{% endif %}
</strong></td>
</tr>
<tr title="Format string passed to youtube-dl">
<td class="hide-on-small-only">youtube-dl format</td>
<td><span class="hide-on-med-and-up">youtube-dl format<br></span><strong>{% if youtube_dl_format %}{{ youtube_dl_format }}{% else %}No matching formats{% endif %}</strong></td>
</tr>
</table>
</div>
</div>

View File

@ -126,6 +126,11 @@ def seconds_to_timestr(seconds):
def parse_media_format(format_dict):
'''
This parser primarily adapts the format dict returned by youtube-dl into a
standard form used by the matchers in matching.py. If youtube-dl changes
any internals, update it here.
'''
vcodec_full = format_dict.get('vcodec', '')
vcodec_parts = vcodec_full.split('.')
if len(vcodec_parts) > 0:

View File

@ -23,7 +23,7 @@ from . import youtube
class DashboardView(TemplateView):
'''
The dashboard shows non-interactive totals and summaries, nothing more.
The dashboard shows non-interactive totals and summaries.
'''
template_name = 'sync/dashboard.html'
@ -349,8 +349,8 @@ class MediaView(ListView):
class MediaThumbView(DetailView):
'''
Shows a media thumbnail. Whitenose doesn't support post-start media image
serving and the images here are pretty small, just serve them manually. This
Shows a media thumbnail. Whitenoise doesn't support post-start media image
serving and the images here are pretty small so just serve them manually. This
isn't fast, but it's not likely to be a serious bottleneck.
'''
@ -389,12 +389,13 @@ class MediaItemView(DetailView):
data['audio_format'] = audio_format
data['video_exact'] = video_exact
data['video_format'] = video_format
data['youtube_dl_format'] = self.object.get_format_str()
return data
class TasksView(ListView):
'''
A list of tasks queued to be completed. Typically, this is scraping for new
A list of tasks queued to be completed. This is, for example, scraping for new
media or downloading media.
'''