diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index aa09dcd..9826f9d 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -113,6 +113,12 @@ class Source(models.Model):
         SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'https://www.youtube.com/channel/{key}',
         SOURCE_TYPE_YOUTUBE_PLAYLIST: 'https://www.youtube.com/playlist?list={key}',
     }
+    # Format used to create indexable URLs
+    INDEX_URLS = {
+        SOURCE_TYPE_YOUTUBE_CHANNEL: 'https://www.youtube.com/c/{key}/videos',
+        SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'https://www.youtube.com/channel/{key}/videos',
+        SOURCE_TYPE_YOUTUBE_PLAYLIST: 'https://www.youtube.com/playlist?list={key}',
+    }
     # Callback functions to get a list of media from the source
     INDEXERS = {
         SOURCE_TYPE_YOUTUBE_CHANNEL: get_youtube_media_info,
@@ -341,10 +347,19 @@ class Source(models.Model):
         url = obj.URLS.get(source_type)
         return url.format(key=key)
 
+    @classmethod
+    def create_index_url(obj, source_type, key):
+        url = obj.INDEX_URLS.get(source_type)
+        return url.format(key=key)
+
     @property
     def url(self):
         return Source.create_url(self.source_type, self.key)
 
+    @property
+    def index_url(self):
+        return Source.create_index_url(self.source_type, self.key)
+
     @property
     def format_summary(self):
         if self.source_resolution == Source.SOURCE_RESOLUTION_AUDIO:
@@ -437,25 +452,8 @@ class Source(models.Model):
         indexer = self.INDEXERS.get(self.source_type, None)
         if not callable(indexer):
             raise Exception(f'Source type f"{self.source_type}" has no indexer')
-        response = indexer(self.url)
-
-        # Account for nested playlists, such as a channel of playlists of playlists
-        def _recurse_playlists(playlist):
-            videos = []
-            if not playlist:
-                return videos
-            entries = playlist.get('entries', [])
-            for entry in entries:
-                if not entry:
-                    continue
-                subentries = entry.get('entries', [])
-                if subentries:
-                    videos = videos + _recurse_playlists(entry)
-                else:
-                    videos.append(entry)
-            return videos
-
-        return _recurse_playlists(response)
+        response = indexer(self.index_url)
+        return response.get('entries', [])
 
 
 def get_media_thumb_path(instance, filename):
@@ -481,6 +479,12 @@ class Media(models.Model):
         Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'https://www.youtube.com/watch?v={key}',
         Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'https://www.youtube.com/watch?v={key}',
     }
+    # Callback functions to get a list of media from the source
+    INDEXERS = {
+        Source.SOURCE_TYPE_YOUTUBE_CHANNEL: get_youtube_media_info,
+        Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: get_youtube_media_info,
+        Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: get_youtube_media_info,
+    }
     # Maps standardised names to names used in source metdata
     METADATA_FIELDS = {
         'upload_date': {
@@ -904,6 +908,10 @@ class Media(models.Model):
             'hdr': display_format['hdr'],
         }
 
+    @property
+    def has_metadata(self):
+        return self.metadata is not None
+
     @property
     def loaded_metadata(self):
         try:
@@ -1180,6 +1188,16 @@ class Media(models.Model):
         # Return the download paramaters
         return format_str, self.source.extension
 
+    def index_metadata(self):
+        '''
+            Index the media metadata returning a dict of info.
+        '''
+        indexer = self.INDEXERS.get(self.source.source_type, None)
+        if not callable(indexer):
+            raise Exception(f'Media with source type f"{self.source.source_type}" '
+                            f'has no indexer')
+        return indexer(self.url)
+
 
 class MediaServer(models.Model):
     '''
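
Note (illustration, not part of the patch): the /videos index URLs above, together with the extract_flat option added in youtube.py below, assume the indexer now returns a single flat playlist of lightweight entries. That is why index_media() can simply return response.get('entries', []) and the _recurse_playlists() helper can be dropped. A rough sketch of the assumed response shape; the ids and titles are made up:

# Assumed shape of a flat index response (hypothetical example values):
# one playlist dict with stub entries, no nested playlists and no 'formats'.
response = {
    'id': 'examplechannel',
    'title': 'Example Channel - Videos',
    'entries': [
        {'id': 'abc123def45', 'title': 'First video', 'url': 'abc123def45'},
        {'id': 'fgh678ijk90', 'title': 'Second video', 'url': 'fgh678ijk90'},
    ],
}

# What the new Source.index_media() does with it:
videos = response.get('entries', [])
# Full per-video metadata (formats, thumbnails, upload date) is fetched later
# by Media.index_metadata() via the download_media_metadata task.
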
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index c5d3fcd..a27cd91 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -8,8 +8,9 @@ from background_task.models import Task
 from common.logger import log
 from .models import Source, Media, MediaServer
 from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task,
-                    download_media_thumbnail, map_task_to_instance,
-                    check_source_directory_exists, download_media, rescan_media_server)
+                    download_media_thumbnail, download_media_metadata,
+                    map_task_to_instance, check_source_directory_exists,
+                    download_media, rescan_media_server)
 from .utils import delete_file
 
 
@@ -93,16 +94,27 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
 def media_post_save(sender, instance, created, **kwargs):
     # Triggered after media is saved, Recalculate the "can_download" flag, this may
     # need to change if the source specifications have been changed
-    post_save.disconnect(media_post_save, sender=Media)
-    if instance.get_format_str():
-        if not instance.can_download:
-            instance.can_download = True
-            instance.save()
-    else:
-        if instance.can_download:
-            instance.can_download = False
-            instance.save()
-    post_save.connect(media_post_save, sender=Media)
+    if instance.metadata:
+        post_save.disconnect(media_post_save, sender=Media)
+        if instance.get_format_str():
+            if not instance.can_download:
+                instance.can_download = True
+                instance.save()
+        else:
+            if instance.can_download:
+                instance.can_download = False
+                instance.save()
+        post_save.connect(media_post_save, sender=Media)
+    # If the media is missing metadata schedule it to be downloaded
+    if not instance.metadata:
+        log.info(f'Scheduling task to download metadata for: {instance.url}')
+        verbose_name = _('Downloading metadata for "{}"')
+        download_media_metadata(
+            str(instance.pk),
+            priority=10,
+            verbose_name=verbose_name.format(instance.pk),
+            remove_existing_tasks=True
+        )
     # If the media is missing a thumbnail schedule it to be downloaded
     if not instance.thumb_file_exists:
         instance.thumb = None
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index e33e674..21c489a 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -179,30 +179,6 @@ def index_source_task(source_id):
         except Media.DoesNotExist:
             media = Media(key=key)
         media.source = source
-        media.metadata = json.dumps(video)
-        upload_date = media.upload_date
-        # Media must have a valid upload date
-        if upload_date:
-            media.published = timezone.make_aware(upload_date)
-        else:
-            log.error(f'Media has no upload date, skipping: {source} / {media}')
-            continue
-        # If the source has a download cap date check the upload date is allowed
-        max_cap_age = source.download_cap_date
-        if max_cap_age:
-            if media.published < max_cap_age:
-                # Media was published after the cap date, skip it
-                log.warn(f'Media: {source} / {media} is older than cap age '
-                         f'{max_cap_age}, skipping')
-                continue
-        # If the source has a cut-off check the upload date is within the allowed delta
-        if source.delete_old_media and source.days_to_keep > 0:
-            delta = timezone.now() - timedelta(days=source.days_to_keep)
-            if media.published < delta:
-                # Media was published after the cutoff date, skip it
-                log.warn(f'Media: {source} / {media} is older than '
-                         f'{source.days_to_keep} days, skipping')
-                continue
         try:
             media.save()
             log.info(f'Indexed media: {source} / {media}')
@@ -234,6 +210,56 @@ def check_source_directory_exists(source_id):
         source.make_directory()
 
 
+@background(schedule=0)
+def download_media_metadata(media_id):
+    '''
+        Downloads the metadata for a media item.
+    '''
+    try:
+        media = Media.objects.get(pk=media_id)
+    except Media.DoesNotExist:
+        # Task triggered but the media no longer exists, do nothing
+        log.error(f'Task download_media_metadata(pk={media_id}) called but no '
+                  f'media exists with ID: {media_id}')
+        return
+    source = media.source
+    metadata = media.index_metadata()
+    media.metadata = json.dumps(metadata)
+    upload_date = media.upload_date
+    # Media must have a valid upload date
+    if upload_date:
+        media.published = timezone.make_aware(upload_date)
+    else:
+        log.error(f'Media has no upload date, skipping: {source} / {media}')
+        media.skip = True
+    # If the source has a download cap date check the upload date is allowed
+    max_cap_age = source.download_cap_date
+    if max_cap_age:
+        if media.published < max_cap_age:
+            # Media was published before the cap date, skip it
+            log.warn(f'Media: {source} / {media} is older than cap age '
+                     f'{max_cap_age}, skipping')
+            media.skip = True
+    # If the source has a cut-off check the upload date is within the allowed delta
+    if source.delete_old_media and source.days_to_keep > 0:
+        delta = timezone.now() - timedelta(days=source.days_to_keep)
+        if media.published < delta:
+            # Media was published before the cutoff date, skip it
+            log.warn(f'Media: {source} / {media} is older than '
+                     f'{source.days_to_keep} days, skipping')
+            media.skip = True
+    # Check we can download the media item
+    if not media.skip:
+        if media.get_format_str():
+            media.can_download = True
+        else:
+            media.can_download = False
+    # Save the media
+    media.save()
+    log.info(f'Saved {len(media.metadata)} bytes of metadata for: '
+             f'{source} / {media_id}')
+
+
 @background(schedule=0)
 def download_media_thumbnail(media_id, url):
     '''
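
Note (illustration, not part of the patch): the two date checks moved into download_media_metadata() both skip media that is too old, i.e. published before the relevant date. A self-contained sketch of the same comparisons with made-up dates:

from datetime import datetime, timedelta, timezone

published = datetime(2020, 6, 1, tzinfo=timezone.utc)          # hypothetical
download_cap_date = datetime(2021, 1, 1, tzinfo=timezone.utc)  # hypothetical
delete_old_media, days_to_keep = True, 14

skip = False
if download_cap_date and published < download_cap_date:
    # Published before the source's download cap date: too old, skip it
    skip = True
if delete_old_media and days_to_keep > 0:
    cutoff = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    if published < cutoff:
        # Published before the delete-old-media cutoff: outside the keep window
        skip = True
print(skip)  # True for these example values
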
diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index e1e28b2..e019dd2 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -109,7 +109,7 @@
         {% else %}
           Can download?
-          Can download? {% if youtube_dl_format %}{% else %}{% endif %}
+          Can download? {% if media.can_download %}{% else %}{% endif %}
         {% endif %}
diff --git a/tubesync/sync/templates/sync/media.html b/tubesync/sync/templates/sync/media.html
index 88db658..cb38462 100644
--- a/tubesync/sync/templates/sync/media.html
+++ b/tubesync/sync/templates/sync/media.html
@@ -24,8 +24,10 @@
       {% else %}
         {% if m.skip %}
           Skipped
+        {% elif not m.has_metadata %}
+          Fetching metadata
         {% elif m.can_download %}
-          {{ m.published|date:'Y-m-d' }}
+          Downloading
         {% else %}
           No matching formats
         {% endif %}
diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index a15f260..1c91bf4 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -37,7 +37,8 @@ def get_media_info(url):
         'skip_download': True,
         'forcejson': True,
         'simulate': True,
-        'logger': log
+        'logger': log,
+        'extract_flat': True,
     })
     response = {}
     with youtube_dl.YoutubeDL(opts) as y:
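
Note (illustration, not part of the patch): extract_flat is a standard YoutubeDL option. With it set, indexing a channel or playlist returns stub entries instead of fully extracting every video, which makes indexing far cheaper but means per-video metadata has to be fetched separately, here by Media.index_metadata() via the download_media_metadata task. A minimal standalone sketch with a hypothetical channel URL:

import youtube_dl

opts = {
    'quiet': True,
    'skip_download': True,
    'simulate': True,
    'extract_flat': True,   # list the channel without extracting each video
}
url = 'https://www.youtube.com/c/examplechannel/videos'  # hypothetical
with youtube_dl.YoutubeDL(opts) as y:
    info = y.extract_info(url, download=False)
for entry in info.get('entries', []):
    # Stub entries only: no 'formats', so can_download cannot be decided here
    print(entry.get('id'), entry.get('title'))
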