From 82fa0f6bce33a4ffed6161820404bb9b5cc33600 Mon Sep 17 00:00:00 2001 From: meeb Date: Thu, 18 Feb 2021 16:24:24 +1100 Subject: [PATCH] add sync.Source.download_media master flag, add manual import existing media command with docs, resolves #24 --- README.md | 8 ++ docs/import-existing-media.md | 78 +++++++++++++++++++ tubesync/sync/admin.py | 2 +- .../commands/import-existing-media.py | 55 +++++++++++++ .../migrations/0009_auto_20210218_0442.py | 30 +++++++ tubesync/sync/models.py | 26 ++++++- tubesync/sync/signals.py | 3 +- tubesync/sync/tasks.py | 11 +++ tubesync/sync/templates/sync/media-item.html | 8 +- tubesync/sync/templates/sync/media.html | 2 + tubesync/sync/templates/sync/source.html | 4 + tubesync/sync/views.py | 12 +-- 12 files changed, 226 insertions(+), 13 deletions(-) create mode 100644 docs/import-existing-media.md create mode 100644 tubesync/sync/management/commands/import-existing-media.py create mode 100644 tubesync/sync/migrations/0009_auto_20210218_0442.py diff --git a/README.md b/README.md index 9c18777..3f300d5 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,14 @@ $ docker logs --follow tubesync ``` +# Advanced usage guides + +Once you're happy using TubeSync there are some advanced usage guides for more complex +and less common features: + +[Import existing media into TubeSync](https://github.com/meeb/tubesync/blob/main/docs/import-existing-media.md) + + # Warnings ### 1. Index frequency diff --git a/docs/import-existing-media.md b/docs/import-existing-media.md new file mode 100644 index 0000000..e01eebf --- /dev/null +++ b/docs/import-existing-media.md @@ -0,0 +1,78 @@ +# TubeSync + +## Advanced usage guide - importing existing media + +This is a new feature in v0.9 of TubeSync and later. It allows you to mark existing +downloaded media as "downloaded" in TubeSync.
You can use this feature if, for example, +you already have an extensive catalogue of downloaded media which you want to mark +as downloaded into TubeSync so TubeSync doesn't re-download media you already have. + +## Requirements + +Your existing downloaded media MUST contain the unique ID. For YouTube videos, this +means the YouTube video ID MUST be in the filename. + +Supported extensions to be imported are .m4a, .ogg, .mkv, .mp3, .mp4 and .avi. The +media you want to import must end in one of these file extensions. + +## Caveats + +As TubeSync does not probe media and your existing media may be re-encoded or in +different formats to what is available in the current media metadata there is no way +for TubeSync to know what codecs, resolution, bitrate etc. your imported media is in. +Any manually imported existing local media will display blank boxes for this +information on the TubeSync interface as it's unavailable. + +## Steps + +### 1. Add your source to TubeSync + +Add your source to TubeSync, such as a YouTube channel. **Make sure you untick the +"download media" checkbox.** + +This will allow TubeSync to index all the available media on your source, but won't +start downloading any media. + +### 2. Wait + +Wait for all the media on your source to be indexed. This may take some time. + +### 3. Move your existing media into TubeSync + +You now need to move your existing media into TubeSync. You need to move the media +files into the correct download directories created by TubeSync. For example, if you +have downloaded videos for a YouTube channel "TestChannel", you would have added this +as a source called TestChannel and in a directory called test-channel in TubeSync. It +would have a download directory created on disk at: + +`/path/to/downloads/test-channel` + +You would move all of your pre-existing videos you downloaded outside of TubeSync for +this channel into this directory.
+ +In short, your existing media needs to be moved into the correct TubeSync source +directory to be detected. + +This is required so TubeSync can know which Source to link the media to. + +### 4. Run the batch import command + +Execute the following Django command: + +`./manage.py import-existing-media` + +When deploying TubeSync inside a container, you can execute this with: + +`docker exec -ti tubesync python3 /app/manage.py import-existing-media` + +This command will log what it's doing to the terminal when you run it. + +Internally, `import-existing-media` looks for the unique media key (for YouTube, this +is the YouTube video ID) in the filename and detects the source to link it to based +on the directory the media file is inside. + + +### 5. Re-enable downloading at the source + +Edit your source and re-enable / tick the "download media" option. This will allow +TubeSync to download any missing media you did not manually import. diff --git a/tubesync/sync/admin.py b/tubesync/sync/admin.py index 5e34733..1e445b7 100644 --- a/tubesync/sync/admin.py +++ b/tubesync/sync/admin.py @@ -7,7 +7,7 @@ class SourceAdmin(admin.ModelAdmin): ordering = ('-created',) list_display = ('uuid', 'name', 'source_type', 'last_crawl', - 'has_failed') + 'download_media', 'has_failed') readonly_fields = ('uuid', 'created') search_fields = ('uuid', 'key', 'name') diff --git a/tubesync/sync/management/commands/import-existing-media.py b/tubesync/sync/management/commands/import-existing-media.py new file mode 100644 index 0000000..9a52412 --- /dev/null +++ b/tubesync/sync/management/commands/import-existing-media.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path +from django.core.management.base import BaseCommand, CommandError +from common.logger import log +from sync.models import Source, Media + + +class Command(BaseCommand): + + help = ('Scans download media directories for media not yet downloaded and ' + 'marks them as downloaded') + extra_extensions = ['mp3', 'mp4',
'avi'] + + def handle(self, *args, **options): + log.info('Building directory to Source map...') + dirmap = {} + for s in Source.objects.all(): + dirmap[s.directory_path] = s + log.info('Scanning sources...') + file_extensions = list(Source.EXTENSIONS) + self.extra_extensions + for sourceroot, source in dirmap.items(): + media = list(Media.objects.filter(source=source, downloaded=False, + skip=False)) + if not media: + log.info(f'Source "{source}" has no missing media') + continue + log.info(f'Scanning Source "{source}" directory for media to ' + f'import: {sourceroot}, looking for {len(media)} ' + f'undownloaded and unskipped items') + on_disk = [] + for (root, dirs, files) in os.walk(sourceroot): + rootpath = Path(root) + for filename in files: + filepart, ext = os.path.splitext(filename) + if ext.startswith('.'): + ext = ext[1:] + ext = ext.strip().lower() + if ext not in file_extensions: + continue + on_disk.append(str(rootpath / filename)) + filemap = {} + for item in media: + for filepath in on_disk: + if item.key in filepath: + # The unique item key is in the file name on disk, map it to + # the undownloaded media item + filemap[filepath] = item + continue + for filepath, item in filemap.items(): + log.info(f'Matched on-disk file: {filepath} ' + f'to media item: {item.source} / {item}') + item.media_file.name = filepath + item.downloaded = True + item.save() + log.info('Done') diff --git a/tubesync/sync/migrations/0009_auto_20210218_0442.py b/tubesync/sync/migrations/0009_auto_20210218_0442.py new file mode 100644 index 0000000..45b9450 --- /dev/null +++ b/tubesync/sync/migrations/0009_auto_20210218_0442.py @@ -0,0 +1,30 @@ +# Generated by Django 3.1.6 on 2021-02-18 04:42 + +import django.core.files.storage +from django.db import migrations, models +import sync.models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0008_source_download_cap'), + ] + + operations = [ + migrations.AddField( + model_name='source',
name='download_media', + field=models.BooleanField(default=True, help_text='Download media from this source, if not selected the source will only be indexed', verbose_name='download media'), + ), + migrations.AlterField( + model_name='media', + name='media_file', + field=models.FileField(blank=True, help_text='Media file', max_length=200, null=True, storage=django.core.files.storage.FileSystemStorage(location='/home/meeb/Repos/github.com/meeb/tubesync/tubesync/downloads'), upload_to=sync.models.get_media_file_path, verbose_name='media file'), + ), + migrations.AlterField( + model_name='source', + name='media_format', + field=models.CharField(default='{yyyymmdd}_{source}_{title}_{key}_{format}.{ext}', help_text='File format to use for saving files, detailed options at bottom of page.', max_length=200, verbose_name='media format'), + ), + ] diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index be3e9ee..feb2f0b 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -101,6 +101,11 @@ class Source(models.Model): (FALLBACK_NEXT_BEST_HD, _('Get next best resolution but at least HD')) ) + EXTENSION_M4A = 'm4a' + EXTENSION_OGG = 'ogg' + EXTENSION_MKV = 'mkv' + EXTENSIONS = (EXTENSION_M4A, EXTENSION_OGG, EXTENSION_MKV) + # Fontawesome icons used for the source on the front end ICONS = { SOURCE_TYPE_YOUTUBE_CHANNEL: '', @@ -216,6 +221,11 @@ class Source(models.Model): default=IndexSchedule.EVERY_6_HOURS, help_text=_('Schedule of how often to index the source for new media') ) + download_media = models.BooleanField( + _('download media'), + default=True, + help_text=_('Download media from this source, if not selected the source will only be indexed') + ) download_cap = models.IntegerField( _('download cap'), choices=CapChoices.choices, @@ -334,13 +344,13 @@ class Source(models.Model): ''' if self.is_audio: if self.source_acodec == self.SOURCE_ACODEC_MP4A: - return 'm4a' + return self.EXTENSION_M4A elif self.source_acodec == 
self.SOURCE_ACODEC_OPUS: - return 'ogg' + return self.EXTENSION_OGG else: raise ValueError('Unable to choose audio extension, uknown acodec') else: - return 'mkv' + return self.EXTENSION_MKV @classmethod def create_url(obj, source_type, key): @@ -564,14 +574,18 @@ class Media(models.Model): STATE_SCHEDULED = 'scheduled' STATE_DOWNLOADING = 'downloading' STATE_DOWNLOADED = 'downloaded' + STATE_SKIPPED = 'skipped' + STATE_DISABLED_AT_SOURCE = 'source-disabled' STATE_ERROR = 'error' STATES = (STATE_UNKNOWN, STATE_SCHEDULED, STATE_DOWNLOADING, STATE_DOWNLOADED, - STATE_ERROR) + STATE_SKIPPED, STATE_DISABLED_AT_SOURCE, STATE_ERROR) STATE_ICONS = { STATE_UNKNOWN: '', STATE_SCHEDULED: '', STATE_DOWNLOADING: '', STATE_DOWNLOADED: '', + STATE_SKIPPED: '', + STATE_DISABLED_AT_SOURCE: '', STATE_ERROR: '', } @@ -1177,6 +1191,10 @@ class Media(models.Model): return self.STATE_ERROR else: return self.STATE_SCHEDULED + if self.skip: + return self.STATE_SKIPPED + if not self.source.download_media: + return self.STATE_DISABLED_AT_SOURCE return self.STATE_UNKNOWN def get_download_state_icon(self, task=None): diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index a27cd91..1cb1852 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -136,7 +136,8 @@ def media_post_save(sender, instance, created, **kwargs): if not instance.media_file_exists: instance.downloaded = False instance.media_file = None - if not instance.downloaded and instance.can_download and not instance.skip: + if (not instance.downloaded and instance.can_download and not instance.skip + and instance.source.download_media): delete_task_by_media('sync.tasks.download_media', (str(instance.pk),)) verbose_name = _('Downloading media for "{}"') download_media( diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 21c489a..47bf3cf 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -308,6 +308,17 @@ def download_media(media_id): log.warn(f'Download task triggeredd 
media: {media} (UUID: {media.pk}) but it ' f'is now marked to be skipped, not downloading') return + if media.downloaded and media.media_file: + # Media has been marked as downloaded before the download_media task was fired, + # skip it + log.warn(f'Download task triggeredd media: {media} (UUID: {media.pk}) but it ' + f'has already been marked as downloaded, not downloading again') + return + if not media.source.download_media: + log.warn(f'Download task triggeredd media: {media} (UUID: {media.pk}) but the ' + f'source {media.source} has since been marked to not download media, ' + f'not downloading') + return filepath = media.filepath log.info(f'Downloading media: {media} (UUID: {media.pk}) to: "{filepath}"') format_str, container = media.download_media() diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html index e019dd2..62f6f35 100644 --- a/tubesync/sync/templates/sync/media-item.html +++ b/tubesync/sync/templates/sync/media-item.html @@ -64,8 +64,14 @@ Fallback Fallback
{{ media.source.get_fallback_display }} + {% if not media.source.download_media %} + + Source download? + Source download?
{% if media.source.download_media %}{% else %}{% endif %} + + {% endif %} {% if media.skip %} - + Skipping? Skipping?
{% if media.skip %}{% else %}{% endif %} diff --git a/tubesync/sync/templates/sync/media.html b/tubesync/sync/templates/sync/media.html index cb38462..76ae9ea 100644 --- a/tubesync/sync/templates/sync/media.html +++ b/tubesync/sync/templates/sync/media.html @@ -24,6 +24,8 @@ {% else %} {% if m.skip %} Skipped + {% elif not m.source.download_media %} + Disabled at source {% elif not m.has_metadata %} Fetching metadata {% elif m.can_download %} diff --git a/tubesync/sync/templates/sync/source.html b/tubesync/sync/templates/sync/source.html index 408b9db..f8b1e39 100644 --- a/tubesync/sync/templates/sync/source.html +++ b/tubesync/sync/templates/sync/source.html @@ -61,6 +61,10 @@ Index schedule Index schedule
{{ source.get_index_schedule_display }} + + Download media? + Download media?
{% if source.download_media %}{% else %}{% endif %} + Created Created
{{ source.created|date:'Y-m-d H:i:s' }} diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 1063dd8..d6a7851 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -274,9 +274,9 @@ class AddSourceView(CreateView): template_name = 'sync/source-add.html' model = Source fields = ('source_type', 'key', 'name', 'directory', 'media_format', - 'index_schedule', 'download_cap', 'delete_old_media', 'days_to_keep', - 'source_resolution', 'source_vcodec', 'source_acodec', 'prefer_60fps', - 'prefer_hdr', 'fallback', 'copy_thumbnails', 'write_nfo') + 'index_schedule', 'download_media', 'download_cap', 'delete_old_media', + 'days_to_keep', 'source_resolution', 'source_vcodec', 'source_acodec', + 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails', 'write_nfo') errors = { 'invalid_media_format': _('Invalid media format, the media format contains ' 'errors or is empty. Check the table at the end of ' @@ -365,9 +365,9 @@ class UpdateSourceView(UpdateView): template_name = 'sync/source-update.html' model = Source fields = ('source_type', 'key', 'name', 'directory', 'media_format', - 'index_schedule', 'download_cap', 'delete_old_media', 'days_to_keep', - 'source_resolution', 'source_vcodec', 'source_acodec', 'prefer_60fps', - 'prefer_hdr', 'fallback', 'copy_thumbnails', 'write_nfo') + 'index_schedule', 'download_media', 'download_cap', 'delete_old_media', + 'days_to_keep', 'source_resolution', 'source_vcodec', 'source_acodec', + 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails', 'write_nfo') errors = { 'invalid_media_format': _('Invalid media format, the media format contains ' 'errors or is empty. Check the table at the end of '