From 8770c76d6b1b9a88669fe665d7ec883eaf8b88e2 Mon Sep 17 00:00:00 2001 From: administrator <7dn1yh5j@debauchez.fr> Date: Mon, 11 Dec 2023 18:50:31 +0100 Subject: [PATCH] Remove BeautifulSoup to only use YT-DLP --- Pipfile | 3 +- ...> 0021_source_copy_channel_thumbnails2.py} | 36 ++++++++--------- tubesync/sync/models.py | 22 ++--------- tubesync/sync/tasks.py | 39 ++++++++++++------- tubesync/sync/youtube.py | 29 ++++++++++++++ 5 files changed, 78 insertions(+), 51 deletions(-) rename tubesync/sync/migrations/{0021_source_copy_channel_thumbnails.py => 0021_source_copy_channel_thumbnails2.py} (97%) diff --git a/Pipfile b/Pipfile index 090b4b5..bfe7a90 100644 --- a/Pipfile +++ b/Pipfile @@ -22,5 +22,4 @@ mysqlclient = "*" yt-dlp = "*" redis = "*" hiredis = "*" -requests = {extras = ["socks"], version = "*"} -bs4 = "*" \ No newline at end of file +requests = {extras = ["socks"], version = "*"} \ No newline at end of file diff --git a/tubesync/sync/migrations/0021_source_copy_channel_thumbnails.py b/tubesync/sync/migrations/0021_source_copy_channel_thumbnails2.py similarity index 97% rename from tubesync/sync/migrations/0021_source_copy_channel_thumbnails.py rename to tubesync/sync/migrations/0021_source_copy_channel_thumbnails2.py index 6c59dee..178ac97 100644 --- a/tubesync/sync/migrations/0021_source_copy_channel_thumbnails.py +++ b/tubesync/sync/migrations/0021_source_copy_channel_thumbnails2.py @@ -1,18 +1,18 @@ -# Generated by nothing. Done manually by InterN0te on 2023-12-10 16:36 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('sync', '0020_auto_20231024_1825'), - ] - - operations = [ - migrations.AddField( - model_name='source', - name='copy_channel_thumbnails', - field=models.BooleanField(default=False, help_text='Copy channel thumbnails in poster.jpg and season-poster.jpg, these may be detected and used by some media servers', verbose_name='copy channel thumbnails'), - ), - ] +# Generated by nothing. Done manually by InterN0te on 2023-12-10 16:36 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0020_auto_20231024_1825'), + ] + + operations = [ + migrations.AddField( + model_name='source', + name='copy_channel_thumbnails', + field=models.BooleanField(default=False, help_text='Copy channel thumbnails in poster.jpg and season-poster.jpg, these may be detected and used by some media servers', verbose_name='copy channel thumbnails'), + ), + ] diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 8ca621b..9efe026 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -3,7 +3,6 @@ import uuid import json import re import requests -from bs4 import BeautifulSoup from xml.etree import ElementTree from collections import OrderedDict from datetime import datetime, timedelta @@ -18,7 +17,8 @@ from django.utils.translation import gettext_lazy as _ from common.errors import NoFormatException from common.utils import clean_filename from .youtube import (get_media_info as get_youtube_media_info, - download_media as download_youtube_media) + download_media as download_youtube_media, + get_channel_image_info as get_youtube_channel_image_info) from .utils import seconds_to_timestr, parse_media_format from .matching import (get_best_combined_format, get_best_audio_format, get_best_video_format) @@ -490,26 +490,12 @@ class Source(models.Model): return os.makedirs(self.directory_path, exist_ok=True) @property - def get_thumbnail_url(self): + def get_image_url(self): if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST: raise Exception('This source is a playlist so it doesn\'t have thumbnail.') - try: - response = requests.get(self.url, cookies={'CONSENT': 'YES+1'}) - response.raise_for_status() - except RequestException as e: - print(f"Error occurred while making a request to YouTube: {e}") - return None + return get_youtube_channel_image_info(self.url) - soup = BeautifulSoup(response.text, "html.parser") - - try: - data = re.search(r"var ytInitialData = ({.*});", str(soup.prettify())).group(1) - json_data = json.loads(data) - return json_data["header"]["c4TabbedHeaderRenderer"]["avatar"]["thumbnails"][2]["url"] - except (KeyError, ValueError, TypeError) as e: - print(f"Error occurred while parsing YouTube JSON: {e}") - return None def directory_exists(self): return (os.path.isdir(self.directory_path) and diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 87807b5..b51ca55 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -233,18 +233,16 @@ def download_source_thumbnail(source_id): log.error(f'Task download_source_thumbnail(pk={source_id}) called but no ' f'source exists with ID: {source_id}') return - - url = source.get_thumbnail_url - width = 400 - height = 400 - i = get_remote_image(url) - log.info(f'Resizing {i.width}x{i.height} thumbnail to ' - f'{width}x{height}: {url}') - i = resize_image_to_height(i, width, height) - image_file = BytesIO() - i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True) - - for file_name in ["poster.jpg", "season-poster.jpg"]: + avatar, banner = source.get_image_url + log.info(f'Thumbnail URL for source with ID: {source_id} ' + f'Avatar: {avatar} ' + f'Banner: {banner}') + if banner != None: + url = banner + i = get_remote_image(url) + image_file = BytesIO() + i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True) + file_name = "banner.jpg" # Reset file pointer to the beginning for the next save image_file.seek(0) # Create a Django ContentFile from BytesIO stream @@ -253,7 +251,22 @@ def download_source_thumbnail(source_id): with open(file_path, 'wb') as f: f.write(django_file.read()) - log.info(f'Thumbnail downloaded from {url} for source with ID: {source_id}') + if avatar != None: + url = avatar + i = get_remote_image(url) + image_file = BytesIO() + i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True) + + for file_name in ["poster.jpg", "season-poster.jpg"]: + # Reset file pointer to the beginning for the next save + image_file.seek(0) + # Create a Django ContentFile from BytesIO stream + django_file = ContentFile(image_file.read()) + file_path = source.directory_path / file_name + with open(file_path, 'wb') as f: + f.write(django_file.read()) + + log.info(f'Thumbnail downloaded for source with ID: {source_id}') @background(schedule=0) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 4ac6e83..1ef1fab 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -35,6 +35,35 @@ def get_yt_opts(): opts.update({'cookiefile': cookie_file_path}) return opts +def get_channel_image_info(url): + opts = get_yt_opts() + opts.update({ + 'skip_download': True, + 'forcejson': True, + 'simulate': True, + 'logger': log, + 'extract_flat': True, # Change to False to get detailed info + }) + + with yt_dlp.YoutubeDL(opts) as y: + try: + response = y.extract_info(url, download=False) + + avatar_url = None + banner_url = None + for thumbnail in response['thumbnails']: + if thumbnail['id'] == 'avatar_uncropped': + avatar_url = thumbnail['url'] + if thumbnail['id'] == 'banner_uncropped': + banner_url = thumbnail['url'] + if banner_url != None and avatar_url != None: + break + + return avatar_url, banner_url + except yt_dlp.utils.DownloadError as e: + raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e + + def get_media_info(url): '''