Remove BeautifulSoup to only use YT-DLP

This commit is contained in:
administrator 2023-12-11 18:50:31 +01:00
parent 7b2bb3dca6
commit 8770c76d6b
5 changed files with 78 additions and 51 deletions

View File

@ -22,5 +22,4 @@ mysqlclient = "*"
yt-dlp = "*"
redis = "*"
hiredis = "*"
requests = {extras = ["socks"], version = "*"}
bs4 = "*"
requests = {extras = ["socks"], version = "*"}

View File

@ -1,18 +1,18 @@
# Generated by nothing. Done manually by InterN0te on 2023-12-10 16:36
from django.db import migrations, models
class Migration(migrations.Migration):
    '''
        Adds the boolean 'copy_channel_thumbnails' field to the 'source'
        model. The field defaults to False.
    '''

    # Must be applied after the previous migration of the 'sync' app.
    dependencies = [('sync', '0020_auto_20231024_1825')]

    operations = [
        migrations.AddField(
            model_name='source',
            name='copy_channel_thumbnails',
            field=models.BooleanField(
                default=False,
                help_text='Copy channel thumbnails in poster.jpg and season-poster.jpg, these may be detected and used by some media servers',
                verbose_name='copy channel thumbnails',
            ),
        ),
    ]
# Generated by nothing. Done manually by InterN0te on 2023-12-10 16:36
from django.db import migrations, models
class Migration(migrations.Migration):
    '''
        Adds the boolean 'copy_channel_thumbnails' field to the 'source'
        model. The field defaults to False.
    '''

    # Must be applied after the previous migration of the 'sync' app.
    dependencies = [('sync', '0020_auto_20231024_1825')]

    operations = [
        migrations.AddField(
            model_name='source',
            name='copy_channel_thumbnails',
            field=models.BooleanField(
                default=False,
                help_text='Copy channel thumbnails in poster.jpg and season-poster.jpg, these may be detected and used by some media servers',
                verbose_name='copy channel thumbnails',
            ),
        ),
    ]

View File

@ -3,7 +3,6 @@ import uuid
import json
import re
import requests
from bs4 import BeautifulSoup
from xml.etree import ElementTree
from collections import OrderedDict
from datetime import datetime, timedelta
@ -18,7 +17,8 @@ from django.utils.translation import gettext_lazy as _
from common.errors import NoFormatException
from common.utils import clean_filename
from .youtube import (get_media_info as get_youtube_media_info,
download_media as download_youtube_media)
download_media as download_youtube_media,
get_channel_image_info as get_youtube_channel_image_info)
from .utils import seconds_to_timestr, parse_media_format
from .matching import (get_best_combined_format, get_best_audio_format,
get_best_video_format)
@ -490,26 +490,12 @@ class Source(models.Model):
return os.makedirs(self.directory_path, exist_ok=True)
@property
def get_thumbnail_url(self):
def get_image_url(self):
if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST:
raise Exception('This source is a playlist so it doesn\'t have thumbnail.')
try:
response = requests.get(self.url, cookies={'CONSENT': 'YES+1'})
response.raise_for_status()
except RequestException as e:
print(f"Error occurred while making a request to YouTube: {e}")
return None
return get_youtube_channel_image_info(self.url)
soup = BeautifulSoup(response.text, "html.parser")
try:
data = re.search(r"var ytInitialData = ({.*});", str(soup.prettify())).group(1)
json_data = json.loads(data)
return json_data["header"]["c4TabbedHeaderRenderer"]["avatar"]["thumbnails"][2]["url"]
except (KeyError, ValueError, TypeError) as e:
print(f"Error occurred while parsing YouTube JSON: {e}")
return None
def directory_exists(self):
return (os.path.isdir(self.directory_path) and

View File

@ -233,18 +233,16 @@ def download_source_thumbnail(source_id):
log.error(f'Task download_source_thumbnail(pk={source_id}) called but no '
f'source exists with ID: {source_id}')
return
url = source.get_thumbnail_url
width = 400
height = 400
i = get_remote_image(url)
log.info(f'Resizing {i.width}x{i.height} thumbnail to '
f'{width}x{height}: {url}')
i = resize_image_to_height(i, width, height)
image_file = BytesIO()
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
for file_name in ["poster.jpg", "season-poster.jpg"]:
avatar, banner = source.get_image_url
log.info(f'Thumbnail URL for source with ID: {source_id} '
f'Avatar: {avatar} '
f'Banner: {banner}')
if banner != None:
url = banner
i = get_remote_image(url)
image_file = BytesIO()
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
file_name = "banner.jpg"
# Reset file pointer to the beginning for the next save
image_file.seek(0)
# Create a Django ContentFile from BytesIO stream
@ -253,7 +251,22 @@ def download_source_thumbnail(source_id):
with open(file_path, 'wb') as f:
f.write(django_file.read())
log.info(f'Thumbnail downloaded from {url} for source with ID: {source_id}')
if avatar != None:
url = avatar
i = get_remote_image(url)
image_file = BytesIO()
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
for file_name in ["poster.jpg", "season-poster.jpg"]:
# Reset file pointer to the beginning for the next save
image_file.seek(0)
# Create a Django ContentFile from BytesIO stream
django_file = ContentFile(image_file.read())
file_path = source.directory_path / file_name
with open(file_path, 'wb') as f:
f.write(django_file.read())
log.info(f'Thumbnail downloaded for source with ID: {source_id}')
@background(schedule=0)

View File

@ -35,6 +35,35 @@ def get_yt_opts():
opts.update({'cookiefile': cookie_file_path})
return opts
def get_channel_image_info(url):
    '''
        Extracts the avatar and banner image URLs for the channel at 'url'
        using yt-dlp, without downloading any media.

        Returns a tuple of (avatar_url, banner_url). Either element is None
        when the extracted info has no thumbnail with the matching ID
        ('avatar_uncropped' or 'banner_uncropped').

        Raises YouTubeError if yt-dlp fails to extract the channel info.
    '''
    opts = get_yt_opts()
    opts.update({
        'skip_download': True,
        'forcejson': True,
        'simulate': True,
        'logger': log,
        'extract_flat': True,  # Change to False to get detailed info
    })
    with yt_dlp.YoutubeDL(opts) as y:
        # Keep the try body limited to the one call that can raise
        # DownloadError so unrelated errors are not masked.
        try:
            response = y.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e
    # Tolerate a missing info dict or a missing / empty 'thumbnails' entry
    # instead of failing with a KeyError or TypeError.
    thumbnails = (response or {}).get('thumbnails') or []
    avatar_url = None
    banner_url = None
    for thumbnail in thumbnails:
        thumbnail_id = thumbnail.get('id')
        if thumbnail_id == 'avatar_uncropped':
            avatar_url = thumbnail.get('url')
        elif thumbnail_id == 'banner_uncropped':
            banner_url = thumbnail.get('url')
        # Stop scanning as soon as both images have been found.
        if avatar_url is not None and banner_url is not None:
            break
    return avatar_url, banner_url
def get_media_info(url):
'''