Remove BeautifulSoup to only use YT-DLP

This commit is contained in:
administrator 2023-12-11 18:50:31 +01:00
parent 7b2bb3dca6
commit 8770c76d6b
5 changed files with 78 additions and 51 deletions

View File

@ -23,4 +23,3 @@ yt-dlp = "*"
redis = "*" redis = "*"
hiredis = "*" hiredis = "*"
requests = {extras = ["socks"], version = "*"} requests = {extras = ["socks"], version = "*"}
bs4 = "*"

View File

@ -3,7 +3,6 @@ import uuid
import json import json
import re import re
import requests import requests
from bs4 import BeautifulSoup
from xml.etree import ElementTree from xml.etree import ElementTree
from collections import OrderedDict from collections import OrderedDict
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -18,7 +17,8 @@ from django.utils.translation import gettext_lazy as _
from common.errors import NoFormatException from common.errors import NoFormatException
from common.utils import clean_filename from common.utils import clean_filename
from .youtube import (get_media_info as get_youtube_media_info, from .youtube import (get_media_info as get_youtube_media_info,
download_media as download_youtube_media) download_media as download_youtube_media,
get_channel_image_info as get_youtube_channel_image_info)
from .utils import seconds_to_timestr, parse_media_format from .utils import seconds_to_timestr, parse_media_format
from .matching import (get_best_combined_format, get_best_audio_format, from .matching import (get_best_combined_format, get_best_audio_format,
get_best_video_format) get_best_video_format)
@ -490,26 +490,12 @@ class Source(models.Model):
return os.makedirs(self.directory_path, exist_ok=True) return os.makedirs(self.directory_path, exist_ok=True)
@property @property
def get_thumbnail_url(self): def get_image_url(self):
if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST: if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST:
raise Exception('This source is a playlist so it doesn\'t have thumbnail.') raise Exception('This source is a playlist so it doesn\'t have thumbnail.')
try: return get_youtube_channel_image_info(self.url)
response = requests.get(self.url, cookies={'CONSENT': 'YES+1'})
response.raise_for_status()
except RequestException as e:
print(f"Error occurred while making a request to YouTube: {e}")
return None
soup = BeautifulSoup(response.text, "html.parser")
try:
data = re.search(r"var ytInitialData = ({.*});", str(soup.prettify())).group(1)
json_data = json.loads(data)
return json_data["header"]["c4TabbedHeaderRenderer"]["avatar"]["thumbnails"][2]["url"]
except (KeyError, ValueError, TypeError) as e:
print(f"Error occurred while parsing YouTube JSON: {e}")
return None
def directory_exists(self): def directory_exists(self):
return (os.path.isdir(self.directory_path) and return (os.path.isdir(self.directory_path) and

View File

@ -233,14 +233,27 @@ def download_source_thumbnail(source_id):
log.error(f'Task download_source_thumbnail(pk={source_id}) called but no ' log.error(f'Task download_source_thumbnail(pk={source_id}) called but no '
f'source exists with ID: {source_id}') f'source exists with ID: {source_id}')
return return
avatar, banner = source.get_image_url
url = source.get_thumbnail_url log.info(f'Thumbnail URL for source with ID: {source_id} '
width = 400 f'Avatar: {avatar} '
height = 400 f'Banner: {banner}')
if banner != None:
url = banner
i = get_remote_image(url)
image_file = BytesIO()
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
file_name = "banner.jpg"
# Reset file pointer to the beginning for the next save
image_file.seek(0)
# Create a Django ContentFile from BytesIO stream
django_file = ContentFile(image_file.read())
file_path = source.directory_path / file_name
with open(file_path, 'wb') as f:
f.write(django_file.read())
if avatar != None:
url = avatar
i = get_remote_image(url) i = get_remote_image(url)
log.info(f'Resizing {i.width}x{i.height} thumbnail to '
f'{width}x{height}: {url}')
i = resize_image_to_height(i, width, height)
image_file = BytesIO() image_file = BytesIO()
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True) i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
@ -253,7 +266,7 @@ def download_source_thumbnail(source_id):
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
f.write(django_file.read()) f.write(django_file.read())
log.info(f'Thumbnail downloaded from {url} for source with ID: {source_id}') log.info(f'Thumbnail downloaded for source with ID: {source_id}')
@background(schedule=0) @background(schedule=0)

View File

@ -35,6 +35,35 @@ def get_yt_opts():
opts.update({'cookiefile': cookie_file_path}) opts.update({'cookiefile': cookie_file_path})
return opts return opts
def get_channel_image_info(url):
    '''
        Looks up the avatar and banner image URLs for the channel at `url`
        using yt-dlp metadata extraction (nothing is downloaded).

        Returns a 2-tuple of (avatar_url, banner_url). Either element is None
        when the extracted metadata has no thumbnail entry with the ID
        'avatar_uncropped' or 'banner_uncropped' respectively, or when no
        thumbnail metadata was returned at all.

        Raises YouTubeError if yt-dlp fails to extract the channel info.
    '''
    opts = get_yt_opts()
    opts.update({
        'skip_download': True,
        'forcejson': True,
        'simulate': True,
        'logger': log,
        'extract_flat': True,  # Change to False to get detailed info
    })
    with yt_dlp.YoutubeDL(opts) as y:
        # Only the extraction call can raise DownloadError, so keep the try
        # body limited to it.
        try:
            response = y.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e
    avatar_url = None
    banner_url = None
    # 'thumbnails' and each thumbnail's 'id' are optional in the info dict, so
    # fall back to "no image" rather than raising KeyError / TypeError.
    thumbnails = (response or {}).get('thumbnails') or []
    for thumbnail in thumbnails:
        thumbnail_id = thumbnail.get('id')
        if thumbnail_id == 'avatar_uncropped':
            avatar_url = thumbnail.get('url')
        elif thumbnail_id == 'banner_uncropped':
            banner_url = thumbnail.get('url')
        # Stop scanning once both images have been located.
        if banner_url is not None and avatar_url is not None:
            break
    return avatar_url, banner_url
def get_media_info(url): def get_media_info(url):
''' '''