Remove BeautifulSoup to only use YT-DLP
This commit is contained in:
parent
7b2bb3dca6
commit
8770c76d6b
1
Pipfile
1
Pipfile
|
@ -23,4 +23,3 @@ yt-dlp = "*"
|
||||||
redis = "*"
|
redis = "*"
|
||||||
hiredis = "*"
|
hiredis = "*"
|
||||||
requests = {extras = ["socks"], version = "*"}
|
requests = {extras = ["socks"], version = "*"}
|
||||||
bs4 = "*"
|
|
|
@ -3,7 +3,6 @@ import uuid
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
@ -18,7 +17,8 @@ from django.utils.translation import gettext_lazy as _
|
||||||
from common.errors import NoFormatException
|
from common.errors import NoFormatException
|
||||||
from common.utils import clean_filename
|
from common.utils import clean_filename
|
||||||
from .youtube import (get_media_info as get_youtube_media_info,
|
from .youtube import (get_media_info as get_youtube_media_info,
|
||||||
download_media as download_youtube_media)
|
download_media as download_youtube_media,
|
||||||
|
get_channel_image_info as get_youtube_channel_image_info)
|
||||||
from .utils import seconds_to_timestr, parse_media_format
|
from .utils import seconds_to_timestr, parse_media_format
|
||||||
from .matching import (get_best_combined_format, get_best_audio_format,
|
from .matching import (get_best_combined_format, get_best_audio_format,
|
||||||
get_best_video_format)
|
get_best_video_format)
|
||||||
|
@ -490,26 +490,12 @@ class Source(models.Model):
|
||||||
return os.makedirs(self.directory_path, exist_ok=True)
|
return os.makedirs(self.directory_path, exist_ok=True)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def get_thumbnail_url(self):
|
def get_image_url(self):
|
||||||
if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST:
|
if self.source_type == self.SOURCE_TYPE_YOUTUBE_PLAYLIST:
|
||||||
raise Exception('This source is a playlist so it doesn\'t have thumbnail.')
|
raise Exception('This source is a playlist so it doesn\'t have thumbnail.')
|
||||||
|
|
||||||
try:
|
return get_youtube_channel_image_info(self.url)
|
||||||
response = requests.get(self.url, cookies={'CONSENT': 'YES+1'})
|
|
||||||
response.raise_for_status()
|
|
||||||
except RequestException as e:
|
|
||||||
print(f"Error occurred while making a request to YouTube: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
|
||||||
|
|
||||||
try:
|
|
||||||
data = re.search(r"var ytInitialData = ({.*});", str(soup.prettify())).group(1)
|
|
||||||
json_data = json.loads(data)
|
|
||||||
return json_data["header"]["c4TabbedHeaderRenderer"]["avatar"]["thumbnails"][2]["url"]
|
|
||||||
except (KeyError, ValueError, TypeError) as e:
|
|
||||||
print(f"Error occurred while parsing YouTube JSON: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def directory_exists(self):
|
def directory_exists(self):
|
||||||
return (os.path.isdir(self.directory_path) and
|
return (os.path.isdir(self.directory_path) and
|
||||||
|
|
|
@ -233,14 +233,27 @@ def download_source_thumbnail(source_id):
|
||||||
log.error(f'Task download_source_thumbnail(pk={source_id}) called but no '
|
log.error(f'Task download_source_thumbnail(pk={source_id}) called but no '
|
||||||
f'source exists with ID: {source_id}')
|
f'source exists with ID: {source_id}')
|
||||||
return
|
return
|
||||||
|
avatar, banner = source.get_image_url
|
||||||
url = source.get_thumbnail_url
|
log.info(f'Thumbnail URL for source with ID: {source_id} '
|
||||||
width = 400
|
f'Avatar: {avatar} '
|
||||||
height = 400
|
f'Banner: {banner}')
|
||||||
|
if banner != None:
|
||||||
|
url = banner
|
||||||
|
i = get_remote_image(url)
|
||||||
|
image_file = BytesIO()
|
||||||
|
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
|
||||||
|
file_name = "banner.jpg"
|
||||||
|
# Reset file pointer to the beginning for the next save
|
||||||
|
image_file.seek(0)
|
||||||
|
# Create a Django ContentFile from BytesIO stream
|
||||||
|
django_file = ContentFile(image_file.read())
|
||||||
|
file_path = source.directory_path / file_name
|
||||||
|
with open(file_path, 'wb') as f:
|
||||||
|
f.write(django_file.read())
|
||||||
|
|
||||||
|
if avatar != None:
|
||||||
|
url = avatar
|
||||||
i = get_remote_image(url)
|
i = get_remote_image(url)
|
||||||
log.info(f'Resizing {i.width}x{i.height} thumbnail to '
|
|
||||||
f'{width}x{height}: {url}')
|
|
||||||
i = resize_image_to_height(i, width, height)
|
|
||||||
image_file = BytesIO()
|
image_file = BytesIO()
|
||||||
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
|
i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
|
||||||
|
|
||||||
|
@ -253,7 +266,7 @@ def download_source_thumbnail(source_id):
|
||||||
with open(file_path, 'wb') as f:
|
with open(file_path, 'wb') as f:
|
||||||
f.write(django_file.read())
|
f.write(django_file.read())
|
||||||
|
|
||||||
log.info(f'Thumbnail downloaded from {url} for source with ID: {source_id}')
|
log.info(f'Thumbnail downloaded for source with ID: {source_id}')
|
||||||
|
|
||||||
|
|
||||||
@background(schedule=0)
|
@background(schedule=0)
|
||||||
|
|
|
@ -35,6 +35,35 @@ def get_yt_opts():
|
||||||
opts.update({'cookiefile': cookie_file_path})
|
opts.update({'cookiefile': cookie_file_path})
|
||||||
return opts
|
return opts
|
||||||
|
|
||||||
|
def get_channel_image_info(url):
    '''
        Looks up the avatar and banner image URLs for a channel using yt-dlp.

        Returns a tuple of (avatar_url, banner_url). Either element is None
        when the extracted info has no thumbnail entry with the matching id
        ('avatar_uncropped' or 'banner_uncropped'), including when no
        thumbnails are reported at all.

        Raises YouTubeError if yt-dlp raises a DownloadError while extracting.
    '''
    opts = get_yt_opts()
    opts.update({
        'skip_download': True,
        'forcejson': True,
        'simulate': True,
        'logger': log,
        'extract_flat': True,  # Change to False to get detailed info
    })

    with yt_dlp.YoutubeDL(opts) as y:
        # Keep the try body to the single call that can raise DownloadError so
        # that unrelated errors in the parsing below are not misattributed.
        try:
            response = y.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e

        avatar_url = None
        banner_url = None
        # NOTE(review): guards against a None result or a missing/None
        # 'thumbnails' key instead of crashing with KeyError/TypeError;
        # confirm which of these yt-dlp can actually produce for channels.
        thumbnails = (response or {}).get('thumbnails') or []
        for thumbnail in thumbnails:
            thumbnail_id = thumbnail.get('id')
            if thumbnail_id == 'avatar_uncropped':
                avatar_url = thumbnail.get('url')
            elif thumbnail_id == 'banner_uncropped':
                banner_url = thumbnail.get('url')
            # Stop scanning as soon as both images have been found.
            if banner_url is not None and avatar_url is not None:
                break

        return avatar_url, banner_url
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_media_info(url):
|
def get_media_info(url):
|
||||||
'''
|
'''
|
||||||
|
|
Loading…
Reference in New Issue