2020-12-06 01:22:16 +00:00
|
|
|
'''
|
|
|
|
Start, stop and manage scheduled tasks. These are generally triggered by Django
|
|
|
|
signals (see signals.py).
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
2020-12-06 07:33:48 +00:00
|
|
|
import math
|
2020-12-07 10:26:46 +00:00
|
|
|
import uuid
|
2020-12-06 01:22:16 +00:00
|
|
|
from io import BytesIO
|
2020-12-07 10:26:46 +00:00
|
|
|
from hashlib import sha1
|
|
|
|
from datetime import timedelta
|
2020-12-06 07:33:48 +00:00
|
|
|
from PIL import Image
|
2020-12-06 01:22:16 +00:00
|
|
|
from django.conf import settings
|
|
|
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
2020-12-06 07:33:48 +00:00
|
|
|
from django.utils import timezone
|
2020-12-07 04:39:58 +00:00
|
|
|
from django.db.utils import IntegrityError
|
2020-12-06 01:22:16 +00:00
|
|
|
from background_task import background
|
2020-12-07 10:26:46 +00:00
|
|
|
from background_task.models import Task, CompletedTask
|
2020-12-06 02:48:10 +00:00
|
|
|
from common.logger import log
|
2020-12-07 10:26:46 +00:00
|
|
|
from common.errors import NoMediaException
|
2020-12-06 01:22:16 +00:00
|
|
|
from .models import Source, Media
|
2020-12-06 08:10:00 +00:00
|
|
|
from .utils import get_remote_image, resize_image_to_height
|
2020-12-06 01:22:16 +00:00
|
|
|
|
|
|
|
|
2020-12-07 10:26:46 +00:00
|
|
|
def get_hash(task_name, pk):
    '''
        Create a background_task compatible hash for a Task or CompletedTask.

        The hash is the SHA-1 hex digest of the task name immediately followed
        by the JSON-encoded task parameters. The parameters are encoded as a
        two item structure: a single positional argument (the stringified
        primary key) and an empty set of keyword arguments.

        task_name: dotted task name, for example 'sync.tasks.index_source_task'
        pk: primary key of the related instance, it is passed through str()
        so a UUID and its string form produce the same hash

        Returns a 40 character hexadecimal string.
    '''
    # Mirrors the ((args), {kwargs}) parameter layout, with str(pk) as the
    # only positional argument and no keyword arguments
    task_params = json.dumps(((str(pk),), {}), sort_keys=True)
    return sha1(f'{task_name}{task_params}'.encode('utf-8')).hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
def map_task_to_instance(task):
    '''
        Reverse-maps a scheduled background task to an instance. Requires the task name
        to be a known task function and the first argument to be a UUID. This is used
        because UUID's are incompatible with background_task's "creator" feature.

        task: an object exposing .queue, .task_name and .task_params

        Returns an (instance, url_name) tuple, or (None, None) if the task
        cannot be mapped to an existing instance.
    '''
    TASK_MAP = {
        'sync.tasks.index_source_task': Source,
        'sync.tasks.download_media_thumbnail': Media,
    }
    MODEL_URL_MAP = {
        Source: 'sync:source',
        Media: 'sync:media-item',
    }
    # If the task has a UUID set in its .queue it's probably a link to a Source
    if task.queue:
        try:
            queue_uuid = uuid.UUID(task.queue)
        except (TypeError, ValueError, AttributeError):
            # The queue is not a UUID, fall through to the task parameters
            pass
        else:
            try:
                return Source.objects.get(pk=queue_uuid), MODEL_URL_MAP[Source]
            except Source.DoesNotExist:
                # No such source, fall through to the task parameters
                pass
    # Resolve the model and URL name from the task function name
    model = TASK_MAP.get(task.task_name)
    if not model:
        return None, None
    url = MODEL_URL_MAP.get(model)
    if not url:
        return None, None
    # Unpack the JSON encoded (args, kwargs) task parameters
    try:
        task_args = json.loads(task.task_params)
    except (TypeError, ValueError, AttributeError):
        return None, None
    # Valid JSON may still be a scalar or an object, only accept a pair
    if not isinstance(task_args, (list, tuple)) or len(task_args) != 2:
        return None, None
    args = task_args[0]
    if not isinstance(args, (list, tuple)) or len(args) == 0:
        return None, None
    # The first positional argument must be the instance UUID
    try:
        instance_uuid = uuid.UUID(args[0])
    except (TypeError, ValueError, AttributeError):
        return None, None
    try:
        return model.objects.get(pk=instance_uuid), url
    except model.DoesNotExist:
        return None, None
|
|
|
|
|
|
|
|
|
|
|
|
def get_error_message(task):
    '''
        Extract an error message from a failed task. This is the last line of the
        last_error field with the method name removed.

        task: an object exposing has_error() and a last_error string

        Returns the text after the first ':' on the last line of last_error,
        stripped of surrounding whitespace. Returns an empty string if the task
        has no error or the last line contains no ':'.
    '''
    if not task.has_error():
        return ''
    # str.split() always returns at least one item so [-1] is always safe
    last_line = task.last_error.strip().split('\n')[-1].strip()
    # Split on the first colon only, the message itself may contain colons
    _, separator, message = last_line.partition(':')
    if not separator:
        return ''
    return message.strip()
|
|
|
|
|
|
|
|
|
|
|
|
def get_source_completed_tasks(source_id, only_errors=False):
    '''
        Returns a queryset of CompletedTask objects for a source by source ID.

        source_id: the value matched against the CompletedTask queue field
        only_errors: if True only tasks with failed_at set are returned

        The queryset is ordered by failed_at, descending.
    '''
    filters = {'queue': source_id}
    if only_errors:
        # Only tasks that have a failure timestamp recorded
        filters['failed_at__isnull'] = False
    return CompletedTask.objects.filter(**filters).order_by('-failed_at')
|
|
|
|
|
|
|
|
|
2020-12-08 14:31:45 +00:00
|
|
|
def delete_task(task_name, source_id):
    '''
        Deletes scheduled Task objects with the given task name whose queue
        matches the stringified source ID.

        Returns the result of the queryset delete() call.
    '''
    return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete()
|
2020-12-07 10:26:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
def cleanup_completed_tasks():
    '''
        Deletes CompletedTask objects with a run_at older than the retention
        period. The period is read from settings.COMPLETED_TASKS_DAYS_TO_KEEP
        and defaults to 30 days if the setting is not defined.
    '''
    days_to_keep = getattr(settings, 'COMPLETED_TASKS_DAYS_TO_KEEP', 30)
    # Anything that ran before this point in time is removed
    cutoff = timezone.now() - timedelta(days=days_to_keep)
    log.info(f'Deleting completed tasks older than {days_to_keep} days '
             f'(run_at before {cutoff})')
    CompletedTask.objects.filter(run_at__lt=cutoff).delete()
|
2020-12-06 01:22:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
@background(schedule=0)
def index_source_task(source_id):
    '''
        Indexes media available from a Source object.

        source_id: primary key of the Source to index

        Every item returned by source.index_media() that has a value for the
        source's key field is created or updated as a Media object. Items that
        fail to save with an IntegrityError are logged and skipped. Old
        completed tasks are cleaned up once indexing has finished.

        Raises NoMediaException if the source returns no media to index.
    '''
    try:
        source = Source.objects.get(pk=source_id)
    except Source.DoesNotExist:
        # Task triggered but the Source has been deleted, delete the task.
        # This uses delete_task() as no delete_index_source_task() helper is
        # defined or imported in the visible part of this module.
        delete_task('sync.tasks.index_source_task', source_id)
        return
    # Reset any errors
    source.has_failed = False
    source.save()
    # Index the source
    videos = source.index_media()
    if not videos:
        raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no '
                               f'media to index, is the source key valid? Check the '
                               f'source configuration is correct and that the source '
                               f'is reachable')
    # Got some media, update the last crawl timestamp
    source.last_crawl = timezone.now()
    source.save()
    log.info(f'Found {len(videos)} media items for source: {source}')
    for video in videos:
        # Create or update each video as a Media object
        key = video.get(source.key_field, None)
        if not key:
            # Video has no unique key (ID), it can't be indexed
            continue
        try:
            media = Media.objects.get(key=key)
        except Media.DoesNotExist:
            media = Media(key=key)
        media.source = source
        media.metadata = json.dumps(video)
        # upload_date is read back from the media object after the metadata
        # has been set, presumably it is derived from the metadata
        upload_date = media.upload_date
        if upload_date:
            media.published = timezone.make_aware(upload_date)
        try:
            media.save()
            log.info(f'Indexed media: {source} / {media}')
        except IntegrityError as e:
            # Log and carry on so one bad item does not stop the whole index
            log.error(f'Index media failed: {source} / {media} with "{e}"')
    # Tack on a cleanup of old completed tasks
    cleanup_completed_tasks()
|
2020-12-06 01:22:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
@background(schedule=0)
def download_media_thumbnail(media_id, url):
    '''
        Downloads an image from a URL and saves it as a local thumbnail attached to a
        Media object.

        media_id: primary key of the Media object to attach the thumbnail to
        url: URL of the remote image to download

        The target size is read from settings.MEDIA_THUMBNAIL_WIDTH and
        settings.MEDIA_THUMBNAIL_HEIGHT, defaulting to 430x240. The image is
        stored as a progressive JPEG at quality 80.

        Returns True once the thumbnail is saved, or None if the Media object
        no longer exists.
    '''
    try:
        media = Media.objects.get(pk=media_id)
    except Media.DoesNotExist:
        # Task triggered but the media no longer exists, ignore task
        return
    width = getattr(settings, 'MEDIA_THUMBNAIL_WIDTH', 430)
    height = getattr(settings, 'MEDIA_THUMBNAIL_HEIGHT', 240)
    i = get_remote_image(url)
    log.info(f'Resizing {i.width}x{i.height} thumbnail to '
             f'{width}x{height}: {url}')
    i = resize_image_to_height(i, width, height)
    # Encode the resized image into memory rather than a temporary file
    image_file = BytesIO()
    i.save(image_file, 'JPEG', quality=80, optimize=True, progressive=True)
    media.thumb.save(
        'thumb',
        SimpleUploadedFile(
            'thumb',
            # getvalue() returns the whole buffer regardless of position
            image_file.getvalue(),
            'image/jpeg',
        ),
        save=True
    )
    log.info(f'Saved thumbnail for: {media} from: {url}')
    return True
|