task management and task runtime flow

This commit is contained in:
meeb 2020-12-07 21:26:46 +11:00
parent f6d00b47eb
commit 647f7162cc
22 changed files with 543 additions and 48 deletions

6
app/common/errors.py Normal file
View File

@ -0,0 +1,6 @@
class NoMediaException(Exception):
    '''
        Signals that indexing a source produced no media at all. Typical causes
        are an invalid playlist name (or similar), or an error returned by the
        upstream source.
    '''

View File

@ -2,9 +2,9 @@ $colour-white: #ffffff;
$colour-black: #000000;
$colour-near-black: #011627;
$colour-near-white: #fdfffc;
$colour-light-blue: #2e8ac4;
$colour-light-blue: #1e5c83;
$colour-red: #e71d36;
$colour-orange: #ef9912;
$colour-orange: #ff9c00;
$background-colour: $colour-near-white;
$text-colour: $colour-near-black;
@ -37,7 +37,7 @@ $form-help-text-colour: $colour-light-blue;
$form-delete-button-background-colour: $colour-red;
$collection-no-items-text-colour: $colour-near-black;
$collection-text-colour: $colour-near-black;
$collection-text-colour: $colour-light-blue;
$collection-background-hover-colour: $colour-orange;
$collection-text-hover-colour: $colour-near-white;
@ -52,6 +52,9 @@ $box-error-text-colour: $colour-near-white;
$infobox-background-colour: $colour-near-black;
$infobox-text-colour: $colour-near-white;
$errorbox-background-colour: $colour-red;
$errorbox-text-colour: $colour-near-white;
$pagination-background-colour: $colour-near-white;
$pagination-text-colour: $colour-near-black;
$pagination-border-colour: $colour-light-blue;
@ -61,3 +64,5 @@ $pagination-border-hover-colour: $colour-light-blue;
$pagination-current-background-colour: $colour-orange;
$pagination-current-text-colour: $colour-near-white;
$pagination-current-border-colour: $colour-orange;
$error-text-colour: $colour-red;

View File

@ -75,6 +75,12 @@ main {
font-size: 2rem;
}
h2 {
margin: 0;
padding: 2rem 0 0.5rem 0;
font-size: 1.5rem;
}
.btn {
width: 100%;
background-color: $main-button-background-colour;
@ -90,6 +96,7 @@ main {
.collection {
margin: 0.5rem 0 0 0 !important;
.collection-item {
transition: initial !important;
display: block;
}
a.collection-item {
@ -167,6 +174,15 @@ main {
color: $infobox-text-colour;
}
.errorbox {
background-color: $errorbox-background-colour;
color: $errorbox-text-colour;
}
.error-text {
color: $error-text-colour !important;
}
}
footer {

View File

@ -31,7 +31,7 @@
<li><a href="{% url 'sync:dashboard' %}"><i class="fas fa-fw fa-th-large"></i><span class="hide-on-med-and-down"> Dashboard</span></a></li>
<li><a href="{% url 'sync:sources' %}"><i class="fas fa-fw fa-play"></i><span class="hide-on-med-and-down"> Sources</span></a></li>
<li><a href="{% url 'sync:media' %}"><i class="fas fa-fw fa-film"></i><span class="hide-on-med-and-down"> Media</span></a></li>
<li><a href="{% url 'sync:tasks' %}"><i class="fas fa-fw fa-clock"></i><span class="hide-on-med-and-down"> Tasks</span></a></li>
<li><a href="{% url 'sync:tasks' %}"><i class="far fa-fw fa-clock"></i><span class="hide-on-med-and-down"> Tasks</span></a></li>
</ul>
</div>
</nav>

View File

@ -0,0 +1,11 @@
{% if message %}
<div class="row">
<div class="col s12">
<div class="card errorbox">
<div class="card-content">
<i class="fas fa-exclamation-triangle"></i> {{ message|safe }}
</div>
</div>
</div>
</div>
{% endif %}

View File

@ -6,7 +6,7 @@ from .models import Source, Media
class SourceAdmin(admin.ModelAdmin):
ordering = ('-created',)
list_display = ('name',)
list_display = ('name', 'get_source_type_display', 'last_crawl', 'has_failed')
readonly_fields = ('uuid', 'created')
search_fields = ('uuid', 'key', 'name')

View File

@ -0,0 +1,18 @@
# Generated by Django 3.1.4 on 2020-12-07 07:31
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('sync', '0013_auto_20201207_0439'),
]
operations = [
migrations.AddField(
model_name='source',
name='has_errors',
field=models.BooleanField(default=False, help_text='Source has errors', verbose_name='has errors'),
),
]

View File

@ -0,0 +1,22 @@
# Generated by Django 3.1.4 on 2020-12-07 07:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('sync', '0014_source_has_errors'),
]
operations = [
migrations.RemoveField(
model_name='source',
name='has_errors',
),
migrations.AddField(
model_name='source',
name='has_failed',
field=models.BooleanField(default=False, help_text='Source has failed to index media', verbose_name='has failed'),
),
]

View File

@ -221,6 +221,11 @@ class Source(models.Model):
default=FALLBACK_NEXT_HD,
help_text=_('What do do when media in your source resolution and codecs is not available')
)
has_failed = models.BooleanField(
_('has failed'),
default=False,
help_text=_('Source has failed to index media')
)
def __str__(self):
return self.name
@ -307,6 +312,8 @@ class Source(models.Model):
# Account for nested playlists, such as a channel of playlists of playlists
def _recurse_playlists(playlist):
videos = []
if not playlist:
return videos
entries = playlist.get('entries', [])
for entry in entries:
if not entry:

View File

@ -1,47 +1,98 @@
from django.conf import settings
from django.db.models.signals import post_save, pre_delete, post_delete
from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from background_task.signals import task_failed
from background_task.models import Task
from common.logger import log
from .models import Source, Media
from .tasks import delete_index_source_task, index_source_task, download_media_thumbnail
from .tasks import (delete_index_source_task, index_source_task,
download_media_thumbnail, map_task_to_instance)
from .utils import delete_file
@receiver(pre_save, sender=Source)
def source_pre_save(sender, instance, **kwargs):
    # Runs just before a source is written. When the indexing schedule differs
    # from the stored one the indexing task is dropped and scheduled again.
    try:
        stored_source = Source.objects.get(pk=instance.pk)
    except Source.DoesNotExist:
        # No stored row for this primary key, so there is nothing to compare
        return
    if stored_source.index_schedule == instance.index_schedule:
        # Schedule untouched, keep the existing indexing task
        return
    delete_index_source_task(str(instance.pk))
    verbose_name = _('Index media from source "{}"')
    index_source_task(
        str(instance.pk),
        repeat=instance.index_schedule,
        verbose_name=verbose_name.format(instance.name)
    )
@receiver(post_save, sender=Source)
def source_post_save(sender, instance, created, **kwargs):
# Triggered when a source is saved, delete any source tasks that might exist
# Triggered after a source is saved
if created:
# Create a new indexing task for newly created sources
delete_index_source_task(str(instance.pk))
# Create a new scheduled indexing task as the repeat schedule may have changed
index_source_task(str(instance.pk), repeat=instance.index_schedule)
log.info(f'Scheduling media indexing for source: {instance.name}')
verbose_name = _('Index media from source "{}"')
index_source_task(
str(instance.pk),
repeat=instance.index_schedule,
verbose_name=verbose_name.format(instance.name)
)
@receiver(pre_delete, sender=Source)
def source_pre_delete(sender, instance, **kwargs):
    # Triggered before a source is deleted, delete all media objects to trigger
    # the Media models post_delete signal.
    # NOTE: this handler must not share its name with the post_delete handler.
    # @receiver connects handlers with weak references by default, so a later
    # definition reusing the name would let this function be garbage collected
    # and silently disconnected from the pre_delete signal.
    for media in Media.objects.filter(source=instance):
        log.info(f'Deleting media for source: {instance.name} item: {media.name}')
        media.delete()
@receiver(post_delete, sender=Source)
def source_post_delete(sender, instance, **kwargs):
# Triggered when a source is deleted
# Triggered after a source is deleted
log.info(f'Deleting tasks for source: {instance.name}')
delete_index_source_task(str(instance.pk))
@receiver(task_failed, sender=Task)
def task_task_failed(sender, task_id, completed_task, **kwargs):
    # Handles background_task's task_failed signal. The original note says this
    # fires once a task has reached its max retry attempts (TODO confirm against
    # background_task). Only tasks that map back to a Source are acted upon.
    instance, _url = map_task_to_instance(completed_task)
    if not isinstance(instance, Source):
        return
    log.error(f'Permanent failure for source: {instance} task: {completed_task}')
    # Flag the source so the failure can be surfaced in the interface
    instance.has_failed = True
    instance.save()
@receiver(post_save, sender=Media)
def media_post_save(sender, instance, created, **kwargs):
    # Triggered after media is saved
    if not created:
        return
    # If the media is newly created fire a task off to download its thumbnail
    metadata = instance.loaded_metadata
    thumbnail_url = metadata.get('thumbnail', '')
    if not thumbnail_url:
        # No thumbnail advertised in the metadata, nothing to schedule
        return
    log.info(f'Scheduling task to download thumbnail for: {instance.name} '
             f'from: {thumbnail_url}')
    # The closing quote was missing from this label in the original
    verbose_name = _('Downloading media thumbnail for "{}"')
    download_media_thumbnail(
        str(instance.pk),
        thumbnail_url,
        verbose_name=verbose_name.format(instance.name)
    )
@receiver(post_delete, sender=Media)
def media_post_delete(sender, instance, **kwargs):
# Triggered when media is deleted, delete media thumbnail
# Triggered after media is deleted, delete media thumbnail
if instance.thumb:
log.info(f'Deleting thumbnail for: {instance} path: {instance.thumb.path}')
delete_file(instance.thumb.path)

View File

@ -6,30 +6,109 @@
import json
import math
import uuid
from io import BytesIO
from hashlib import sha1
from datetime import timedelta
from PIL import Image
from django.conf import settings
from django.core.files.uploadedfile import SimpleUploadedFile
from django.utils import timezone
from django.db.utils import IntegrityError
from background_task import background
from background_task.models import Task
from background_task.models import Task, CompletedTask
from common.logger import log
from common.errors import NoMediaException
from .models import Source, Media
from .utils import get_remote_image, resize_image_to_height
def delete_index_source_task(source_id):
task = None
def get_hash(task_name, pk):
    '''
        Build the hash used to look up a Task or CompletedTask in
        background_task. The parameters are serialised as a JSON
        (args, kwargs) pair whose only positional argument is the primary
        key as a string, appended to the task name and hashed with SHA-1.
    '''
    serialised_params = json.dumps(((str(pk),), {}), sort_keys=True)
    payload = '{}{}'.format(task_name, serialised_params)
    return sha1(payload.encode('utf-8')).hexdigest()
def map_task_to_instance(task):
    '''
        Reverse-maps a scheduled background task to a model instance. Requires
        the task name to be a known task function and the first positional
        argument to be a UUID. This is used because UUIDs are incompatible with
        background_task's "creator" feature.

        Returns a (instance, url_name) tuple where url_name is the named URL
        pattern for the instance's model. Returns (None, None) when the task is
        not a known task, its parameters cannot be decoded, or the instance no
        longer exists.
    '''
    TASK_MAP = {
        'sync.tasks.index_source_task': Source,
        'sync.tasks.download_media_thumbnail': Media,
    }
    MODEL_URL_MAP = {
        Source: 'sync:source',
        Media: 'sync:media-item',
    }
    task_func, task_args_str = task.task_name, task.task_params
    model = TASK_MAP.get(task_func, None)
    if not model:
        return None, None
    url = MODEL_URL_MAP.get(model, None)
    if not url:
        return None, None
    try:
        task_args = json.loads(task_args_str)
    except (TypeError, ValueError, AttributeError):
        return None, None
    # Parameters are expected to be a two item [args, kwargs] pair; any other
    # JSON value (number, string, object, ...) is treated as unmappable
    if not isinstance(task_args, (list, tuple)) or len(task_args) != 2:
        return None, None
    args, kwargs = task_args
    if not isinstance(args, (list, tuple)) or len(args) == 0:
        return None, None
    instance_uuid_str = args[0]
    try:
        instance_uuid = uuid.UUID(instance_uuid_str)
    except (TypeError, ValueError, AttributeError):
        return None, None
    try:
        instance = model.objects.get(pk=instance_uuid)
    except model.DoesNotExist:
        # The task outlived the object it was created for
        return None, None
    return instance, url
def get_error_message(task):
    '''
        Pull the message out of a failed task's stored error. Only the last
        line of the stored error is inspected and the text after its first
        colon is returned. An empty string is returned when the task has no
        error or the last line holds no colon.
    '''
    if not task.has_error():
        return ''
    final_line = task.last_error.strip().split('\n')[-1].strip()
    _prefix, separator, message = final_line.partition(':')
    if not separator:
        return ''
    return message.strip()
def get_source_completed_tasks(source_id, only_errors=False):
    '''
        Look up the CompletedTask rows recorded for a source's indexing task,
        matched by task hash and ordered by most recent failure first. When
        only_errors is set, only rows with a failure timestamp are returned.
    '''
    index_task_hash = get_hash('sync.tasks.index_source_task', source_id)
    completed = CompletedTask.objects.filter(task_hash=index_task_hash)
    if only_errors:
        completed = completed.filter(failed_at__isnull=False)
    return completed.order_by('-failed_at')
def delete_index_source_task(source_id):
    # Asks background_task to drop the 'sync.tasks.index_source_task' task that
    # was registered with source_id as its only positional argument.
    # NOTE(review): callers in this project pass str(pk); presumably the value
    # must match the scheduled argument exactly -- confirm against drop_task.
    Task.objects.drop_task('sync.tasks.index_source_task', args=(source_id,))
def cleanup_completed_tasks():
    '''
        Delete CompletedTask rows whose run_at is older than the retention
        window. The window is read from settings.COMPLETED_TASKS_DAYS_TO_KEEP
        and falls back to 30 days when the setting is absent.
    '''
    retention_days = getattr(settings, 'COMPLETED_TASKS_DAYS_TO_KEEP', 30)
    cutoff = timezone.now() - timedelta(days=retention_days)
    log.info(f'Deleting completed tasks older than {retention_days} days '
             f'(run_at before {cutoff})')
    expired_tasks = CompletedTask.objects.filter(run_at__lt=cutoff)
    expired_tasks.delete()
@background(schedule=0)
@ -43,7 +122,20 @@ def index_source_task(source_id):
# Task triggered but the Source has been deleted, delete the task
delete_index_source_task(source_id)
return
# Reset any errors
source.has_failed = False
source.save()
# Index the source
videos = source.index_media()
if not videos:
raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no '
f'media to index, is the source key valid? Check the '
f'source configuration is correct and that the source '
f'is reachable')
# Got some media, update the last crawl timestamp
source.last_crawl = timezone.now()
source.save()
log.info(f'Found {len(videos)} media items for source: {source}')
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
@ -64,6 +156,8 @@ def index_source_task(source_id):
log.info(f'Indexed media: {source} / {media}')
except IntegrityError as e:
log.error(f'Index media failed: {source} / {media} with "{e}"')
# Tack on a cleanup of old completed tasks
cleanup_completed_tasks()
@background(schedule=0)

View File

@ -3,6 +3,11 @@
{% block headtitle %}Dashboard{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<h1 class="truncate">Dashboard</h1>
</div>
</div>
<div class="row">
<div class="col s12">
<div class="card">

View File

@ -3,6 +3,11 @@
{% block headtitle %}Media{% if source %} - {{ source }}{% endif %}{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<h1 class="truncate">Media</h1>
</div>
</div>
{% include 'infobox.html' with message=message %}
<div class="row no-margin-bottom">
{% for m in media %}
@ -22,7 +27,7 @@
{% empty %}
<div class="col s12">
<div class="collection">
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> No media has been indexed.</span>
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> No media has been indexed{% if source %} that matches the specified source filter{% endif %}.</span>
</div>
</div>
{% endfor %}

View File

@ -3,14 +3,22 @@
{% block headtitle %}Source - {{ source.name }}{% endblock %}
{% block content %}
<div class="row">
<div class="row no-margin-bottom">
<div class="col s12">
<h1 class="truncate">Source <strong>{{ source.name }}</strong></h1>
<p class="truncate"><strong><a href="{{ source.url }}" target="_blank"><i class="fas fa-link"></i> {{ source.url }}</a></strong></p>
<p class="truncate">Saving to: <strong>{{ source.directory_path }}</strong></p>
<p><a href="{% url 'sync:media' %}?filter={{ source.pk }}" class="btn">Media<span class="hide-on-small-only"> linked to this source</span> <i class="fas fa-fw fa-film"></i></a></p>
</div>
</div>
<div class="row">
<div class="col s12 l6 margin-bottom">
<a href="{% url 'sync:media' %}?filter={{ source.pk }}" class="btn">View media<span class="hide-on-small-only"> linked to this source</span> <i class="fas fa-fw fa-film"></i></a>
</div>
<div class="col s12 l6 margin-bottom">
<a href="{% url 'sync:tasks-completed' %}?filter={{ source.pk }}" class="btn">View tasks<span class="hide-on-small-only"> linked to this source</span> <i class="far fa-fw fa-clock"></i></a>
</div>
</div>
{% if source.has_failed %}{% include 'errorbox.html' with message='This source has encountered permanent failures listed at the bottom of this page, check its settings' %}{% endif %}
<div class="row">
<div class="col s12">
<table class="striped">
@ -22,6 +30,10 @@
<td class="hide-on-small-only">Name</td>
<td><span class="hide-on-med-and-up">Name<br></span><strong>{{ source.name }}</strong></td>
</tr>
<tr title="Number of media items downloaded for the source">
<td class="hide-on-small-only">Media items</td>
<td><span class="hide-on-med-and-up">Media items<br></span><strong><a href="{% url 'sync:media' %}?filter={{ source.pk }}">{{ media|length }}</a></strong></td>
</tr>
<tr title="Unique key of the source, such as the channel name or playlist ID">
<td class="hide-on-small-only">Key</td>
<td><span class="hide-on-med-and-up">Key<br></span><strong>{{ source.key }}</strong></td>
@ -98,4 +110,20 @@
<a href="{% url 'sync:delete-source' pk=source.pk %}" class="btn delete-button">Delete source <i class="fas fa-trash-alt"></i></a>
</div>
</div>
{% if errors %}
<div class="row">
<div class="col s12">
<h2>Source has encountered {{ errors|length }} Error{{ errors|length|pluralize }}</h2>
<div class="collection">
{% for task in errors %}
<span class="collection-item error-text">
<i class="fas fa-exclamation-triangle"></i> <strong>{{ task.verbose_name }}</strong><br>
Error: &quot;{{ task.error_message }}&quot;<br>
<i class="far fa-clock"></i> Occured at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
</span>
{% endfor %}
</div>
</div>
</div>
{% endif %}
{% endblock %}

View File

@ -3,8 +3,13 @@
{% block headtitle %}Sources{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<h1 class="truncate">Sources</h1>
</div>
</div>
{% include 'infobox.html' with message=message %}
<div class="row no-margin-bottom">
<div class="row">
<div class="col s12 l6 margin-bottom">
<a href="{% url 'sync:validate-source' source_type='youtube-channel' %}" class="btn">Add a YouTube channel <i class="fab fa-youtube"></i></a>
</div>
@ -17,9 +22,13 @@
<div class="collection">
{% for source in sources %}
<a href="{% url 'sync:source' pk=source.pk %}" class="collection-item">
{{ source.icon|safe }} <strong>{{ source.name }}</strong> ({{ source.get_source_type_display }})<br>
{{ source.icon|safe }} <strong>{{ source.name }}</strong> ({{ source.get_source_type_display }} &quot;{{ source.key }}&quot;)<br>
{{ source.format_summary }}<br>
{% if source.has_failed %}
<span class="error-text"><i class="fas fa-exclamation-triangle"></i> <strong>Source has permanent failures</strong></span>
{% else %}
<strong>{{ source.media_count }}</strong> media items{% if source.delete_old_media and source.days_to_keep > 0 %}, keep {{ source.days_to_keep }} days of media{% endif %}
{% endif %}
</a>
{% empty %}
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> You haven't added any sources.</span>

View File

@ -0,0 +1,33 @@
{% extends 'base.html' %}
{% block headtitle %}Tasks - Completed{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<h1 class="truncate">Completed tasks</h1>
</div>
</div>
{% include 'infobox.html' with message=message %}
<div class="row">
<div class="col s12">
<div class="collection">
{% for task in tasks %}
<span class="collection-item">
{% if task.has_error %}
<i class="fas fa-exclamation-triangle"></i> <strong>{{ task.verbose_name }}</strong><br>
Error: &quot;{{ task.error_message }}&quot;<br>
<i class="far fa-clock"></i> Task started at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
{% else %}
<i class="fas fa-check"></i> <strong>{{ task.verbose_name }}</strong><br>
<i class="far fa-clock"></i> Task started at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
{% endif %}
</span>
{% empty %}
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> There have been no completed tasks.</span>
{% endfor %}
</div>
</div>
</div>
{% include 'pagination.html' with pagination=sources.paginator filter=source.pk %}
{% endblock %}

View File

@ -5,7 +5,83 @@
{% block content %}
<div class="row">
<div class="col s12">
tasks
<h1>Tasks</h1>
<p>
Tasks are the background work that TubeSync undertakes to index and download
media. This page allows you to see a basic overview of what is running and what is
scheduled to run in the future, as well as check up on any errors that might
have occurred.
</p>
</div>
</div>
<div class="row">
<div class="col s12">
<h2>{{ running|length }} Running</h2>
<p>
Running tasks are tasks which are currently being worked on.
</p>
<div class="collection">
{% for task in running %}
<a href="{% url task.url pk=task.instance.pk %}" class="collection-item">
<i class="fas fa-running"></i> <strong>{{ task }}</strong><br>
<i class="far fa-clock"></i> Task started at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
</a>
{% empty %}
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> There are no running tasks.</span>
{% endfor %}
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<h2>{{ errors|length }} Error{{ errors|length|pluralize }}</h2>
<p>
Tasks which generated an error are shown here. Tasks are retried a couple of
times, so if there was an intermittent error, such as an interrupted download,
the task will be scheduled to run again.
</p>
<div class="collection">
{% for task in errors %}
<a href="{% url task.url pk=task.instance.pk %}" class="collection-item error-text">
<i class="fas fa-exclamation-triangle"></i> <strong>{{ task }}</strong>, attempted {{ task.attempts }} time{{ task.attempts|pluralize }}<br>
Error: &quot;{{ task.error_message }}&quot;<br>
<i class="fas fa-history"></i> Task will be retried at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
</a>
{% empty %}
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> There are no tasks with errors.</span>
{% endfor %}
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<h2>{{ scheduled|length }} Scheduled</h2>
<p>
Tasks which are scheduled to run in the future or are waiting in a queue to be
processed. They can be waiting for an available worker to run immediately, or
run in the future at the specified &quot;run at&quot; time.
</p>
<div class="collection">
{% for task in scheduled %}
<a href="{% url task.url pk=task.instance.pk %}" class="collection-item">
<i class="far fa-stopwatch"></i> <strong>{{ task }}</strong><br>
Scheduled to run {{ task.instance.get_index_schedule_display|lower }}.<br>
<i class="fas fa-redo"></i> Task will run at <strong>{{ task.run_at|date:'Y-m-d H:i:s' }}</strong>
</a>
{% empty %}
<span class="collection-item no-items"><i class="fas fa-info-circle"></i> There are no scheduled tasks.</span>
{% endfor %}
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<h2>Completed</h2>
<p>
A record of recently completed tasks is kept for a few days. You can use the button
below to view recent tasks which have completed successfully.
</p>
<a href="{% url 'sync:tasks-completed' %}" class="btn"><span class="hide-on-med-and-down">View </span>Completed tasks <i class="fas fa-check-double"></i></a>
</div>
</div>
{% endblock %}

View File

@ -1,7 +1,7 @@
from django.urls import path
from .views import (DashboardView, SourcesView, ValidateSourceView, AddSourceView,
SourceView, UpdateSourceView, DeleteSourceView, MediaView,
MediaThumbView, MediaItemView, TasksView)
MediaThumbView, MediaItemView, TasksView, CompletedTasksView)
app_name = 'sync'
@ -61,4 +61,8 @@ urlpatterns = [
TasksView.as_view(),
name='tasks'),
path('tasks-completed',
CompletedTasksView.as_view(),
name='tasks-completed'),
]

View File

@ -14,25 +14,31 @@ def validate_url(url, validator):
Validate a URL against a dict of validation requirements. Returns an extracted
part of the URL if the URL is valid, if invalid raises a ValidationError.
'''
valid_scheme, valid_netloc, valid_path, valid_query, extract_parts = (
valid_scheme, valid_netloc, valid_path, invalid_paths, valid_query, \
extract_parts = (
validator['scheme'], validator['domain'], validator['path_regex'],
validator['qs_args'], validator['extract_key'])
validator['path_must_not_match'], validator['qs_args'],
validator['extract_key']
)
url_parts = urlsplit(str(url).strip())
url_scheme = str(url_parts.scheme).strip().lower()
if url_scheme != valid_scheme:
raise ValidationError(f'scheme "{url_scheme}" must be "{valid_scheme}"')
raise ValidationError(f'invalid scheme "{url_scheme}" must be "{valid_scheme}"')
url_netloc = str(url_parts.netloc).strip().lower()
if url_netloc != valid_netloc:
raise ValidationError(f'domain "{url_netloc}" must be "{valid_netloc}"')
raise ValidationError(f'invalid domain "{url_netloc}" must be "{valid_netloc}"')
url_path = str(url_parts.path).strip()
matches = re.findall(valid_path, url_path)
if not matches:
raise ValidationError(f'path "{url_path}" must match "{valid_path}"')
raise ValidationError(f'invalid path "{url_path}" must match "{valid_path}"')
for invalid_path in invalid_paths:
if url_path.lower() == invalid_path.lower():
raise ValidationError(f'path "{url_path}" is not valid')
url_query = str(url_parts.query).strip()
url_query_parts = parse_qs(url_query)
for required_query in valid_query:
if required_query not in url_query_parts:
raise ValidationError(f'query string "{url_query}" must '
raise ValidationError(f'invalid query string "{url_query}" must '
f'contain the parameter "{required_query}"')
extract_from, extract_param = extract_parts
extract_value = ''
@ -76,7 +82,7 @@ def resize_image_to_height(image, width, height):
if scaled_width > width:
# Width too large, crop it
delta = scaled_width - width
left, upper = (delta / 2), 0
left, upper = round(delta / 2), 0
right, lower = (left + width), height
image = image.crop((left, upper, right, lower))
return image

View File

@ -9,11 +9,14 @@ from django.urls import reverse_lazy
from django.db.models import Count
from django.forms import ValidationError
from django.utils.text import slugify
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from common.utils import append_uri_params
from background_task.models import Task, CompletedTask
from .models import Source, Media
from .forms import ValidateSourceForm, ConfirmDeleteSourceForm
from .utils import validate_url
from .tasks import map_task_to_instance, get_error_message, get_source_completed_tasks
from . import signals
from . import youtube
@ -108,6 +111,7 @@ class ValidateSourceView(FormView):
'scheme': 'https',
'domain': 'www.youtube.com',
'path_regex': '^\/(c\/)?([^\/]+)$',
'path_must_not_match': ('/playlist',),
'qs_args': [],
'extract_key': ('path_regex', 1),
'example': 'https://www.youtube.com/SOMECHANNEL'
@ -116,7 +120,8 @@ class ValidateSourceView(FormView):
'scheme': 'https',
'domain': 'www.youtube.com',
'path_regex': '^\/(playlist|watch)$',
'qs_args': ['list'],
'path_must_not_match': (),
'qs_args': ('list',),
'extract_key': ('qs_args', 'list'),
'example': 'https://www.youtube.com/playlist?list=PLAYLISTID'
},
@ -239,6 +244,16 @@ class SourceView(DetailView):
template_name = 'sync/source.html'
model = Source
def get_context_data(self, *args, **kwargs):
    '''
        Extends the detail context with 'errors', the failed completed
        indexing tasks for this source each annotated with an error_message
        attribute, and 'media', the source's media ordered by newest
        published first.
    '''
    context = super().get_context_data(*args, **kwargs)
    failed_tasks = []
    for failed_task in get_source_completed_tasks(self.object.pk, only_errors=True):
        # Attach the extracted message so the template can print it directly
        failed_task.error_message = get_error_message(failed_task)
        failed_tasks.append(failed_task)
    context['errors'] = failed_tasks
    source_media = Media.objects.filter(source=self.object)
    context['media'] = source_media.order_by('-published')
    return context
class UpdateSourceView(UpdateView):
@ -286,7 +301,7 @@ class MediaView(ListView):
context_object_name = 'media'
paginate_by = settings.MEDIA_PER_PAGE
messages = {
'filter': _('Viewing media for source: <strong>{name}</strong>'),
'filter': _('Viewing media filtered for source: <strong>{name}</strong>'),
}
def __init__(self, *args, **kwargs):
@ -352,13 +367,90 @@ class MediaItemView(DetailView):
model = Media
class TasksView(TemplateView):
class TasksView(ListView):
'''
A list of tasks queued to be completed. Typically, this is scraping for new
media or downloading media.
'''
template_name = 'sync/tasks.html'
context_object_name = 'tasks'
def get_queryset(self):
return Task.objects.all().order_by('run_at')
def get_context_data(self, *args, **kwargs):
    '''
        Splits the queued tasks into three context lists: 'running' (locked by
        a live worker process), 'errors' (tasks with a recorded error, each
        annotated with error_message) and 'scheduled' (everything else). Every
        listed task is annotated with 'instance' and 'url' so the template can
        link to the object it belongs to.
    '''
    data = super().get_context_data(*args, **kwargs)
    data['running'] = []
    data['errors'] = []
    data['scheduled'] = []
    for task in self.get_queryset():
        obj, url = map_task_to_instance(task)
        if not obj:
            # Orphaned task, ignore it (it will be deleted when it fires)
            continue
        setattr(task, 'instance', obj)
        setattr(task, 'url', url)
        if task.locked_by_pid_running():
            data['running'].append(task)
        elif task.has_error():
            error_message = get_error_message(task)
            setattr(task, 'error_message', error_message)
            data['errors'].append(task)
        else:
            data['scheduled'].append(task)
    return data
class CompletedTasksView(ListView):
    '''
        List of tasks which have been completed with an optional per-source
        filter. The filter is supplied as the "filter" query string parameter
        holding a source primary key. When it resolves to a source, only that
        source's completed indexing tasks are listed.
    '''

    template_name = 'sync/tasks-completed.html'
    context_object_name = 'tasks'
    paginate_by = settings.TASKS_PER_PAGE
    messages = {
        'filter': _('Viewing tasks filtered for source: <strong>{name}</strong>'),
    }

    def __init__(self, *args, **kwargs):
        self.filter_source = None
        super().__init__(*args, **kwargs)

    def dispatch(self, request, *args, **kwargs):
        # Resolve the optional source filter before the list is built
        filter_by = request.GET.get('filter', '')
        if filter_by:
            try:
                self.filter_source = Source.objects.get(pk=filter_by)
            except (Source.DoesNotExist, ValidationError, ValueError):
                # Unknown source, or a value that is not a valid primary key:
                # fall back to the unfiltered list rather than erroring
                self.filter_source = None
        return super().dispatch(request, *args, **kwargs)

    def get_queryset(self):
        # Newest completed tasks first, narrowed to the filtered source's
        # indexing tasks (matched by task hash) when a filter is active
        if self.filter_source:
            tasks = get_source_completed_tasks(self.filter_source.pk)
            return tasks.order_by('-run_at')
        return CompletedTask.objects.all().order_by('-run_at')

    def get_context_data(self, *args, **kwargs):
        data = super().get_context_data(*args, **kwargs)
        for task in data['tasks']:
            if task.has_error():
                # Attach the extracted message so the template can print it
                error_message = get_error_message(task)
                setattr(task, 'error_message', error_message)
        data['message'] = ''
        data['source'] = None
        if self.filter_source:
            # NOTE(review): the source name is interpolated into an HTML
            # message; confirm the template escapes it before rendering
            message = str(self.messages.get('filter', ''))
            data['message'] = message.format(name=self.filter_source.name)
            data['source'] = self.filter_source
        return data

View File

@ -1,13 +1,15 @@
import os
from binascii import hexlify
from pathlib import Path
BASE_DIR = Path(__file__).resolve().parent.parent
ROOT_DIR = Path('/')
SECRET_KEY = str(os.getenv('DJANGO_SECRET_KEY', ''))
ALLOWED_HOSTS_STR = str(os.getenv('DJANGO_ALLOWED_HOSTS', ''))
RANDOM_SECRET = hexlify(os.urandom(32)).decode()
SECRET_KEY = str(os.getenv('DJANGO_SECRET_KEY', RANDOM_SECRET))
ALLOWED_HOSTS_STR = str(os.getenv('TUBESYNC_HOSTS', 'localhost'))
ALLOWED_HOSTS = ALLOWED_HOSTS_STR.split(',')
@ -19,6 +21,9 @@ DATABASES = {
}
# Number of async worker threads, overridable with the TUBESYNC_WORKERS
# environment variable (os.getenv; the os module has no get() function)
BACKGROUND_TASK_ASYNC_THREADS = int(os.getenv('TUBESYNC_WORKERS', 2))
MEDIA_ROOT = ROOT_DIR / 'config' / 'media'
SYNC_VIDEO_ROOT = ROOT_DIR / 'downloads' / 'video'
SYNC_AUDIO_ROOT = ROOT_DIR / 'downloads' / 'audio'

View File

@ -118,10 +118,12 @@ MAX_RUN_TIME = 1800 # Maximum amount of time in seconds
BACKGROUND_TASK_RUN_ASYNC = True # Run tasks async in the background
BACKGROUND_TASK_ASYNC_THREADS = 2 # Number of async tasks to run at once
BACKGROUND_TASK_PRIORITY_ORDERING = 'DESC' # Process high priority tasks first
COMPLETED_TASKS_DAYS_TO_KEEP = 30 # Number of days to keep completed tasks
SOURCES_PER_PAGE = 36
MEDIA_PER_PAGE = 36
TASKS_PER_PAGE = 100
MEDIA_THUMBNAIL_WIDTH = 430 # Width in pixels to resize thumbnails to