Merge pull request #425 from locke4/main

Add support for regex video title filtering
This commit is contained in:
meeb 2023-11-20 16:53:58 +11:00 committed by GitHub
commit 33b471175a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 127 additions and 30 deletions

View File

@ -4,6 +4,7 @@ env:
IMAGE_NAME: tubesync IMAGE_NAME: tubesync
on: on:
workflow_dispatch:
push: push:
branches: branches:
- main - main

View File

@ -3,7 +3,7 @@
<div class="col s12"> <div class="col s12">
<div class="pagination"> <div class="pagination">
{% for i in paginator.page_range %} {% for i in paginator.page_range %}
<a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}">{{ i }}</a> <a class="pagenum{% if i == page_obj.number %} currentpage{% endif %}" href="?{% if filter %}filter={{ filter }}&{% endif %}page={{ i }}{% if show_skipped %}&show_skipped=yes{% endif %}{% if only_skipped %}&only_skipped=yes{% endif %}">{{ i }}</a>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>

View File

@ -0,0 +1,29 @@
# Generated by Django 3.2.22 on 2023-10-24 17:25
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('sync', '0019_add_delete_removed_media'),
]
operations = [
migrations.AddField(
model_name='source',
name='filter_text',
field=models.CharField(blank=True, default='', help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter string'),
),
migrations.AlterField(
model_name='source',
name='auto_subtitles',
field=models.BooleanField(default=False, help_text='Accept auto-generated subtitles', verbose_name='accept auto-generated subs'),
),
migrations.AlterField(
model_name='source',
name='sub_langs',
field=models.CharField(default='en', help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat', max_length=30, validators=[django.core.validators.RegexValidator(message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat', regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$')], verbose_name='subs langs'),
),
]

View File

@ -1,6 +1,7 @@
import os import os
import uuid import uuid
import json import json
import re
from xml.etree import ElementTree from xml.etree import ElementTree
from collections import OrderedDict from collections import OrderedDict
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -287,6 +288,13 @@ class Source(models.Model):
help_text=_('If "delete old media" is ticked, the number of days after which ' help_text=_('If "delete old media" is ticked, the number of days after which '
'to automatically delete media') 'to automatically delete media')
) )
filter_text = models.CharField(
_('filter string'),
max_length=100,
default='',
blank=True,
help_text=_('Regex compatible filter string for video titles')
)
delete_removed_media = models.BooleanField( delete_removed_media = models.BooleanField(
_('delete removed media'), _('delete removed media'),
default=False, default=False,
@ -538,6 +546,9 @@ class Source(models.Model):
except Exception as e: except Exception as e:
return '' return ''
def is_regex_match(self, media_item_title):
return bool(re.search(self.filter_text,media_item_title))
def index_media(self): def index_media(self):
''' '''
Index the media source returning a list of media metadata as dicts. Index the media source returning a list of media metadata as dicts.

View File

@ -104,36 +104,57 @@ def media_post_save(sender, instance, created, **kwargs):
# already been downloaded # already been downloaded
if not instance.downloaded: if not instance.downloaded:
max_cap_age = instance.source.download_cap_date max_cap_age = instance.source.download_cap_date
filter_text = instance.source.filter_text
published = instance.published published = instance.published
if instance.skip:
#currently marked to be skipped, check if skip conditions still apply
if not published: if not published:
if not instance.skip:
log.warn(f'Media: {instance.source} / {instance} has no published date '
f'set, marking to be skipped')
instance.skip = True
cap_changed = True
else:
log.debug(f'Media: {instance.source} / {instance} has no published date ' log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped') f'set but is already marked to be skipped')
else: else:
if max_cap_age: if max_cap_age and filter_text:
if published > max_cap_age and instance.skip: if (published > max_cap_age) and (source.is_regex_match(instance.title)):
# Media was published after the cap date and matches the filter text, but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False
cap_changed = True
else:
print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped')
elif max_cap_age:
if published > max_cap_age:
# Media was published after the cap date but is set to be skipped # Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid ' log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped') f'publishing date, marking to be unskipped')
instance.skip = False instance.skip = False
cap_changed = True cap_changed = True
elif published <= max_cap_age and not instance.skip: elif filter_text:
if source.is_regex_match(instance.title):
# Media matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False
cap_changed = True
else:
if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')
instance.skip = True
cap_changed = True
else:
if max_cap_age:
if published <= max_cap_age:
log.info(f'Media: {instance.source} / {instance} is too old for ' log.info(f'Media: {instance.source} / {instance} is too old for '
f'the download cap date, marking to be skipped') f'the download cap date, marking to be skipped')
instance.skip = True instance.skip = True
cap_changed = True cap_changed = True
else: if filter_text:
if instance.skip: if not re.search(filter_text,instance.title):
# Media marked to be skipped but source download cap removed #media doesn't match the filter text but is not marked to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid ' log.info(f'Media: {instance.source} / {instance} does not match the filter text')
f'publishing date, marking to be unskipped') instance.skip = True
instance.skip = False
cap_changed = True cap_changed = True
# Recalculate the "can_download" flag, this may # Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed # need to change if the source specifications have been changed
if instance.metadata: if instance.metadata:

View File

@ -254,6 +254,11 @@ def download_media_metadata(media_id):
log.warn(f'Media: {source} / {media} is older than cap age ' log.warn(f'Media: {source} / {media} is older than cap age '
f'{max_cap_age}, skipping') f'{max_cap_age}, skipping')
media.skip = True media.skip = True
# If the source has a search filter, check the video title matches the filter
if source.filter_text and not source.is_regex_match(media.title):
# Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
media.skip = True
# If the source has a cut-off check the upload date is within the allowed delta # If the source has a cut-off check the upload date is within the allowed delta
if source.delete_old_media and source.days_to_keep > 0: if source.delete_old_media and source.days_to_keep > 0:
if not isinstance(media.published, datetime): if not isinstance(media.published, datetime):

View File

@ -64,5 +64,5 @@
</div> </div>
{% endfor %} {% endfor %}
</div> </div>
{% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped %} {% include 'pagination.html' with pagination=sources.paginator filter=source.pk show_skipped=show_skipped only_skipped=only_skipped%}
{% endblock %} {% endblock %}

View File

@ -43,6 +43,10 @@
<td class="hide-on-small-only">Directory</td> <td class="hide-on-small-only">Directory</td>
<td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td> <td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ source.directory }}</strong></td>
</tr> </tr>
<tr title="Filter text">
<td class="hide-on-small-only">Filter text</td>
<td><span class="hide-on-med-and-up">Filter text<br></span><strong>{{ source.filter_text }}</strong></td>
</tr>
<tr title="Media file name format to use for saving files"> <tr title="Media file name format to use for saving files">
<td class="hide-on-small-only">Media format</td> <td class="hide-on-small-only">Media format</td>
<td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td> <td><span class="hide-on-med-and-up">Media format<br></span><strong>{{ source.media_format }}</strong></td>

View File

@ -175,6 +175,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': 3600, 'index_schedule': 3600,
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,
@ -217,6 +218,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_HOUR, 'index_schedule': Source.IndexSchedule.EVERY_HOUR,
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,
@ -247,6 +249,7 @@ class FrontEndTestCase(TestCase):
'directory': 'testdirectory', 'directory': 'testdirectory',
'media_format': settings.MEDIA_FORMATSTR_DEFAULT, 'media_format': settings.MEDIA_FORMATSTR_DEFAULT,
'download_cap': 0, 'download_cap': 0,
'filter_text':'.*',
'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed 'index_schedule': Source.IndexSchedule.EVERY_2_HOURS, # changed
'delete_old_media': False, 'delete_old_media': False,
'days_to_keep': 14, 'days_to_keep': 14,
@ -1468,6 +1471,29 @@ class FormatMatchingTestCase(TestCase):
self.media.get_best_video_format() self.media.get_best_video_format()
self.media.get_best_audio_format() self.media.get_best_audio_format()
def test_is_regex_match(self):
self.media.metadata = all_test_metadata['boring']
expected_matches = {
('.*'): (True),
('no fancy stuff'): (True),
('No fancy stuff'): (False),
('(?i)No fancy stuff'): (True), #set case insensitive flag
('no'): (True),
('Foo'): (False),
('^(?!.*fancy).*$'): (False),
('^(?!.*funny).*$'): (True),
('(?=.*f.*)(?=.{0,2}|.{4,})'): (True),
('f{4,}'): (False),
('^[^A-Z]*$'): (True),
('^[^a-z]*$'): (False),
('^[^\\s]*$'): (False)
}
for params, expected in expected_matches.items():
self.source.filter_text = params
expected_match_result = expected
self.assertEqual(self.source.is_regex_match(self.media.title), expected_match_result)
class TasksTestCase(TestCase): class TasksTestCase(TestCase):
def setUp(self): def setUp(self):

View File

@ -294,7 +294,7 @@ class ValidateSourceView(FormView):
class EditSourceMixin: class EditSourceMixin:
model = Source model = Source
fields = ('source_type', 'key', 'name', 'directory', 'media_format', fields = ('source_type', 'key', 'name', 'directory', 'filter_text', 'media_format',
'index_schedule', 'download_media', 'download_cap', 'delete_old_media', 'index_schedule', 'download_media', 'download_cap', 'delete_old_media',
'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec', 'delete_removed_media', 'days_to_keep', 'source_resolution', 'source_vcodec',
'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails', 'source_acodec', 'prefer_60fps', 'prefer_hdr', 'fallback', 'copy_thumbnails',