Merge pull request #7 from locke4/locke4-patch-2

Updated according to comments on PR
This commit is contained in:
locke4 2023-10-24 18:38:17 +01:00 committed by GitHub
commit cf06f4cbc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 87 additions and 34 deletions

View File

@ -24,7 +24,6 @@ class Migration(migrations.Migration):
('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')), ('source_type', models.CharField(choices=[('c', 'YouTube channel'), ('p', 'YouTube playlist')], db_index=True, default='c', help_text='Source type', max_length=1, verbose_name='source type')),
('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')), ('key', models.CharField(db_index=True, help_text='Source key, such as exact YouTube channel name or playlist ID', max_length=100, unique=True, verbose_name='key')),
('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')), ('name', models.CharField(db_index=True, help_text='Friendly name for the source, used locally in TubeSync only', max_length=100, unique=True, verbose_name='name')),
('filter_text', models.CharField(db_index=True, help_text='Regex compatible filter string for video titles', max_length=100, verbose_name='filter text')),
('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')), ('directory', models.CharField(db_index=True, help_text='Directory name to save the media into', max_length=100, unique=True, verbose_name='directory')),
('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')), ('index_schedule', models.IntegerField(choices=[(3600, 'Every hour'), (7200, 'Every 2 hours'), (10800, 'Every 3 hours'), (14400, 'Every 4 hours'), (18000, 'Every 5 hours'), (21600, 'Every 6 hours'), (43200, 'Every 12 hours'), (86400, 'Every 24 hours')], db_index=True, default=21600, help_text='Schedule of how often to index the source for new media', verbose_name='index schedule')),
('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')), ('delete_old_media', models.BooleanField(default=False, help_text='Delete old media after "days to keep" days?', verbose_name='delete old media')),

View File

@ -0,0 +1,29 @@
# Generated by Django 3.2.22 on 2023-10-24 17:25
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
    '''
        Adds the filter_text field to the source model and alters the
        definitions of its auto_subtitles and sub_langs fields.
    '''

    dependencies = [
        ('sync', '0019_add_delete_removed_media'),
    ]

    operations = [
        # New optional field; blank by default.
        migrations.AddField(
            model_name='source',
            name='filter_text',
            field=models.CharField(
                blank=True,
                default='',
                help_text='Regex compatible filter string for video titles',
                max_length=100,
                verbose_name='filter string',
            ),
        ),
        migrations.AlterField(
            model_name='source',
            name='auto_subtitles',
            field=models.BooleanField(
                default=False,
                help_text='Accept auto-generated subtitles',
                verbose_name='accept auto-generated subs',
            ),
        ),
        # sub_langs is validated as a comma-separated list of language codes,
        # each optionally prefixed with "-".
        migrations.AlterField(
            model_name='source',
            name='sub_langs',
            field=models.CharField(
                default='en',
                help_text='List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat',
                max_length=30,
                validators=[
                    django.core.validators.RegexValidator(
                        message='Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat',
                        regex='^(\\-?[\\_\\.a-zA-Z]+,)*(\\-?[\\_\\.a-zA-Z]+){1}$',
                    ),
                ],
                verbose_name='subs langs',
            ),
        ),
    ]

View File

@ -1,6 +1,7 @@
import os import os
import uuid import uuid
import json import json
import re
from xml.etree import ElementTree from xml.etree import ElementTree
from collections import OrderedDict from collections import OrderedDict
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -290,7 +291,7 @@ class Source(models.Model):
filter_text = models.CharField( filter_text = models.CharField(
_('filter string'), _('filter string'),
max_length=100, max_length=100,
default='.*', default='',
blank=True, blank=True,
help_text=_('Regex compatible filter string for video titles') help_text=_('Regex compatible filter string for video titles')
) )
@ -545,6 +546,9 @@ class Source(models.Model):
except Exception as e: except Exception as e:
return '' return ''
def is_regex_match(self, media_item_title):
    '''
        Check a media item title against this source's filter text.

        The filter text is treated as a regular expression and searched for
        anywhere in the title, so it does not have to match the whole title.
        Returns True when the expression is found, otherwise False.
    '''
    pattern = self.filter_text
    found = re.search(pattern, media_item_title)
    # re.search returns a match object on success and None otherwise
    return found is not None
def index_media(self): def index_media(self):
''' '''
Index the media source returning a list of media metadata as dicts. Index the media source returning a list of media metadata as dicts.

View File

@ -1,5 +1,4 @@
import os import os
import re
from django.conf import settings from django.conf import settings
from django.db.models.signals import pre_save, post_save, pre_delete, post_delete from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
from django.dispatch import receiver from django.dispatch import receiver
@ -109,34 +108,34 @@ def media_post_save(sender, instance, created, **kwargs):
published = instance.published published = instance.published
if instance.skip: if instance.skip:
#currently marked to be skipped, check if skip conditions still apply #currently marked to be skipped, check if skip conditions still apply
if not published: if not published:
log.debug(f'Media: {instance.source} / {instance} has no published date ' log.debug(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped') f'set but is already marked to be skipped')
else: else:
if max_cap_age and filter_text: if max_cap_age and filter_text:
if (published > max_cap_age) and (re.search(filter_text,instance.title)): if (published > max_cap_age) and (source.is_regex_match(instance.title)):
# Media was published after the cap date but is set to be skipped # Media was published after the cap date and matches the filter text, but is set to be skipped
print('Has a valid publishing date and matches filter, marking unskipped') print('Has a valid publishing date and matches filter, marking unskipped')
instance.skip = False instance.skip = False
cap_changed = True cap_changed = True
else: else:
print('does not have a valid publishing date or filter string, already marked skipped') print('does not have a valid publishing date or filter string, already marked skipped')
log.info(f'Media: {instance.source} / {instance} has no published date ' log.info(f'Media: {instance.source} / {instance} has no published date '
f'set but is already marked to be skipped') f'set but is already marked to be skipped')
elif max_cap_age: elif max_cap_age:
if published > max_cap_age: if published > max_cap_age:
# Media was published after the cap date but is set to be skipped # Media was published after the cap date but is set to be skipped
log.info(f'Media: {instance.source} / {instance} has a valid ' log.info(f'Media: {instance.source} / {instance} has a valid '
f'publishing date, marking to be unskipped') f'publishing date, marking to be unskipped')
instance.skip = False instance.skip = False
cap_changed = True cap_changed = True
elif filter_text: elif filter_text:
if re.search(filter_text,instance.title): if source.is_regex_match(instance.title):
# Media was published after the cap date but is set to be skipped # Media matches the filter text but is set to be skipped
log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped') log.info(f'Media: {instance.source} / {instance} matches the filter text, marking to be unskipped')
instance.skip = False instance.skip = False
cap_changed = True cap_changed = True
else: else:
if not published: if not published:
log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped') log.info(f'Media: {instance.source} / {instance} has no published date, marking to be skipped')

View File

@ -8,7 +8,6 @@ import os
import json import json
import math import math
import uuid import uuid
import re
from io import BytesIO from io import BytesIO
from hashlib import sha1 from hashlib import sha1
from datetime import timedelta, datetime from datetime import timedelta, datetime
@ -256,9 +255,9 @@ def download_media_metadata(media_id):
f'{max_cap_age}, skipping') f'{max_cap_age}, skipping')
media.skip = True media.skip = True
# If the source has a search filter, check the video title matches the filter # If the source has a search filter, check the video title matches the filter
if not re.search(source.filter_text,media.title): if source.filter_text and not source.is_regex_match(media.title):
# Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false # Filter text not found in the media title. Accepts regex string, blank search filter results in this returning false
log.warn(f'Media: {source} / {media} does not contain {source.filter_text}, skipping') log.warn(f'Media: {source} / {media} does not match {source.filter_text}, skipping')
media.skip = True media.skip = True
# If the source has a cut-off check the upload date is within the allowed delta # If the source has a cut-off check the upload date is within the allowed delta
if source.delete_old_media and source.days_to_keep > 0: if source.delete_old_media and source.days_to_keep > 0:

View File

@ -1471,6 +1471,29 @@ class FormatMatchingTestCase(TestCase):
self.media.get_best_video_format() self.media.get_best_video_format()
self.media.get_best_audio_format() self.media.get_best_audio_format()
def test_is_regex_match(self):
    '''
        Source.is_regex_match() must report whether the source's filter
        text, used as a regular expression, is found in a media title.
    '''
    self.media.metadata = all_test_metadata['boring']
    # Maps a filter_text regex to whether it is expected to match the title
    # of the 'boring' test media.
    expected_matches = {
        '.*': True,
        'no fancy stuff': True,
        'No fancy stuff': False,
        '(?i)No fancy stuff': True,  # set case insensitive flag
        'no': True,
        'Foo': False,
        '^(?!.*fancy).*$': False,
        '^(?!.*funny).*$': True,
        '(?=.*f.*)(?=.{0,2}|.{4,})': True,
        'f{4,}': False,
        '^[^A-Z]*$': True,
        '^[^a-z]*$': False,
        '^[^\\s]*$': False,
    }
    for filter_text, expected in expected_matches.items():
        # One sub-test per pattern: a failure names the offending pattern
        # and the remaining patterns are still checked.
        with self.subTest(filter_text=filter_text):
            self.source.filter_text = filter_text
            self.assertEqual(
                self.source.is_regex_match(self.media.title),
                expected,
            )
class TasksTestCase(TestCase): class TasksTestCase(TestCase):
def setUp(self): def setUp(self):