Subscriptions auto-checking system
This commit is contained in:
parent
38792081d4
commit
31a04555ae
@ -66,6 +66,12 @@ For security reasons, enabling this is not recommended.''',
|
|||||||
1 to sort by newest''',
|
1 to sort by newest''',
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
('autocheck_subscriptions', {
|
||||||
|
'type': bool,
|
||||||
|
'default': 0,
|
||||||
|
'comment': '',
|
||||||
|
}),
|
||||||
|
|
||||||
('gather_googlevideo_domains', {
|
('gather_googlevideo_domains', {
|
||||||
'type': bool,
|
'type': bool,
|
||||||
'default': False,
|
'default': False,
|
||||||
|
@ -12,6 +12,7 @@ import contextlib
|
|||||||
import defusedxml.ElementTree
|
import defusedxml.ElementTree
|
||||||
import urllib
|
import urllib
|
||||||
import math
|
import math
|
||||||
|
import secrets
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
from flask import request
|
from flask import request
|
||||||
@ -37,8 +38,8 @@ def open_database():
|
|||||||
yt_channel_id text UNIQUE NOT NULL,
|
yt_channel_id text UNIQUE NOT NULL,
|
||||||
channel_name text NOT NULL,
|
channel_name text NOT NULL,
|
||||||
time_last_checked integer,
|
time_last_checked integer,
|
||||||
muted integer DEFAULT 0,
|
next_check_time integer,
|
||||||
upload_frequency integer
|
muted integer DEFAULT 0
|
||||||
)''')
|
)''')
|
||||||
cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
|
cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
|
||||||
id integer PRIMARY KEY,
|
id integer PRIMARY KEY,
|
||||||
@ -227,6 +228,11 @@ def _channels_with_tag(cursor, tag, order=False, exclude_muted=False, include_mu
|
|||||||
|
|
||||||
return cursor.execute(statement, [tag]).fetchall()
|
return cursor.execute(statement, [tag]).fetchall()
|
||||||
|
|
||||||
|
def _schedule_checking(cursor, channel_id, next_check_time):
    '''Persist the time of the next automatic check for one channel.

    cursor: object exposing execute() for the subscriptions database
    channel_id: value matched against subscribed_channels.yt_channel_id
    next_check_time: timestamp of the next check; truncated to an integer
        before it is stored
    '''
    statement = '''UPDATE subscribed_channels SET next_check_time = ? WHERE yt_channel_id = ?'''
    whole_seconds = int(next_check_time)
    cursor.execute(statement, [whole_seconds, channel_id])
|
||||||
|
|
||||||
|
def _is_muted(cursor, channel_id):
    '''Tell whether a channel must be skipped by the checking machinery.

    cursor: object exposing execute() for the subscriptions database
    channel_id: value matched against subscribed_channels.yt_channel_id

    Returns True when the channel's muted column is truthy. Also returns
    True when no row exists for channel_id: a channel that is not in the
    table (for instance one removed while a check was still pending) must
    not be checked, and callers only use the result to decide whether to
    skip the channel. Previously this case raised TypeError because
    fetchone() returns None when there is no matching row.
    '''
    row = cursor.execute('''SELECT muted FROM subscribed_channels WHERE yt_channel_id=?''', [channel_id]).fetchone()
    if row is None:
        return True
    return bool(row[0])
|
||||||
|
|
||||||
units = {
|
units = {
|
||||||
'year': 31536000, # 365*24*3600
|
'year': 31536000, # 365*24*3600
|
||||||
@ -257,6 +263,9 @@ except FileNotFoundError:
|
|||||||
existing_thumbnails = set()
|
existing_thumbnails = set()
|
||||||
|
|
||||||
|
|
||||||
|
# --- Manual checking system. Rate limited in order to support very large numbers of channels to be checked ---
|
||||||
|
# Auto checking system plugs into this for convenience, though it doesn't really need the rate limiting
|
||||||
|
|
||||||
check_channels_queue = util.RateLimitedQueue()
|
check_channels_queue = util.RateLimitedQueue()
|
||||||
checking_channels = set()
|
checking_channels = set()
|
||||||
|
|
||||||
@ -273,9 +282,66 @@ def check_channel_worker():
|
|||||||
|
|
||||||
for i in range(0,5):
|
for i in range(0,5):
|
||||||
gevent.spawn(check_channel_worker)
|
gevent.spawn(check_channel_worker)
|
||||||
|
# ----------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# --- Auto checking system ---
|
||||||
|
|
||||||
|
if settings.autocheck_subscriptions:
    # How many seconds past its due time a job may be before it is considered
    # stale. Being slightly late is normal: next_check_time values are whole
    # seconds and several jobs can share the same timestamp, so by the time
    # the dispatcher gets to the second one it is already (barely) overdue.
    AUTOCHECK_LATE_TOLERANCE = 5

    # job application format: dict with keys (channel_id, channel_name, next_check_time)
    autocheck_job_application = gevent.queue.Queue()  # only really meant to hold 1 item, just reusing gevent's wait and timeout machinery

    # list of dicts with the keys (channel_id, channel_name, next_check_time).
    # Stores all the channels that need to be autochecked and when to check them
    autocheck_jobs = []
    with open_database() as connection:
        with connection as cursor:
            now = time.time()
            scheduled_rows = cursor.execute('''SELECT yt_channel_id, channel_name, next_check_time FROM subscribed_channels WHERE next_check_time IS NOT NULL AND muted != 1''').fetchall()
            for channel_id, channel_name, next_check_time in scheduled_rows:
                if next_check_time < now:
                    # expired while the program was not running: spread these
                    # checks randomly over the next hour (in 1 minute steps)
                    next_check_time = now + 3600*secrets.randbelow(60)/60
                    _schedule_checking(cursor, channel_id, next_check_time)
                autocheck_jobs.append({'channel_id': channel_id, 'channel_name': channel_name, 'next_check_time': next_check_time})

    def _add_autocheck_job(new_job):
        '''Adds new_job to autocheck_jobs, replacing any job already held for the same channel.

        Every completed check (manual or automatic) applies for a new job.
        Appending blindly would leave the channel's previous job in the list
        as well, so the channel would be checked once per accumulated job.'''
        for index, job in enumerate(autocheck_jobs):
            if job['channel_id'] == new_job['channel_id']:
                autocheck_jobs[index] = new_job
                return
        autocheck_jobs.append(new_job)

    def autocheck_dispatcher():
        '''Scans autocheck_jobs. Sleeps until the earliest job is due, then adds that channel to the checking queue above. Can be sent a new job through autocheck_job_application'''
        while True:
            if not autocheck_jobs:
                # nothing scheduled: block until somebody applies for a job
                _add_autocheck_job(autocheck_job_application.get())
                continue

            earliest_job_index, earliest_job = min(enumerate(autocheck_jobs), key=lambda pair: pair[1]['next_check_time'])
            time_until_earliest_job = earliest_job['next_check_time'] - time.time()

            # Only a job that is substantially overdue is treated as an error
            # and rescheduled; one that is merely on time or a moment late is
            # executed right away further down.
            if time_until_earliest_job <= -AUTOCHECK_LATE_TOLERANCE:
                print('ERROR: autocheck_dispatcher got job scheduled in the past, skipping and rescheduling: ' + earliest_job['channel_id'] + ', ' + earliest_job['channel_name'] + ', ' + str(earliest_job['next_check_time']))
                next_check_time = time.time() + 3600*secrets.randbelow(60)/60
                with_open_db(_schedule_checking, earliest_job['channel_id'], next_check_time)
                earliest_job['next_check_time'] = next_check_time
                continue

            # make sure it's not muted
            if with_open_db(_is_muted, earliest_job['channel_id']):
                del autocheck_jobs[earliest_job_index]
                continue

            if time_until_earliest_job > 0:
                try:
                    # sleep for time_until_earliest_job time, but allow to be interrupted by new jobs
                    new_job = autocheck_job_application.get(timeout=time_until_earliest_job)
                except gevent.queue.Empty:
                    # no new jobs, time to execute the earliest job
                    pass
                else:
                    # new job: record it and re-evaluate which job is earliest
                    _add_autocheck_job(new_job)
                    continue

            # hand the channel over to the manual checking machinery
            channel_names[earliest_job['channel_id']] = earliest_job['channel_name']
            checking_channels.add(earliest_job['channel_id'])
            check_channels_queue.put(earliest_job['channel_id'])
            del autocheck_jobs[earliest_job_index]

    gevent.spawn(autocheck_dispatcher)
|
||||||
|
# ----------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def check_channels_if_necessary(channel_ids):
|
def check_channels_if_necessary(channel_ids):
|
||||||
@ -305,13 +371,31 @@ def _get_upstream_videos(channel_id):
|
|||||||
videos.append((channel_id, video_item['id'], video_item['title'], video_item['duration'], video_item['time_published'], video_item['description']))
|
videos.append((channel_id, video_item['id'], video_item['title'], video_item['duration'], video_item['time_published'], video_item['description']))
|
||||||
|
|
||||||
|
|
||||||
|
if len(videos) == 0:
|
||||||
|
average_upload_period = 4*7*24*3600 # assume 1 month for channel with no videos
|
||||||
|
elif len(videos) < 5:
|
||||||
|
average_upload_period = int((time.time() - videos[len(videos)-1][4])/len(videos))
|
||||||
|
else:
|
||||||
|
average_upload_period = int((time.time() - videos[4][4])/5) # equivalent to averaging the time between videos for the last 5 videos
|
||||||
|
|
||||||
|
# calculate when to check next for auto checking
|
||||||
|
# add some quantization and randomness to make pattern analysis by Youtube slightly harder
|
||||||
|
quantized_upload_period = average_upload_period - (average_upload_period % (4*3600)) + 4*3600 # round up to nearest 4 hours
|
||||||
|
randomized_upload_period = quantized_upload_period*(1 + secrets.randbelow(50)/50*0.5) # randomly between 1x and 1.5x
|
||||||
|
next_check_delay = randomized_upload_period/5 # check at 5x the channel posting rate. might want to fine tune this number
|
||||||
|
next_check_time = int(time.time() + next_check_delay)
|
||||||
|
|
||||||
with open_database() as connection:
|
with open_database() as connection:
|
||||||
with connection as cursor:
|
with connection as cursor:
|
||||||
cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description)
|
cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description)
|
||||||
VALUES ((SELECT id FROM subscribed_channels WHERE yt_channel_id=?), ?, ?, ?, ?, ?)''', videos)
|
VALUES ((SELECT id FROM subscribed_channels WHERE yt_channel_id=?), ?, ?, ?, ?, ?)''', videos)
|
||||||
cursor.execute('''UPDATE subscribed_channels
|
cursor.execute('''UPDATE subscribed_channels
|
||||||
SET time_last_checked = ?
|
SET time_last_checked = ?, next_check_time = ?
|
||||||
WHERE yt_channel_id=?''', [int(time.time()), channel_id])
|
WHERE yt_channel_id=?''', [int(time.time()), next_check_time, channel_id])
|
||||||
|
|
||||||
|
if settings.autocheck_subscriptions:
|
||||||
|
if not _is_muted(cursor, channel_id):
|
||||||
|
autocheck_job_application.put({'channel_id': channel_id, 'channel_name': channel_names[channel_id], 'next_check_time': next_check_time})
|
||||||
|
|
||||||
|
|
||||||
def check_all_channels():
|
def check_all_channels():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user