Make thumbnails work and other stuff

This commit is contained in:
James Taylor 2019-06-05 00:41:15 -07:00
parent ccb795e31f
commit ae5fd9eb00
4 changed files with 78 additions and 39 deletions

View File

@ -33,33 +33,7 @@ def add_to_playlist(name, video_info_list):
if id not in ids: if id not in ids:
file.write(info + "\n") file.write(info + "\n")
missing_thumbnails.append(id) missing_thumbnails.append(id)
gevent.spawn(download_thumbnails, name, missing_thumbnails) gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails)
def download_thumbnail(playlist_name, video_id):
    """Download one video's thumbnail into a local playlist's thumbnail folder.

    The image is fetched from
    ``https://i.ytimg.com/vi/<video_id>/mqdefault.jpg`` and written to
    ``<thumbnails_directory>/<playlist_name>/<video_id>.jpg``. The playlist
    folder is created on demand if it does not exist yet.

    An HTTP error from the fetch is reported with ``print`` and the function
    returns without writing anything; any other exception propagates.
    """
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    playlist_directory = os.path.join(thumbnails_directory, playlist_name)
    save_location = os.path.join(playlist_directory, video_id + ".jpg")
    try:
        thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # exist_ok: the folder may have been created by someone else between
        # the failed open() above and this call.
        os.makedirs(playlist_directory, exist_ok=True)
        f = open(save_location, 'wb')
    # The context manager closes the file even if the write fails.
    with f:
        f.write(thumbnail)
def download_thumbnails(playlist_name, ids):
    """Download the thumbnails for every video id in ``ids``.

    ``ids`` must be a sliceable sequence (e.g. a list). Downloads run in
    greenlets, in consecutive groups of at most five; each group is waited on
    before the next one starts.
    """
    # only do 5 at a time
    batch_size = 5
    for start in range(0, len(ids), batch_size):
        # The final slice is simply shorter when len(ids) is not a multiple of
        # batch_size, so no separate "remainder" pass is needed.
        gevent.joinall([
            gevent.spawn(download_thumbnail, playlist_name, video_id)
            for video_id in ids[start:start + batch_size]
        ])
def get_local_playlist_page(name): def get_local_playlist_page(name):
@ -84,7 +58,7 @@ def get_local_playlist_page(name):
videos_html += html_common.video_item_html(info, html_common.small_video_item_template) videos_html += html_common.video_item_html(info, html_common.small_video_item_template)
except json.decoder.JSONDecodeError: except json.decoder.JSONDecodeError:
pass pass
gevent.spawn(download_thumbnails, name, missing_thumbnails) gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails)
return local_playlist_template.substitute( return local_playlist_template.substitute(
page_title = name + ' - Local playlist', page_title = name + ' - Local playlist',
header = html_common.get_header(), header = html_common.get_header(),

View File

@ -3,16 +3,13 @@ import settings
from string import Template from string import Template
import sqlite3 import sqlite3
import os import os
import secrets
import datetime
import itertools
import time import time
import urllib import gevent
import socks, sockshandler
with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f:
subscriptions_template = Template(f.read()) subscriptions_template = Template(f.read())
thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails")
# https://stackabuse.com/a-sqlite-tutorial-with-python/ # https://stackabuse.com/a-sqlite-tutorial-with-python/
@ -28,14 +25,14 @@ def open_database():
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels (
id integer PRIMARY KEY, id integer PRIMARY KEY,
channel_id text NOT NULL, channel_id text UNIQUE NOT NULL,
channel_name text NOT NULL, channel_name text NOT NULL,
time_last_checked integer time_last_checked integer
)''') )''')
cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
id integer PRIMARY KEY, id integer PRIMARY KEY,
uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
video_id text NOT NULL, video_id text UNIQUE NOT NULL,
title text NOT NULL, title text NOT NULL,
duration text, duration text,
time_published integer NOT NULL, time_published integer NOT NULL,
@ -58,7 +55,7 @@ def _subscribe(channels):
connection = open_database() connection = open_database()
try: try:
cursor = connection.cursor() cursor = connection.cursor()
cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels)
connection.commit() connection.commit()
except: except:
connection.rollback() connection.rollback()
@ -104,6 +101,10 @@ def _get_videos(number, offset):
units = { units = {
'year': 31536000, # 365*24*3600 'year': 31536000, # 365*24*3600
'month': 2592000, # 30*24*3600 'month': 2592000, # 30*24*3600
@ -126,6 +127,16 @@ def youtube_timestamp_to_posix(dumb_timestamp):
unit = unit[:-1] # remove s from end unit = unit[:-1] # remove s from end
return now - number*units[unit] return now - number*units[unit]
# Use this to mark a thumbnail acceptable to be retrieved at the request of the browser
downloading_thumbnails = set()
def download_thumbnails(thumbnails_directory, thumbnails):
    """Run ``util.download_thumbnails`` and then clear the in-progress marks.

    The download is started in its own greenlet and this function waits for
    that greenlet to finish. Afterwards, whether or not the wait completed
    normally, every id in ``thumbnails`` is removed from the module-level
    ``downloading_thumbnails`` set.
    """
    try:
        gevent.spawn(util.download_thumbnails, thumbnails_directory, thumbnails).join()
    finally:
        # Always unmark the ids, even if waiting on the greenlet was interrupted.
        downloading_thumbnails.difference_update(thumbnails)
def _get_upstream_videos(channel_id): def _get_upstream_videos(channel_id):
videos = [] videos = []
@ -136,12 +147,34 @@ def _get_upstream_videos(channel_id):
info['description'] = '' info['description'] = ''
info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order
videos.append(info) videos.append(info)
try:
existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory))
except FileNotFoundError:
existing_thumbnails = set()
missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails
downloading_thumbnails.update(missing_thumbnails)
gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails)
return videos return videos
def get_subscriptions_page(env, start_response): def get_subscriptions_page(env, start_response):
items_html = '''<nav class="item-grid">\n''' items_html = '''<nav class="item-grid">\n'''
for item in _get_videos(30, 0): for item in _get_videos(30, 0):
print("Downloading_thumbnails: ", downloading_thumbnails)
if item['id'] in downloading_thumbnails:
item['thumbnail'] = util.get_thumbnail_url(item['id'])
else:
item['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + item['id'] + '.jpg'
items_html += html_common.video_item_html(item, html_common.small_video_item_template) items_html += html_common.video_item_html(item, html_common.small_video_item_template)
items_html += '''\n</nav>''' items_html += '''\n</nav>'''
@ -168,9 +201,9 @@ def post_subscriptions_page(env, start_response):
connection = open_database() connection = open_database()
try: try:
cursor = connection.cursor() cursor = connection.cursor()
for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels'''): for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels''').fetchall():
db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) ) db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) )
cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) cursor.executemany('''INSERT OR IGNORE INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos)
cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) )
connection.commit() connection.commit()

View File

@ -5,6 +5,8 @@ import brotli
import urllib.parse import urllib.parse
import re import re
import time import time
import os
import gevent
# The trouble with the requests library: It ships its own certificate bundle via certifi # The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates # instead of using the system certificate store, meaning self-signed certificates
@ -176,6 +178,36 @@ desktop_ua = (('User-Agent', desktop_user_agent),)
def download_thumbnail(save_directory, video_id):
    """Download the thumbnail for one video and save it as a JPEG file.

    The image is fetched from
    ``https://i.ytimg.com/vi/<video_id>/mqdefault.jpg`` and written to
    ``<save_directory>/<video_id>.jpg``. ``save_directory`` is created on
    demand if it does not exist yet.

    An HTTP error from the fetch is reported with ``print`` and the function
    returns without writing anything; any other exception propagates.
    """
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(save_directory, video_id + ".jpg")
    try:
        thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # exist_ok: the directory may have been created by someone else
        # between the failed open() above and this call.
        os.makedirs(save_directory, exist_ok=True)
        f = open(save_location, 'wb')
    # The context manager closes the file even if the write fails.
    with f:
        f.write(thumbnail)
def download_thumbnails(save_directory, ids):
    """Download the thumbnails for every video id in ``ids``.

    ``ids`` may be any iterable of video ids; anything that is not already a
    list or tuple is copied into a list first. Downloads run in greenlets, in
    consecutive groups of at most five; each group is waited on before the
    next one starts.
    """
    if not isinstance(ids, (list, tuple)):
        # Sets and other non-sequence iterables cannot be sliced.
        ids = list(ids)
    # only do 5 at a time
    batch_size = 5
    for start in range(0, len(ids), batch_size):
        # The final slice is simply shorter when len(ids) is not a multiple of
        # batch_size, so no separate "remainder" pass is needed.
        gevent.joinall([
            gevent.spawn(download_thumbnail, save_directory, video_id)
            for video_id in ids[start:start + batch_size]
        ])
def dict_add(*dicts): def dict_add(*dicts):

View File

@ -61,7 +61,7 @@ def youtube(env, start_response):
start_response('200 OK', (('Content-type',mime_type),) ) start_response('200 OK', (('Content-type',mime_type),) )
return f.read() return f.read()
elif path.startswith("/data/playlist_thumbnails/"): elif path.startswith('/data/playlist_thumbnails/') or path.startswith('/data/subscription_thumbnails/'):
with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f: with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f:
start_response('200 OK', (('Content-type', "image/jpeg"),) ) start_response('200 OK', (('Content-type', "image/jpeg"),) )
return f.read() return f.read()