Make thumbnails work and other stuff

This commit is contained in:
James Taylor 2019-06-05 00:41:15 -07:00
parent ccb795e31f
commit ae5fd9eb00
4 changed files with 78 additions and 39 deletions

View File

@ -33,33 +33,7 @@ def add_to_playlist(name, video_info_list):
if id not in ids:
file.write(info + "\n")
missing_thumbnails.append(id)
gevent.spawn(download_thumbnails, name, missing_thumbnails)
def download_thumbnail(playlist_name, video_id):
    """Download the mqdefault thumbnail of one video into a playlist's folder.

    The image is saved as
    ``<thumbnails_directory>/<playlist_name>/<video_id>.jpg``. The playlist's
    folder is created on demand if opening the file fails because it is
    missing. An HTTP error while fetching is printed and the function returns
    without writing anything.
    """
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    playlist_directory = os.path.join(thumbnails_directory, playlist_name)
    save_location = os.path.join(playlist_directory, video_id + ".jpg")
    try:
        thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # exist_ok: do not fail if something else created the directory
        # between the failed open() and this call.
        os.makedirs(playlist_directory, exist_ok=True)
        f = open(save_location, 'wb')
    # The context manager closes the handle even if write() raises.
    with f:
        f.write(thumbnail)
def download_thumbnails(playlist_name, ids):
    """Download the thumbnails for ``ids`` into a playlist, 5 at a time.

    ``ids`` must be a sequence (supports ``len`` and slicing) of video ids.
    Each batch of at most 5 downloads is spawned as greenlets and joined
    before the next batch starts. The previous loop bound
    (``int(len(ids)/5) - 1``) stopped one batch early, so the final
    "remainder" batch could hold up to 9 downloads at once.
    """
    batch_size = 5
    for start in range(0, len(ids), batch_size):
        gevent.joinall([
            gevent.spawn(download_thumbnail, playlist_name, video_id)
            for video_id in ids[start:start + batch_size]
        ])
gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails)
def get_local_playlist_page(name):
@ -84,7 +58,7 @@ def get_local_playlist_page(name):
videos_html += html_common.video_item_html(info, html_common.small_video_item_template)
except json.decoder.JSONDecodeError:
pass
gevent.spawn(download_thumbnails, name, missing_thumbnails)
gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails)
return local_playlist_template.substitute(
page_title = name + ' - Local playlist',
header = html_common.get_header(),

View File

@ -3,16 +3,13 @@ import settings
from string import Template
import sqlite3
import os
import secrets
import datetime
import itertools
import time
import urllib
import socks, sockshandler
import gevent
with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f:
subscriptions_template = Template(f.read())
thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails")
# https://stackabuse.com/a-sqlite-tutorial-with-python/
@ -28,14 +25,14 @@ def open_database():
cursor = connection.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels (
id integer PRIMARY KEY,
channel_id text NOT NULL,
channel_id text UNIQUE NOT NULL,
channel_name text NOT NULL,
time_last_checked integer
)''')
cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
id integer PRIMARY KEY,
uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
video_id text NOT NULL,
video_id text UNIQUE NOT NULL,
title text NOT NULL,
duration text,
time_published integer NOT NULL,
@ -58,7 +55,7 @@ def _subscribe(channels):
connection = open_database()
try:
cursor = connection.cursor()
cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels)
cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels)
connection.commit()
except:
connection.rollback()
@ -104,6 +101,10 @@ def _get_videos(number, offset):
units = {
'year': 31536000, # 365*24*3600
'month': 2592000, # 30*24*3600
@ -126,6 +127,16 @@ def youtube_timestamp_to_posix(dumb_timestamp):
unit = unit[:-1] # remove s from end
return now - number*units[unit]
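# IDs of thumbnails whose download is still in progress. While an ID is in this set,
# the subscriptions page uses util.get_thumbnail_url for it (so the browser retrieves
# the thumbnail itself) instead of the local /data/subscription_thumbnails/ file.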
downloading_thumbnails = set()
def download_thumbnails(thumbnails_directory, thumbnails):
    """Run util.download_thumbnails in a greenlet and wait for it to finish.

    Whatever happens, the given ids are removed from the module-level
    ``downloading_thumbnails`` set afterwards.
    """
    try:
        # Spawn and wait in one expression; join() returns once the
        # greenlet has finished.
        gevent.spawn(
            util.download_thumbnails,
            thumbnails_directory,
            thumbnails,
        ).join()
    finally:
        downloading_thumbnails.difference_update(thumbnails)
def _get_upstream_videos(channel_id):
videos = []
@ -136,12 +147,34 @@ def _get_upstream_videos(channel_id):
info['description'] = ''
info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order
videos.append(info)
try:
existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory))
except FileNotFoundError:
existing_thumbnails = set()
missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails
downloading_thumbnails.update(missing_thumbnails)
gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails)
return videos
def get_subscriptions_page(env, start_response):
items_html = '''<nav class="item-grid">\n'''
for item in _get_videos(30, 0):
print("Downloading_thumbnails: ", downloading_thumbnails)
if item['id'] in downloading_thumbnails:
item['thumbnail'] = util.get_thumbnail_url(item['id'])
else:
item['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + item['id'] + '.jpg'
items_html += html_common.video_item_html(item, html_common.small_video_item_template)
items_html += '''\n</nav>'''
@ -168,9 +201,9 @@ def post_subscriptions_page(env, start_response):
connection = open_database()
try:
cursor = connection.cursor()
for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels'''):
for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels''').fetchall():
db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) )
cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos)
cursor.executemany('''INSERT OR IGNORE INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos)
cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) )
connection.commit()

View File

@ -5,6 +5,8 @@ import brotli
import urllib.parse
import re
import time
import os
import gevent
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
@ -176,6 +178,36 @@ desktop_ua = (('User-Agent', desktop_user_agent),)
def download_thumbnail(save_directory, video_id):
    """Download the mqdefault thumbnail of one video into ``save_directory``.

    The image is saved as ``<save_directory>/<video_id>.jpg``. The directory
    is created on demand if opening the file fails because it is missing.
    An HTTP error while fetching is printed and the function returns without
    writing anything.
    """
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(save_directory, video_id + ".jpg")
    try:
        thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # exist_ok: do not fail if something else created the directory
        # between the failed open() and this call.
        os.makedirs(save_directory, exist_ok=True)
        f = open(save_location, 'wb')
    # The context manager closes the handle even if write() raises.
    with f:
        f.write(thumbnail)
def download_thumbnails(save_directory, ids):
    """Download the thumbnails for ``ids`` into ``save_directory``, 5 at a time.

    ``ids`` may be any iterable of video ids; anything that is not already a
    list or tuple is copied into a list so it can be sliced. Each batch of at
    most 5 downloads is spawned as greenlets and joined before the next batch
    starts. The previous loop bound (``int(len(ids)/5) - 1``) stopped one
    batch early, so the final "remainder" batch could hold up to 9 downloads
    at once.
    """
    if not isinstance(ids, (list, tuple)):
        ids = list(ids)
    batch_size = 5
    for start in range(0, len(ids), batch_size):
        gevent.joinall([
            gevent.spawn(download_thumbnail, save_directory, video_id)
            for video_id in ids[start:start + batch_size]
        ])
def dict_add(*dicts):

View File

@ -61,7 +61,7 @@ def youtube(env, start_response):
start_response('200 OK', (('Content-type',mime_type),) )
return f.read()
elif path.startswith("/data/playlist_thumbnails/"):
elif path.startswith('/data/playlist_thumbnails/') or path.startswith('/data/subscription_thumbnails/'):
with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f:
start_response('200 OK', (('Content-type', "image/jpeg"),) )
return f.read()