refactor common.py into 3 files

James Taylor 2019-02-21 21:32:31 -08:00
parent a61ba6b8f4
commit b32330be4f
12 changed files with 519 additions and 478 deletions
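The split moves networking and generic helpers into util.py, HTML templating and rendering into html_common.py, and YouTube JSON parsing into yt_data_extract.py. A rough sketch of the migration pattern repeated in every hunk below (names taken from the diff; url and node are placeholders):

# before: one grab-bag module
from youtube import common
content = common.fetch_url(url, common.desktop_ua)  # networking
header = common.get_header()                        # HTML templating
title = common.get_plain_text(node)                 # JSON extraction

# after: each concern in its own module
from youtube import util, html_common, yt_data_extract
content = util.fetch_url(url, util.desktop_ua)
header = html_common.get_header()
title = yt_data_extract.get_plain_text(node)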


@ -1,10 +1,10 @@
# Contains functions having to do with logging in
from youtube import util, html_common
import settings
import urllib
import json
from youtube import common
import re
import settings
import http.cookiejar
import io
import os
@ -106,7 +106,7 @@ def get_account_login_page(env, start_response):
'''
page = '''
<form action="''' + common.URL_ORIGIN + '''/login" method="POST">
<form action="''' + util.URL_ORIGIN + '''/login" method="POST">
<div class="form-field">
<label for="username">Username:</label>
<input type="text" id="username" name="username">
@ -130,10 +130,10 @@ Using Tor to log in should only be done if the account was created using a proxy
</div>
'''
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Login",
style = style,
header = common.get_header(),
header = html_common.get_header(),
page = page,
).encode('utf-8')
@ -229,7 +229,7 @@ def _login(username, password, cookiejar, use_tor):
Taken from youtube-dl
"""
login_page = common.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
'''with open('debug/login_page', 'w', encoding='utf-8') as f:
f.write(login_page)'''
#print(cookiejar.as_lwp_str())
@ -255,7 +255,7 @@ def _login(username, password, cookiejar, use_tor):
'Google-Accounts-XSRF': 1,
}
headers.update(yt_dl_headers)
result = common.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
#print(cookiejar.as_lwp_str())
'''with open('debug/' + note, 'w', encoding='utf-8') as f:
f.write(result)'''
@ -387,7 +387,7 @@ def _login(username, password, cookiejar, use_tor):
return False
try:
check_cookie_results = common.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
return False
@ -398,7 +398,7 @@ def _login(username, password, cookiejar, use_tor):
warn('Unable to log in')
return False
select_site_page = common.fetch_url('https://m.youtube.com/select_site', headers=common.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8')
select_site_page = util.fetch_url('https://m.youtube.com/select_site', headers=util.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8')
match = _CHANNEL_ID_RE.search(select_site_page)
if match is None:
warn('Failed to find channel id')


@ -1,6 +1,6 @@
import base64
import youtube.common as common
from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id
from youtube import util, yt_data_extract, html_common
import http_errors
import urllib
import json
@ -91,7 +91,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
print("Sending channel tab ajax request")
content = common.fetch_url(url, common.desktop_ua + headers_1)
content = util.fetch_url(url, util.desktop_ua + headers_1)
print("Finished recieving channel tab response")
'''with open('debug/channel_debug', 'wb') as f:
@ -110,7 +110,7 @@ def get_number_of_videos(channel_id):
# Sometimes retrieving playlist info fails with 403 for no discernable reason
try:
response = common.fetch_url(url, common.mobile_ua + headers_pbj)
response = util.fetch_url(url, util.mobile_ua + headers_pbj)
except urllib.error.HTTPError as e:
if e.code != 403:
raise
@ -133,20 +133,20 @@ def get_channel_id(username):
# method that gives the smallest possible response at ~10 kb
# needs to be as fast as possible
url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
response = util.fetch_url(url, util.mobile_ua + headers_1).decode('utf-8')
return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
def grid_items_html(items, additional_info={}):
result = ''' <nav class="item-grid">\n'''
for item in items:
result += common.renderer_html(item, additional_info)
result += html_common.renderer_html(item, additional_info)
result += '''\n</nav>'''
return result
def list_items_html(items, additional_info={}):
result = ''' <nav class="item-list">'''
for item in items:
result += common.renderer_html(item, additional_info)
result += html_common.renderer_html(item, additional_info)
result += '''\n</nav>'''
return result
@ -168,11 +168,11 @@ def channel_tabs_html(channel_id, current_tab, search_box_value=''):
)
else:
result += channel_tab_template.substitute(
href_attribute = ' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"',
href_attribute = ' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"',
tab_name = tab_name,
)
result += channel_search_template.substitute(
action = URL_ORIGIN + "/channel/" + channel_id + "/search",
action = util.URL_ORIGIN + "/channel/" + channel_id + "/search",
search_box_value = html.escape(search_box_value),
)
return result
@ -192,7 +192,7 @@ def channel_sort_buttons_html(channel_id, tab, current_sort):
)
else:
result += channel_sort_button_template.substitute(
href_attribute=' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"',
href_attribute=' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"',
text = 'Sort by ' + sort_name
)
return result
@ -246,14 +246,14 @@ def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_
items_html = grid_items_html(items, {'author': microformat['title']})
return yt_channel_items_template.substitute(
header = common.get_header(),
header = html_common.get_header(),
channel_title = microformat['title'],
channel_tabs = channel_tabs_html(channel_id, 'Videos'),
sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort),
avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
page_title = microformat['title'] + ' - Channel',
items = items_html,
page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string),
page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/30), util.URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string),
number_of_results = '{:,}'.format(number_of_videos) + " videos",
)
@ -267,7 +267,7 @@ def channel_playlists_html(polymer_json, current_sort=3):
items_html = grid_items_html(items, {'author': microformat['title']})
return yt_channel_items_template.substitute(
header = common.get_header(),
header = html_common.get_header(),
channel_title = microformat['title'],
channel_tabs = channel_tabs_html(channel_id, 'Playlists'),
sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort),
@ -310,25 +310,25 @@ def channel_about_page(polymer_json):
channel_links += channel_link_template.substitute(
url = html.escape(url),
text = common.get_plain_text(link_json['title']),
text = yt_data_extract.get_plain_text(link_json['title']),
)
stats = ''
for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'):
try:
stat_value = common.get_plain_text(channel_metadata[stat_name])
stat_value = yt_data_extract.get_plain_text(channel_metadata[stat_name])
except KeyError:
continue
else:
stats += stat_template.substitute(stat_value=stat_value)
try:
description = common.format_text_runs(common.get_formatted_text(channel_metadata['description']))
description = yt_data_extract.format_text_runs(yt_data_extract.get_formatted_text(channel_metadata['description']))
except KeyError:
description = ''
return yt_channel_about_template.substitute(
header = common.get_header(),
page_title = common.get_plain_text(channel_metadata['title']) + ' - About',
channel_title = common.get_plain_text(channel_metadata['title']),
header = html_common.get_header(),
page_title = yt_data_extract.get_plain_text(channel_metadata['title']) + ' - About',
channel_title = yt_data_extract.get_plain_text(channel_metadata['title']),
avatar = html.escape(avatar),
description = description,
links = channel_links,
@ -351,13 +351,13 @@ def channel_search_page(polymer_json, query, current_page=1, number_of_videos =
items_html = list_items_html(items)
return yt_channel_items_template.substitute(
header = common.get_header(),
header = html_common.get_header(),
channel_title = html.escape(microformat['title']),
channel_tabs = channel_tabs_html(channel_id, '', query),
avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
page_title = html.escape(query + ' - Channel search'),
items = items_html,
page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/29), URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string),
page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/29), util.URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string),
number_of_results = '',
sort_buttons = '',
)
@ -367,7 +367,7 @@ def get_channel_search_json(channel_id, query, page):
ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
polymer_json = common.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, common.desktop_ua + headers_1)
polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
'''with open('debug/channel_search_debug', 'wb') as f:
f.write(polymer_json)'''
polymer_json = json.loads(polymer_json)
@ -384,10 +384,10 @@ def get_channel_page(env, start_response):
tab = 'videos'
parameters = env['parameters']
page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
sort = common.default_multi_get(parameters, 'sort', 0, default='3')
view = common.default_multi_get(parameters, 'view', 0, default='1')
query = common.default_multi_get(parameters, 'query', 0, default='')
page_number = int(util.default_multi_get(parameters, 'page', 0, default='1'))
sort = util.default_multi_get(parameters, 'sort', 0, default='3')
view = util.default_multi_get(parameters, 'view', 0, default='1')
query = util.default_multi_get(parameters, 'query', 0, default='')
if tab == 'videos':
tasks = (
@ -399,11 +399,11 @@ def get_channel_page(env, start_response):
result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING'])
elif tab == 'about':
polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', common.desktop_ua + headers_1)
polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
polymer_json = json.loads(polymer_json)
result = channel_about_page(polymer_json)
elif tab == 'playlists':
polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], common.desktop_ua + headers_1)
polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
'''with open('debug/channel_playlists_debug', 'wb') as f:
f.write(polymer_json)'''
polymer_json = json.loads(polymer_json)
@ -443,22 +443,22 @@ def get_channel_page_general_url(env, start_response):
return b'Invalid channel url'
if page == 'videos':
polymer_json = common.fetch_url(base_url + '/videos?pbj=1&view=0', common.desktop_ua + headers_1)
polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
'''with open('debug/user_page_videos', 'wb') as f:
f.write(polymer_json)'''
polymer_json = json.loads(polymer_json)
result = channel_videos_html(polymer_json)
elif page == 'about':
polymer_json = common.fetch_url(base_url + '/about?pbj=1', common.desktop_ua + headers_1)
polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
polymer_json = json.loads(polymer_json)
result = channel_about_page(polymer_json)
elif page == 'playlists':
polymer_json = common.fetch_url(base_url+ '/playlists?pbj=1&view=1', common.desktop_ua + headers_1)
polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
polymer_json = json.loads(polymer_json)
result = channel_playlists_html(polymer_json)
elif page == 'search':
raise NotImplementedError()
'''polymer_json = common.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, common.desktop_ua + headers_1)
'''polymer_json = util.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, util.desktop_ua + headers_1)
polymer_json = json.loads(polymer_json)
return channel_search_page('''
else:


@ -1,13 +1,14 @@
from youtube import proto, util, html_common, yt_data_extract, accounts
import settings
import json
from youtube import proto, common, accounts
import base64
from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url
from string import Template
import urllib.request
import urllib
import html
import settings
import re
comment_area_template = Template('''
<section class="comment-area">
$video-metadata
@ -130,7 +131,7 @@ def request_comments(ctoken, replies=False):
url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
for i in range(0,8): # don't retry more than 8 times
content = fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
content = content[4:]
elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
@ -151,10 +152,10 @@ def single_comment_ctoken(video_id, comment_id):
def parse_comments_ajax(content, replies=False):
try:
content = json.loads(uppercase_escape(content.decode('utf-8')))
content = json.loads(util.uppercase_escape(content.decode('utf-8')))
#print(content)
comments_raw = content['content']['continuation_contents']['contents']
ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
ctoken = util.default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
comments = []
for comment_raw in comments_raw:
@ -163,7 +164,7 @@ def parse_comments_ajax(content, replies=False):
if comment_raw['replies'] is not None:
reply_ctoken = comment_raw['replies']['continuations'][0]['continuation']
comment_id, video_id = get_ids(reply_ctoken)
replies_url = URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
replies_url = util.URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
comment_raw = comment_raw['comment']
comment = {
'author': comment_raw['author']['runs'][0]['text'],
@ -189,7 +190,7 @@ reply_count_regex = re.compile(r'(\d+)')
def parse_comments_polymer(content, replies=False):
try:
video_title = ''
content = json.loads(uppercase_escape(content.decode('utf-8')))
content = json.loads(util.uppercase_escape(content.decode('utf-8')))
url = content[1]['url']
ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
video_id = ctoken_metadata(ctoken)['video_id']
@ -200,7 +201,7 @@ def parse_comments_polymer(content, replies=False):
comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents']
replies = True
ctoken = default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')
ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')
comments = []
for comment_raw in comments_raw:
@ -219,8 +220,8 @@ def parse_comments_polymer(content, replies=False):
if 'replies' in comment_raw:
#reply_ctoken = comment_raw['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']['continuation']
#comment_id, video_id = get_ids(reply_ctoken)
replies_url = URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
view_replies_text = common.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText'])
replies_url = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
view_replies_text = yt_data_extract.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText'])
match = reply_count_regex.search(view_replies_text)
if match is None:
view_replies_text = '1 reply'
@ -228,18 +229,18 @@ def parse_comments_polymer(content, replies=False):
view_replies_text = match.group(1) + " replies"
elif not replies:
view_replies_text = "Reply"
replies_url = URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id
replies_url = util.URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id
comment_raw = comment_raw['comment']
comment_raw = comment_raw['commentRenderer']
comment = {
'author': common.get_plain_text(comment_raw['authorText']),
'author': yt_data_extract.get_plain_text(comment_raw['authorText']),
'author_url': comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'],
'author_channel_id': comment_raw['authorEndpoint']['browseEndpoint']['browseId'],
'author_id': comment_raw['authorId'],
'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'],
'likes': comment_raw['likeCount'],
'published': common.get_plain_text(comment_raw['publishedTimeText']),
'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']),
'text': comment_raw['contentText'].get('runs', ''),
'view_replies_text': view_replies_text,
'replies_url': replies_url,
@ -264,13 +265,13 @@ def get_comments_html(comments):
replies = reply_link_template.substitute(url=comment['replies_url'], view_replies_text=html.escape(comment['view_replies_text']))
if settings.enable_comment_avatars:
avatar = comment_avatar_template.substitute(
author_url = URL_ORIGIN + comment['author_url'],
author_url = util.URL_ORIGIN + comment['author_url'],
author_avatar = '/' + comment['author_avatar'],
)
else:
avatar = ''
if comment['author_channel_id'] in accounts.accounts:
delete_url = (URL_ORIGIN + '/delete_comment?video_id='
delete_url = (util.URL_ORIGIN + '/delete_comment?video_id='
+ comment['video_id']
+ '&channel_id='+ comment['author_channel_id']
+ '&author_id=' + comment['author_id']
@ -280,14 +281,14 @@ def get_comments_html(comments):
else:
action_buttons = ''
permalink = URL_ORIGIN + '/watch?v=' + comment['video_id'] + '&lc=' + comment['comment_id']
permalink = util.URL_ORIGIN + '/watch?v=' + comment['video_id'] + '&lc=' + comment['comment_id']
html_result += comment_template.substitute(
author=comment['author'],
author_url = URL_ORIGIN + comment['author_url'],
author_url = util.URL_ORIGIN + comment['author_url'],
avatar = avatar,
likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '',
published = comment['published'],
text = format_text_runs(comment['text']),
text = yt_data_extract.format_text_runs(comment['text']),
datetime = '', #TODO
replies = replies,
action_buttons = action_buttons,
@ -297,10 +298,10 @@ def get_comments_html(comments):
def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
if settings.enable_comments:
post_comment_url = common.URL_ORIGIN + "/post_comment?video_id=" + video_id
post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id
post_comment_link = '''<a class="sort-button" href="''' + post_comment_url + '''">Post comment</a>'''
other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
other_sort_name = 'newest' if sort == 0 else 'top'
other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>'''
@ -314,7 +315,7 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
if ctoken == '':
more_comments_button = ''
else:
more_comments_button = more_comments_template.substitute(url = common.URL_ORIGIN + '/comments?ctoken=' + ctoken)
more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken)
result = '''<section class="comments-area">\n'''
result += comment_links + '\n'
@ -350,7 +351,7 @@ comment_box_template = Template('''
<select id="account-selection" name="channel_id">
$options
</select>
<a href="''' + common.URL_ORIGIN + '''/login" target="_blank">Add account</a>
<a href="''' + util.URL_ORIGIN + '''/login" target="_blank">Add account</a>
</div>
<textarea name="comment_text"></textarea>
$video_id_input
@ -359,7 +360,7 @@ $options
def get_comments_page(env, start_response):
start_response('200 OK', [('Content-type','text/html'),] )
parameters = env['parameters']
ctoken = default_multi_get(parameters, 'ctoken', 0, default='')
ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='')
replies = False
if not ctoken:
video_id = parameters['video_id'][0]
@ -384,17 +385,17 @@ def get_comments_page(env, start_response):
page_number = page_number,
sort = 'top' if metadata['sort'] == 0 else 'newest',
title = html.escape(comment_info['video_title']),
url = common.URL_ORIGIN + '/watch?v=' + metadata['video_id'],
url = util.URL_ORIGIN + '/watch?v=' + metadata['video_id'],
thumbnail = '/i.ytimg.com/vi/'+ metadata['video_id'] + '/mqdefault.jpg',
)
comment_box = comment_box_template.substitute(
form_action= common.URL_ORIGIN + '/post_comment',
form_action= util.URL_ORIGIN + '/post_comment',
video_id_input='''<input type="hidden" name="video_id" value="''' + metadata['video_id'] + '''">''',
post_text='Post comment',
options=comment_box_account_options(),
)
other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort'])
other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort'])
other_sort_name = 'newest' if metadata['sort'] == 0 else 'top'
other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>'''
@ -408,7 +409,7 @@ def get_comments_page(env, start_response):
if ctoken == '':
more_comments_button = ''
else:
more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken)
comments_area = '<section class="comments-area">\n'
comments_area += video_metadata + comment_box + comment_links + '\n'
comments_area += '<div class="comments">\n'
@ -417,7 +418,7 @@ def get_comments_page(env, start_response):
comments_area += more_comments_button + '\n'
comments_area += '</section>\n'
return yt_comments_template.substitute(
header = common.get_header(),
header = html_common.get_header(),
comments_area = comments_area,
page_title = page_title,
).encode('utf-8')


@ -1,46 +1,8 @@
from youtube.template import Template
from youtube import local_playlist
import settings
import html
from youtube import local_playlist, yt_data_extract, util
import json
import re
import urllib.parse
import gzip
import brotli
import time
import socks, sockshandler
URL_ORIGIN = "/https://www.youtube.com"
# videos (all of type str):
# id
# title
# url
# author
# author_url
# thumbnail
# description
# published
# duration
# likes
# dislikes
# views
# playlist_index
# playlists:
# id
# title
# url
# author
# author_url
# thumbnail
# description
# updated
# size
# first_video_id
import html
with open('yt_basic_template.html', 'r', encoding='utf-8') as file:
@ -139,153 +101,6 @@ medium_channel_item_template = Template('''
''')
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
'''Separate cookiejars for receiving and sending'''
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
import http.cookiejar
self.cookiejar_send = cookiejar_send
self.cookiejar_receive = cookiejar_receive
def http_request(self, request):
if self.cookiejar_send is not None:
self.cookiejar_send.add_cookie_header(request)
return request
def http_response(self, request, response):
if self.cookiejar_receive is not None:
self.cookiejar_receive.extract_cookies(response, request)
return response
https_request = http_request
https_response = http_response
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
for encoding in reversed(encodings):
if encoding == 'identity':
continue
if encoding == 'br':
content = brotli.decompress(content)
elif encoding == 'gzip':
content = gzip.decompress(content)
return content
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True):
'''
When cookiejar_send is set to a CookieJar object,
those cookies will be sent in the request (but cookies in response will not be merged into it)
When cookiejar_receive is set to a CookieJar object,
cookies received in the response will be merged into the object (nothing will be sent from it)
When both are set to the same object, cookies will be sent from the object,
and response cookies will be merged into it.
'''
headers = dict(headers) # Note: Calling dict() on a dict will make a copy
headers['Accept-Encoding'] = 'gzip, br'
# prevent python version being leaked by urllib if User-Agent isn't provided
# (urllib will use ex. Python-urllib/3.6 otherwise)
if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
headers['User-Agent'] = 'Python-urllib'
if data is not None:
if isinstance(data, str):
data = data.encode('ascii')
elif not isinstance(data, bytes):
data = urllib.parse.urlencode(data).encode('ascii')
start_time = time.time()
req = urllib.request.Request(url, data=data, headers=headers)
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
if use_tor and settings.route_tor:
opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
else:
opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout)
response_time = time.time()
content = response.read()
read_finish = time.time()
if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
return content
mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
desktop_ua = (('User-Agent', desktop_user_agent),)
def dict_add(*dicts):
for dictionary in dicts[1:]:
dicts[0].update(dictionary)
return dicts[0]
def video_id(url):
url_parts = urllib.parse.urlparse(url)
return urllib.parse.parse_qs(url_parts.query)['v'][0]
def uppercase_escape(s):
return re.sub(
r'\\U([0-9a-fA-F]{8})',
lambda m: chr(int(m.group(1), base=16)), s)
def default_multi_get(object, *keys, default):
''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
try:
for key in keys:
object = object[key]
return object
except (IndexError, KeyError):
return default
def get_plain_text(node):
try:
return html.escape(node['simpleText'])
except KeyError:
return unformmated_text_runs(node['runs'])
def unformmated_text_runs(runs):
result = ''
for text_run in runs:
result += html.escape(text_run["text"])
return result
def format_text_runs(runs):
if isinstance(runs, str):
return runs
result = ''
for text_run in runs:
if text_run.get("bold", False):
result += "<b>" + html.escape(text_run["text"]) + "</b>"
elif text_run.get('italics', False):
result += "<i>" + html.escape(text_run["text"]) + "</i>"
else:
result += html.escape(text_run["text"])
return result
# default, sddefault, mqdefault, hqdefault, hq720
def get_thumbnail_url(video_id):
return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
def seconds_to_timestamp(seconds):
seconds = int(seconds)
hours, seconds = divmod(seconds,3600)
minutes, seconds = divmod(seconds,60)
if hours != 0:
timestamp = str(hours) + ":"
timestamp += str(minutes).zfill(2) # zfill pads with zeros
else:
timestamp = str(minutes)
timestamp += ":" + str(seconds).zfill(2)
return timestamp
# -----
@ -299,8 +114,8 @@ def small_video_item_html(item):
views = item["views"],
author = html.escape(item["author"]),
duration = item["duration"],
url = URL_ORIGIN + "/watch?v=" + item["id"],
thumbnail = get_thumbnail_url(item['id']),
url = util.URL_ORIGIN + "/watch?v=" + item["id"],
thumbnail = util.get_thumbnail_url(item['id']),
video_info = html.escape(video_info),
)
@ -309,8 +124,8 @@ def small_playlist_item_html(item):
title=html.escape(item["title"]),
size = item['size'],
author="",
url = URL_ORIGIN + "/playlist?list=" + item["id"],
thumbnail= get_thumbnail_url(item['first_video_id']),
url = util.URL_ORIGIN + "/playlist?list=" + item["id"],
thumbnail= util.get_thumbnail_url(item['first_video_id']),
)
def medium_playlist_item_html(item):
@ -318,8 +133,8 @@ def medium_playlist_item_html(item):
title=html.escape(item["title"]),
size = item['size'],
author=item['author'],
author_url= URL_ORIGIN + item['author_url'],
url = URL_ORIGIN + "/playlist?list=" + item["id"],
author_url= util.URL_ORIGIN + item['author_url'],
url = util.URL_ORIGIN + "/playlist?list=" + item["id"],
thumbnail= item['thumbnail'],
)
@ -330,11 +145,11 @@ def medium_video_item_html(medium_video_info):
title=html.escape(info["title"]),
views=info["views"],
published = info["published"],
description = format_text_runs(info["description"]),
description = yt_data_extract.format_text_runs(info["description"]),
author=html.escape(info["author"]),
author_url=info["author_url"],
duration=info["duration"],
url = URL_ORIGIN + "/watch?v=" + info["id"],
url = util.URL_ORIGIN + "/watch?v=" + info["id"],
thumbnail=info['thumbnail'],
datetime='', # TODO
)
@ -440,158 +255,28 @@ def get_header(search_box_value=""):
def get_url(node):
try:
return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
except KeyError:
return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
def get_text(node):
try:
return node['simpleText']
except KeyError:
pass
try:
return node['runs'][0]['text']
except IndexError: # empty text runs
return ''
def get_formatted_text(node):
try:
return node['runs']
except KeyError:
return node['simpleText']
def get_badges(node):
badges = []
for badge_node in node:
badge = badge_node['metadataBadgeRenderer']['label']
if badge.lower() != 'new':
badges.append(badge)
return badges
def get_thumbnail(node):
try:
return node['thumbnails'][0]['url'] # polymer format
except KeyError:
return node['url'] # ajax format
dispatch = {
# polymer format
'title': ('title', get_text),
'publishedTimeText': ('published', get_text),
'videoId': ('id', lambda node: node),
'descriptionSnippet': ('description', get_formatted_text),
'lengthText': ('duration', get_text),
'thumbnail': ('thumbnail', get_thumbnail),
'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
'viewCountText': ('views', get_text),
'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]), # the format is "324 videos"
'videoCountText': ('size', get_text),
'playlistId': ('id', lambda node: node),
'descriptionText': ('description', get_formatted_text),
'subscriberCountText': ('subscriber_count', get_text),
'channelId': ('id', lambda node: node),
'badges': ('badges', get_badges),
# ajax format
'view_count_text': ('views', get_text),
'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
'owner_text': ('author', get_text),
'owner_endpoint': ('author_url', lambda node: node['url']),
'description': ('description', get_formatted_text),
'index': ('playlist_index', get_text),
'short_byline': ('author', get_text),
'length': ('duration', get_text),
'video_id': ('id', lambda node: node),
}
def renderer_info(renderer):
try:
info = {}
if 'viewCountText' in renderer: # prefer this one as it contains all the digits
info['views'] = get_text(renderer['viewCountText'])
elif 'shortViewCountText' in renderer:
info['views'] = get_text(renderer['shortViewCountText'])
if 'ownerText' in renderer:
info['author'] = renderer['ownerText']['runs'][0]['text']
info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
try:
overlays = renderer['thumbnailOverlays']
except KeyError:
pass
else:
for overlay in overlays:
if 'thumbnailOverlayTimeStatusRenderer' in overlay:
info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text'])
# show renderers don't have videoCountText
elif 'thumbnailOverlayBottomPanelRenderer' in overlay:
info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text'])
# show renderers don't have playlistId, have to dig into the url to get it
try:
info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId']
except KeyError:
pass
for key, node in renderer.items():
if key in ('longBylineText', 'shortBylineText'):
info['author'] = get_text(node)
try:
info['author_url'] = get_url(node)
except KeyError:
pass
# show renderers don't have thumbnail key at top level, dig into thumbnailRenderer
elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node:
info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url']
else:
try:
simple_key, function = dispatch[key]
except KeyError:
continue
info[simple_key] = function(node)
return info
except KeyError:
print(renderer)
raise
def ajax_info(item_json):
try:
info = {}
for key, node in item_json.items():
try:
simple_key, function = dispatch[key]
except KeyError:
continue
info[simple_key] = function(node)
return info
except KeyError:
print(item_json)
raise
def badges_html(badges):
return ' | '.join(map(html.escape, badges))
html_transform_dispatch = {
'title': html.escape,
'published': html.escape,
'id': html.escape,
'description': format_text_runs,
'description': yt_data_extract.format_text_runs,
'duration': html.escape,
'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
'size': html.escape,
'author': html.escape,
'author_url': lambda url: html.escape(URL_ORIGIN + url),
'author_url': lambda url: html.escape(util.URL_ORIGIN + url),
'views': html.escape,
'subscriber_count': html.escape,
'badges': badges_html,
@ -645,7 +330,7 @@ def video_item_html(item, template, html_exclude=set()):
html_ready = get_html_ready(item)
html_ready['video_info'] = html.escape(json.dumps(video_info) )
html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
html_ready['url'] = util.URL_ORIGIN + "/watch?v=" + html_ready['id']
html_ready['datetime'] = '' #TODO
for key in html_exclude:
@ -658,7 +343,7 @@ def video_item_html(item, template, html_exclude=set()):
def playlist_item_html(item, template, html_exclude=set()):
html_ready = get_html_ready(item)
html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
html_ready['url'] = util.URL_ORIGIN + "/playlist?list=" + html_ready['id']
html_ready['datetime'] = '' #TODO
for key in html_exclude:
@ -672,10 +357,6 @@ def playlist_item_html(item, template, html_exclude=set()):
def update_query_string(query_string, items):
parameters = urllib.parse.parse_qs(query_string)
parameters.update(items)
return urllib.parse.urlencode(parameters, doseq=True)
page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
current_page_button_template = Template('''<div class="page-button">$page</div>''')
@ -694,7 +375,7 @@ def page_buttons_html(current_page, estimated_pages, url, current_query_string):
template = current_page_button_template
else:
template = page_button_template
result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
result += template.substitute(page=page, href = url + "?" + util.update_query_string(current_query_string, {'page': [str(page)]}) )
return result
@ -723,15 +404,15 @@ def renderer_html(renderer, additional_info={}, current_query_string=''):
return renderer_html(renderer['contents'][0], additional_info, current_query_string)
if type == 'channelRenderer':
info = renderer_info(renderer)
info = yt_data_extract.renderer_info(renderer)
html_ready = get_html_ready(info)
html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
html_ready['url'] = util.URL_ORIGIN + "/channel/" + html_ready['id']
return medium_channel_item_template.substitute(html_ready)
if type in ('movieRenderer', 'clarificationRenderer'):
return ''
info = renderer_info(renderer)
info = yt_data_extract.renderer_info(renderer)
info.update(additional_info)
html_exclude = set(additional_info.keys())
if type == 'compactVideoRenderer':
@ -745,4 +426,4 @@ def renderer_html(renderer, additional_info={}, current_query_string=''):
#print(renderer)
#raise NotImplementedError('Unknown renderer type: ' + type)
return ''
return ''


@ -1,11 +1,12 @@
from youtube.template import Template
from youtube import util, html_common
import settings
import os
import json
from youtube.template import Template
from youtube import common
import html
import gevent
import urllib
import settings
playlists_directory = os.path.join(settings.data_dir, "playlists")
thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails")
@ -38,7 +39,7 @@ def download_thumbnail(playlist_name, video_id):
url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg")
try:
thumbnail = common.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
except urllib.error.HTTPError as e:
print("Failed to download thumbnail for " + video_id + ": " + str(e))
return
@ -78,15 +79,15 @@ def get_local_playlist_page(name):
if info['id'] + ".jpg" in thumbnails:
info['thumbnail'] = "/youtube.com/data/playlist_thumbnails/" + name + "/" + info['id'] + ".jpg"
else:
info['thumbnail'] = common.get_thumbnail_url(info['id'])
info['thumbnail'] = util.get_thumbnail_url(info['id'])
missing_thumbnails.append(info['id'])
videos_html += common.video_item_html(info, common.small_video_item_template)
videos_html += html_common.video_item_html(info, html_common.small_video_item_template)
except json.decoder.JSONDecodeError:
pass
gevent.spawn(download_thumbnails, name, missing_thumbnails)
return local_playlist_template.substitute(
page_title = name + ' - Local playlist',
header = common.get_header(),
header = html_common.get_header(),
videos = videos_html,
title = name,
page_buttons = ''
@ -127,11 +128,11 @@ def get_playlists_list_page():
page = '''<ul>\n'''
list_item_template = Template(''' <li><a href="$url">$name</a></li>\n''')
for name in get_playlist_names():
page += list_item_template.substitute(url = html.escape(common.URL_ORIGIN + '/playlists/' + name), name = html.escape(name))
page += list_item_template.substitute(url = html.escape(util.URL_ORIGIN + '/playlists/' + name), name = html.escape(name))
page += '''</ul>\n'''
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Local playlists",
header = common.get_header(),
header = html_common.get_header(),
style = '',
page = page,
)
@ -151,7 +152,7 @@ def path_edit_playlist(env, start_response):
if parameters['action'][0] == 'remove':
playlist_name = env['path_parts'][1]
remove_from_playlist(playlist_name, parameters['video_info_list'])
start_response('303 See Other', [('Location', common.URL_ORIGIN + env['PATH_INFO']),] )
start_response('303 See Other', [('Location', util.URL_ORIGIN + env['PATH_INFO']),] )
return b''
else:


@ -1,10 +1,9 @@
from youtube import util, yt_data_extract, html_common, template, proto
import base64
import youtube.common as common
import urllib
import json
import string
from youtube import template
import youtube.proto as proto
import gevent
import math
@ -49,10 +48,10 @@ headers_1 = (
def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
content = common.fetch_url(url, common.mobile_ua + headers_1, report_text=report_text)
content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
'''with open('debug/playlist_debug', 'wb') as f:
f.write(content)'''
content = json.loads(common.uppercase_escape(content.decode('utf-8')))
content = json.loads(util.uppercase_escape(content.decode('utf-8')))
return content
@ -69,11 +68,11 @@ def get_videos(playlist_id, page):
'X-YouTube-Client-Version': '2.20180508',
}
content = common.fetch_url(url, headers, report_text="Retrieved playlist")
content = util.fetch_url(url, headers, report_text="Retrieved playlist")
'''with open('debug/playlist_debug', 'wb') as f:
f.write(content)'''
info = json.loads(common.uppercase_escape(content.decode('utf-8')))
info = json.loads(util.uppercase_escape(content.decode('utf-8')))
return info
@ -101,22 +100,22 @@ def get_playlist_page(env, start_response):
video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents']
videos_html = ''
for video_json in video_list:
info = common.renderer_info(video_json['playlistVideoRenderer'])
videos_html += common.video_item_html(info, common.small_video_item_template)
info = yt_data_extract.renderer_info(video_json['playlistVideoRenderer'])
videos_html += html_common.video_item_html(info, html_common.small_video_item_template)
metadata = common.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer'])
metadata = yt_data_extract.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer'])
video_count = int(metadata['size'].replace(',', ''))
page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", env['QUERY_STRING'])
page_buttons = html_common.page_buttons_html(int(page), math.ceil(video_count/20), util.URL_ORIGIN + "/playlist", env['QUERY_STRING'])
html_ready = common.get_html_ready(metadata)
html_ready = html_common.get_html_ready(metadata)
html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)
stats = ''
stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
stats += playlist_stat_template.substitute(stat=html_ready['views'])
return yt_playlist_template.substitute(
header = common.get_header(),
header = html_common.get_header(),
videos = videos_html,
page_buttons = page_buttons,
stats = stats,


@ -1,11 +1,11 @@
# Contains functions having to do with posting/editing/deleting comments
from youtube import util, html_common, proto, comments, accounts
import settings
import urllib
import json
from youtube import common, proto, comments, accounts
import re
import traceback
import settings
import os
def _post_comment(text, video_id, session_token, cookiejar):
@ -31,7 +31,7 @@ def _post_comment(text, video_id, session_token, cookiejar):
data = urllib.parse.urlencode(data_dict).encode()
content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
code = json.loads(content)['code']
print("Comment posting code: " + code)
@ -62,7 +62,7 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookie
}
data = urllib.parse.urlencode(data_dict).encode()
content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
code = json.loads(content)['code']
print("Comment posting code: " + code)
@ -90,7 +90,7 @@ def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar):
}
data = urllib.parse.urlencode(data_dict).encode()
content = common.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
content = util.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
code = json.loads(content)['code']
print("Comment deletion code: " + code)
return code
@ -101,8 +101,8 @@ def get_session_token(video_id, cookiejar):
# youtube-dl uses disable_polymer=1 which uses a different request format which has an obfuscated javascript algorithm to generate a parameter called "bgr"
# Tokens retrieved from disable_polymer pages only work with that format. Tokens retrieved on mobile only work using mobile requests
# Additionally, tokens retrieved without sending the same cookie won't work. So this is necessary even if the bgr and stuff was reverse engineered.
headers = {'User-Agent': common.mobile_user_agent}
mobile_page = common.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode()
headers = {'User-Agent': util.mobile_user_agent}
mobile_page = util.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode()
match = xsrf_token_regex.search(mobile_page)
if match:
return match.group(1).replace("%3D", "=")
@ -118,9 +118,9 @@ def delete_comment(env, start_response):
code = _delete_comment(video_id, parameters['comment_id'][0], parameters['author_id'][0], token, cookiejar)
if code == "SUCCESS":
start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_success'),] )
start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_success'),] )
else:
start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_fail'),] )
start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_fail'),] )
def post_comment(env, start_response):
parameters = env['parameters']
@ -131,11 +131,11 @@ def post_comment(env, start_response):
if 'parent_id' in parameters:
code = _post_comment_reply(parameters['comment_text'][0], parameters['video_id'][0], parameters['parent_id'][0], token, cookiejar)
start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) )
start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) )
else:
code = _post_comment(parameters['comment_text'][0], parameters['video_id'][0], token, cookiejar)
start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) )
start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) )
return b''
@ -163,10 +163,10 @@ def get_delete_comment_page(env, start_response):
page += '''
<input type="submit" value="Yes, delete it">
</form>'''
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Delete comment?",
style = style,
header = common.get_header(),
header = html_common.get_header(),
page = page,
).encode('utf-8')
@ -174,7 +174,7 @@ def get_post_comment_page(env, start_response):
start_response('200 OK', [('Content-type','text/html'),])
parameters = env['parameters']
video_id = parameters['video_id'][0]
parent_id = common.default_multi_get(parameters, 'parent_id', 0, default='')
parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='')
style = ''' main{
display: grid;
@ -194,23 +194,23 @@ textarea{
}'''
if parent_id: # comment reply
comment_box = comments.comment_box_template.substitute(
form_action = common.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id,
form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id,
video_id_input = '',
post_text = "Post reply",
options=comments.comment_box_account_options(),
)
else:
comment_box = comments.comment_box_template.substitute(
form_action = common.URL_ORIGIN + '/post_comment',
form_action = util.URL_ORIGIN + '/post_comment',
video_id_input = '''<input type="hidden" name="video_id" value="''' + video_id + '''">''',
post_text = "Post comment",
options=comments.comment_box_account_options(),
)
page = '''<div class="left">\n''' + comment_box + '''</div>\n'''
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Post comment reply" if parent_id else "Post a comment",
style = style,
header = common.get_header(),
header = html_common.get_header(),
page = page,
).encode('utf-8')


@ -1,11 +1,12 @@
from youtube import util, html_common, yt_data_extract, proto
import json
import urllib
import html
from string import Template
import base64
from math import ceil
from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
from youtube import common, proto
with open("yt_search_results_template.html", "r") as file:
yt_search_results_template = file.read()
@ -54,7 +55,7 @@ def get_search_json(query, page, autocorrect, sort, filters):
'X-YouTube-Client-Version': '2.20180418',
}
url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
content = common.fetch_url(url, headers=headers, report_text="Got search results")
content = util.fetch_url(url, headers=headers, report_text="Got search results")
info = json.loads(content)
return info
@ -70,9 +71,9 @@ def get_search_page(env, start_response):
start_response('200 OK', [('Content-type','text/html'),])
parameters = env['parameters']
if len(parameters) == 0:
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Search",
header = common.get_header(),
header = html_common.get_header(),
style = '',
page = '',
).encode('utf-8')
@ -100,24 +101,24 @@ def get_search_page(env, start_response):
renderer = renderer[type]
corrected_query_string = parameters.copy()
corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
corrected_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
corrections = did_you_mean.substitute(
corrected_query_url = corrected_query_url,
corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
)
continue
if type == 'showingResultsForRenderer':
renderer = renderer[type]
no_autocorrect_query_string = parameters.copy()
no_autocorrect_query_string['autocorrect'] = ['0']
no_autocorrect_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
corrections = showing_results_for.substitute(
corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
original_query_url = no_autocorrect_query_url,
original_query = html.escape(renderer['originalQuery']['simpleText']),
)
continue
result_list_html += common.renderer_html(renderer, current_query_string=env['QUERY_STRING'])
result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING'])
page = int(page)
if page <= 5:
@ -129,13 +130,13 @@ def get_search_page(env, start_response):
result = Template(yt_search_results_template).substitute(
header = common.get_header(query),
header = html_common.get_header(query),
results = result_list_html,
page_title = query + " - Search",
search_box_value = html.escape(query),
number_of_results = '{:,}'.format(estimated_results),
number_of_pages = '{:,}'.format(estimated_pages),
page_buttons = common.page_buttons_html(page, estimated_pages, URL_ORIGIN + "/search", env['QUERY_STRING']),
page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']),
corrections = corrections
)
return result.encode('utf-8')

youtube/util.py (new file, 153 lines)

@ -0,0 +1,153 @@
import socks, sockshandler
import gzip
import brotli
import urllib.parse
import re
import time
import settings
URL_ORIGIN = "/https://www.youtube.com"
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
'''Separate cookiejars for receiving and sending'''
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
import http.cookiejar
self.cookiejar_send = cookiejar_send
self.cookiejar_receive = cookiejar_receive
def http_request(self, request):
if self.cookiejar_send is not None:
self.cookiejar_send.add_cookie_header(request)
return request
def http_response(self, request, response):
if self.cookiejar_receive is not None:
self.cookiejar_receive.extract_cookies(response, request)
return response
https_request = http_request
https_response = http_response
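A sketch of how this handler is wired into urllib (it is exactly what fetch_url does below; the two jars here are illustrative):

import http.cookiejar
import urllib.request

send_jar = http.cookiejar.CookieJar()     # cookies sent with the request
receive_jar = http.cookiejar.CookieJar()  # cookies set by the response land here

processor = HTTPAsymmetricCookieProcessor(cookiejar_send=send_jar,
                                          cookiejar_receive=receive_jar)
opener = urllib.request.build_opener(processor)
response = opener.open('https://m.youtube.com/')
# send_jar is left untouched; receive_jar now holds any Set-Cookie values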
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
for encoding in reversed(encodings):
if encoding == 'identity':
continue
if encoding == 'br':
content = brotli.decompress(content)
elif encoding == 'gzip':
content = gzip.decompress(content)
return content
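Content-Encoding lists codings in the order the server applied them, so the loop decodes in reverse. A small illustration, assuming the brotli package imported above:

import gzip
import brotli

body = brotli.compress(gzip.compress(b'payload'))  # gzip applied first, then br
assert decode_content(body, 'gzip, br') == b'payload'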
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True):
    '''
    When cookiejar_send is set to a CookieJar object,
    those cookies will be sent in the request (but cookies in response will not be merged into it)
    When cookiejar_receive is set to a CookieJar object,
    cookies received in the response will be merged into the object (nothing will be sent from it)
    When both are set to the same object, cookies will be sent from the object,
    and response cookies will be merged into it.
    '''
    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
    headers['Accept-Encoding'] = 'gzip, br'

    # prevent python version being leaked by urllib if User-Agent isn't provided
    # (urllib will use ex. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    if data is not None:
        if isinstance(data, str):
            data = data.encode('ascii')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    start_time = time.time()

    req = urllib.request.Request(url, data=data, headers=headers)
    cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
    if use_tor and settings.route_tor:
        opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
    else:
        opener = urllib.request.build_opener(cookie_processor)

    response = opener.open(req, timeout=timeout)
    response_time = time.time()

    content = response.read()
    read_finish = time.time()

    if report_text:
        print(report_text, '    Latency:', round(response_time - start_time, 3), '    Read time:', round(read_finish - response_time, 3))

    content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
    return content
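The send/receive split is easiest to see from a caller's perspective. A minimal sketch (URLs illustrative; assumes settings.route_tor is off):

import http.cookiejar
from youtube import util

jar = http.cookiejar.CookieJar()
# First request: response cookies are merged into jar, nothing is sent from it.
util.fetch_url('https://www.youtube.com/', cookiejar_receive=jar)
# Later requests: send the stored cookies and keep merging new ones in.
util.fetch_url('https://www.youtube.com/feed/trending',
               cookiejar_send=jar, cookiejar_receive=jar)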
mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
desktop_ua = (('User-Agent', desktop_user_agent),)
def dict_add(*dicts):
    for dictionary in dicts[1:]:
        dicts[0].update(dictionary)
    return dicts[0]
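Note that dict_add updates its first argument in place and returns it (toy values):

base = {'a': 1}
util.dict_add(base, {'b': 2}, {'a': 3})  # returns base, now {'a': 3, 'b': 2}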
def video_id(url):
    url_parts = urllib.parse.urlparse(url)
    return urllib.parse.parse_qs(url_parts.query)['v'][0]
def default_multi_get(object, *keys, default):
    ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
    try:
        for key in keys:
            object = object[key]
        return object
    except (IndexError, KeyError):
        return default
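Only IndexError and KeyError are swallowed, so e.g. indexing into None still raises. With made-up data:

item = {'lengthText': {'runs': [{'text': '4:13'}]}}
util.default_multi_get(item, 'lengthText', 'runs', 0, 'text', default='')    # '4:13'
util.default_multi_get(item, 'viewCountText', 'runs', 0, 'text', default='') # '' (KeyError caught)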
# default, sddefault, mqdefault, hqdefault, hq720
def get_thumbnail_url(video_id):
    return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
def seconds_to_timestamp(seconds):
    seconds = int(seconds)
    hours, seconds = divmod(seconds, 3600)
    minutes, seconds = divmod(seconds, 60)
    if hours != 0:
        timestamp = str(hours) + ":"
        timestamp += str(minutes).zfill(2)  # zfill pads with zeros
    else:
        timestamp = str(minutes)
    timestamp += ":" + str(seconds).zfill(2)
    return timestamp
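A couple of worked values:

util.seconds_to_timestamp(75)    # '1:15'
util.seconds_to_timestamp(3661)  # '1:01:01' (hours appear only when nonzero)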
def update_query_string(query_string, items):
    parameters = urllib.parse.parse_qs(query_string)
    parameters.update(items)
    return urllib.parse.urlencode(parameters, doseq=True)
def uppercase_escape(s):
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), base=16)), s)
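Two quick illustrations (values made up); parse_qs returns values as lists, hence doseq=True in update_query_string:

util.update_query_string('v=abc&t=20', {'t': ['1m5s']})  # 'v=abc&t=1m5s'
util.uppercase_escape('\\U0001F600')                     # '😀'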

youtube/watch.py

@ -1,12 +1,12 @@
from youtube import util, html_common, comments
from youtube_dl.YoutubeDL import YoutubeDL
from youtube_dl.extractor.youtube import YoutubeError
import json
import urllib
from string import Template
import html
import youtube.common as common
from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN
import youtube.comments as comments
import gevent
import settings
import os
@ -127,9 +127,9 @@ def get_related_items_html(info):
result = ""
for item in info['related_vids']:
if 'list' in item: # playlist:
result += common.small_playlist_item_html(watch_page_related_playlist_info(item))
result += html_common.small_playlist_item_html(watch_page_related_playlist_info(item))
else:
result += common.small_video_item_html(watch_page_related_video_info(item))
result += html_common.small_video_item_html(watch_page_related_video_info(item))
return result
@ -137,7 +137,7 @@ def get_related_items_html(info):
# converts these to standard names
def watch_page_related_video_info(item):
result = {key: item[key] for key in ('id', 'title', 'author')}
result['duration'] = common.seconds_to_timestamp(item['length_seconds'])
result['duration'] = util.seconds_to_timestamp(item['length_seconds'])
try:
result['views'] = item['short_view_count_text']
except KeyError:
@ -155,9 +155,9 @@ def watch_page_related_playlist_info(item):
def sort_formats(info):
sorted_formats = info['formats'].copy()
sorted_formats.sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0))
sorted_formats.sort(key=lambda x: util.default_multi_get(_formats, x['format_id'], 'height', default=0))
for index, format in enumerate(sorted_formats):
if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
if util.default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
break
sorted_formats = sorted_formats[index:] + sorted_formats[0:index]
sorted_formats = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none']
@ -236,7 +236,7 @@ def get_watch_page(env, start_response):
start_response('200 OK', [('Content-type','text/html'),])
lc = common.default_multi_get(env['parameters'], 'lc', 0, default='')
lc = util.default_multi_get(env['parameters'], 'lc', 0, default='')
if settings.route_tor:
proxy = 'socks5://127.0.0.1:9150/'
else:
@ -256,17 +256,17 @@ def get_watch_page(env, start_response):
#chosen_format = choose_format(info)
if isinstance(info, str): # youtube error
return common.yt_basic_template.substitute(
return html_common.yt_basic_template.substitute(
page_title = "Error",
style = "",
header = common.get_header(),
header = html_common.get_header(),
page = html.escape(info),
).encode('utf-8')
sorted_formats = sort_formats(info)
video_info = {
"duration": common.seconds_to_timestamp(info["duration"]),
"duration": util.seconds_to_timestamp(info["duration"]),
"id": info['id'],
"title": info['title'],
"author": info['uploader'],
@ -338,7 +338,7 @@ def get_watch_page(env, start_response):
page = yt_watch_template.substitute(
video_title = html.escape(info["title"]),
page_title = html.escape(info["title"]),
header = common.get_header(),
header = html_common.get_header(),
uploader = html.escape(info["uploader"]),
uploader_channel_url = '/' + info["uploader_url"],
upload_date = upload_date,

youtube/youtube.py

@ -1,7 +1,7 @@
import mimetypes
import urllib.parse
import os
from youtube import local_playlist, watch, search, playlist, channel, comments, common, post_comment, accounts
from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util
import settings
YOUTUBE_FILES = (
"/shared.css",
@ -64,7 +64,7 @@ def youtube(env, start_response):
elif path.startswith("/api/"):
start_response('200 OK', [('Content-type', 'text/vtt'),] )
result = common.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
result = util.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
result = result.replace(b"align:start position:0%", b"")
return result

youtube/yt_data_extract.py Normal file

@ -0,0 +1,205 @@
import html
# videos (all of type str):
# id
# title
# url
# author
# author_url
# thumbnail
# description
# published
# duration
# likes
# dislikes
# views
# playlist_index
# playlists:
# id
# title
# url
# author
# author_url
# thumbnail
# description
# updated
# size
# first_video_id
def get_plain_text(node):
    try:
        return html.escape(node['simpleText'])
    except KeyError:
        return unformatted_text_runs(node['runs'])

def unformatted_text_runs(runs):
    result = ''
    for text_run in runs:
        result += html.escape(text_run["text"])
    return result
def format_text_runs(runs):
    if isinstance(runs, str):
        return runs
    result = ''
    for text_run in runs:
        if text_run.get("bold", False):
            result += "<b>" + html.escape(text_run["text"]) + "</b>"
        elif text_run.get('italics', False):
            result += "<i>" + html.escape(text_run["text"]) + "</i>"
        else:
            result += html.escape(text_run["text"])
    return result
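For example, with a made-up runs list:

runs = [{'text': 'Watch '}, {'text': 'this', 'bold': True}, {'text': ' & more'}]
format_text_runs(runs)  # 'Watch <b>this</b> &amp; more'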
def get_url(node):
    try:
        return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
    except KeyError:
        return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
def get_text(node):
    try:
        return node['simpleText']
    except KeyError:
        pass
    try:
        return node['runs'][0]['text']
    except IndexError:  # empty text runs
        return ''
def get_formatted_text(node):
    try:
        return node['runs']
    except KeyError:
        return node['simpleText']
def get_badges(node):
    badges = []
    for badge_node in node:
        badge = badge_node['metadataBadgeRenderer']['label']
        if badge.lower() != 'new':
            badges.append(badge)
    return badges
def get_thumbnail(node):
    try:
        return node['thumbnails'][0]['url']  # polymer format
    except KeyError:
        return node['url']  # ajax format
dispatch = {
    # polymer format
    'title': ('title', get_text),
    'publishedTimeText': ('published', get_text),
    'videoId': ('id', lambda node: node),
    'descriptionSnippet': ('description', get_formatted_text),
    'lengthText': ('duration', get_text),
    'thumbnail': ('thumbnail', get_thumbnail),
    'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
    'viewCountText': ('views', get_text),
    'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]),  # the format is "324 videos"
    'videoCountText': ('size', get_text),
    'playlistId': ('id', lambda node: node),
    'descriptionText': ('description', get_formatted_text),
    'subscriberCountText': ('subscriber_count', get_text),
    'channelId': ('id', lambda node: node),
    'badges': ('badges', get_badges),

    # ajax format
    'view_count_text': ('views', get_text),
    'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
    'owner_text': ('author', get_text),
    'owner_endpoint': ('author_url', lambda node: node['url']),
    'description': ('description', get_formatted_text),
    'index': ('playlist_index', get_text),
    'short_byline': ('author', get_text),
    'length': ('duration', get_text),
    'video_id': ('id', lambda node: node),
}
def renderer_info(renderer):
    try:
        info = {}
        if 'viewCountText' in renderer:  # prefer this one as it contains all the digits
            info['views'] = get_text(renderer['viewCountText'])
        elif 'shortViewCountText' in renderer:
            info['views'] = get_text(renderer['shortViewCountText'])

        if 'ownerText' in renderer:
            info['author'] = renderer['ownerText']['runs'][0]['text']
            info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
        try:
            overlays = renderer['thumbnailOverlays']
        except KeyError:
            pass
        else:
            for overlay in overlays:
                if 'thumbnailOverlayTimeStatusRenderer' in overlay:
                    info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text'])
                # show renderers don't have videoCountText
                elif 'thumbnailOverlayBottomPanelRenderer' in overlay:
                    info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text'])

        # show renderers don't have playlistId, have to dig into the url to get it
        try:
            info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId']
        except KeyError:
            pass
        for key, node in renderer.items():
            if key in ('longBylineText', 'shortBylineText'):
                info['author'] = get_text(node)
                try:
                    info['author_url'] = get_url(node)
                except KeyError:
                    pass

            # show renderers don't have thumbnail key at top level, dig into thumbnailRenderer
            elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node:
                info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url']
            else:
                try:
                    simple_key, function = dispatch[key]
                except KeyError:
                    continue
                info[simple_key] = function(node)
        return info
    except KeyError:
        print(renderer)
        raise
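As a sketch of how the dispatch table drives extraction, a stripped-down polymer videoRenderer (made-up values) flattens like so:

renderer = {
    'videoId': 'dQw4w9WgXcQ',
    'title': {'runs': [{'text': 'Example video'}]},
    'lengthText': {'simpleText': '3:32'},
    'shortBylineText': {'runs': [{
        'text': 'Example channel',
        'navigationEndpoint': {'commandMetadata': {
            'webCommandMetadata': {'url': '/channel/UCxxxx'}}},
    }]},
}
renderer_info(renderer)
# {'id': 'dQw4w9WgXcQ', 'title': 'Example video', 'duration': '3:32',
#  'author': 'Example channel', 'author_url': '/channel/UCxxxx'}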
def ajax_info(item_json):
    try:
        info = {}
        for key, node in item_json.items():
            try:
                simple_key, function = dispatch[key]
            except KeyError:
                continue
            info[simple_key] = function(node)
        return info
    except KeyError:
        print(item_json)
        raise