refactor common.py into 3 files

parent a61ba6b8f4
commit b32330be4f
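This commit splits the catch-all youtube/common.py across three focused modules, matching the new import lines throughout the diff below: util (networking and generic helpers such as fetch_url and URL_ORIGIN), html_common (shared page templates such as get_header and yt_basic_template), and yt_data_extract (helpers for parsing YouTube's JSON such as get_plain_text and renderer_info). A minimal sketch of how a call site changes, using only names that appear in this diff (the module layout is inferred from the imports; the new files themselves are not shown in this view):

    # Before the refactor: one catch-all module
    from youtube import common
    page = common.fetch_url(url, common.desktop_ua)
    header = common.get_header()
    title = common.get_plain_text(metadata['title'])

    # After the refactor: three focused modules
    from youtube import util, html_common, yt_data_extract
    page = util.fetch_url(url, util.desktop_ua)
    header = html_common.get_header()
    title = yt_data_extract.get_plain_text(metadata['title'])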
@@ -1,10 +1,10 @@
 # Contains functions having to do with logging in
+from youtube import util, html_common
+import settings

 import urllib
 import json
-from youtube import common
 import re
-import settings
 import http.cookiejar
 import io
 import os
@@ -106,7 +106,7 @@ def get_account_login_page(env, start_response):
     '''

     page = '''
-<form action="''' + common.URL_ORIGIN + '''/login" method="POST">
+<form action="''' + util.URL_ORIGIN + '''/login" method="POST">
     <div class="form-field">
         <label for="username">Username:</label>
         <input type="text" id="username" name="username">
@@ -130,10 +130,10 @@ Using Tor to log in should only be done if the account was created using a proxy
         </div>
 '''

-    return common.yt_basic_template.substitute(
+    return html_common.yt_basic_template.substitute(
         page_title = "Login",
         style = style,
-        header = common.get_header(),
+        header = html_common.get_header(),
         page = page,
     ).encode('utf-8')

@@ -229,7 +229,7 @@ def _login(username, password, cookiejar, use_tor):
     Taken from youtube-dl
     """

-    login_page = common.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
     '''with open('debug/login_page', 'w', encoding='utf-8') as f:
         f.write(login_page)'''
     #print(cookiejar.as_lwp_str())
@@ -255,7 +255,7 @@ def _login(username, password, cookiejar, use_tor):
             'Google-Accounts-XSRF': 1,
         }
         headers.update(yt_dl_headers)
-        result = common.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+        result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
         #print(cookiejar.as_lwp_str())
         '''with open('debug/' + note, 'w', encoding='utf-8') as f:
             f.write(result)'''
@@ -387,7 +387,7 @@ def _login(username, password, cookiejar, use_tor):
         return False

     try:
-        check_cookie_results = common.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
     except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
         return False

@@ -398,7 +398,7 @@ def _login(username, password, cookiejar, use_tor):
         warn('Unable to log in')
         return False

-    select_site_page = common.fetch_url('https://m.youtube.com/select_site', headers=common.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8')
+    select_site_page = util.fetch_url('https://m.youtube.com/select_site', headers=util.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8')
     match = _CHANNEL_ID_RE.search(select_site_page)
     if match is None:
         warn('Failed to find channel id')
@@ -1,6 +1,6 @@
 import base64
-import youtube.common as common
-from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id
+from youtube import util, yt_data_extract, html_common
 import http_errors
 import urllib
 import json
@@ -91,7 +91,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
     url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

     print("Sending channel tab ajax request")
-    content = common.fetch_url(url, common.desktop_ua + headers_1)
+    content = util.fetch_url(url, util.desktop_ua + headers_1)
     print("Finished recieving channel tab response")

     '''with open('debug/channel_debug', 'wb') as f:
@@ -110,7 +110,7 @@ def get_number_of_videos(channel_id):

     # Sometimes retrieving playlist info fails with 403 for no discernable reason
     try:
-        response = common.fetch_url(url, common.mobile_ua + headers_pbj)
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
     except urllib.error.HTTPError as e:
         if e.code != 403:
             raise
@@ -133,20 +133,20 @@ def get_channel_id(username):
     # method that gives the smallest possible response at ~10 kb
     # needs to be as fast as possible
     url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
-    response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
+    response = util.fetch_url(url, util.mobile_ua + headers_1).decode('utf-8')
     return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)

 def grid_items_html(items, additional_info={}):
     result = ''' <nav class="item-grid">\n'''
     for item in items:
-        result += common.renderer_html(item, additional_info)
+        result += html_common.renderer_html(item, additional_info)
     result += '''\n</nav>'''
     return result

 def list_items_html(items, additional_info={}):
     result = ''' <nav class="item-list">'''
     for item in items:
-        result += common.renderer_html(item, additional_info)
+        result += html_common.renderer_html(item, additional_info)
     result += '''\n</nav>'''
     return result

@@ -168,11 +168,11 @@ def channel_tabs_html(channel_id, current_tab, search_box_value=''):
             )
         else:
             result += channel_tab_template.substitute(
-                href_attribute = ' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"',
+                href_attribute = ' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"',
                 tab_name = tab_name,
             )
     result += channel_search_template.substitute(
-        action = URL_ORIGIN + "/channel/" + channel_id + "/search",
+        action = util.URL_ORIGIN + "/channel/" + channel_id + "/search",
         search_box_value = html.escape(search_box_value),
     )
     return result
@@ -192,7 +192,7 @@ def channel_sort_buttons_html(channel_id, tab, current_sort):
             )
         else:
             result += channel_sort_button_template.substitute(
-                href_attribute=' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"',
+                href_attribute=' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"',
                 text = 'Sort by ' + sort_name
             )
     return result
@@ -246,14 +246,14 @@ def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_
     items_html = grid_items_html(items, {'author': microformat['title']})

     return yt_channel_items_template.substitute(
-        header = common.get_header(),
+        header = html_common.get_header(),
         channel_title = microformat['title'],
         channel_tabs = channel_tabs_html(channel_id, 'Videos'),
         sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort),
         avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
         page_title = microformat['title'] + ' - Channel',
         items = items_html,
-        page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string),
+        page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/30), util.URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string),
         number_of_results = '{:,}'.format(number_of_videos) + " videos",
     )

@@ -267,7 +267,7 @@ def channel_playlists_html(polymer_json, current_sort=3):
     items_html = grid_items_html(items, {'author': microformat['title']})

     return yt_channel_items_template.substitute(
-        header = common.get_header(),
+        header = html_common.get_header(),
         channel_title = microformat['title'],
         channel_tabs = channel_tabs_html(channel_id, 'Playlists'),
         sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort),
@@ -310,25 +310,25 @@ def channel_about_page(polymer_json):

         channel_links += channel_link_template.substitute(
             url = html.escape(url),
-            text = common.get_plain_text(link_json['title']),
+            text = yt_data_extract.get_plain_text(link_json['title']),
         )

     stats = ''
     for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'):
         try:
-            stat_value = common.get_plain_text(channel_metadata[stat_name])
+            stat_value = yt_data_extract.get_plain_text(channel_metadata[stat_name])
         except KeyError:
             continue
         else:
             stats += stat_template.substitute(stat_value=stat_value)
     try:
-        description = common.format_text_runs(common.get_formatted_text(channel_metadata['description']))
+        description = yt_data_extract.format_text_runs(yt_data_extract.get_formatted_text(channel_metadata['description']))
     except KeyError:
         description = ''
     return yt_channel_about_template.substitute(
-        header = common.get_header(),
-        page_title = common.get_plain_text(channel_metadata['title']) + ' - About',
-        channel_title = common.get_plain_text(channel_metadata['title']),
+        header = html_common.get_header(),
+        page_title = yt_data_extract.get_plain_text(channel_metadata['title']) + ' - About',
+        channel_title = yt_data_extract.get_plain_text(channel_metadata['title']),
         avatar = html.escape(avatar),
         description = description,
         links = channel_links,
@@ -351,13 +351,13 @@ def channel_search_page(polymer_json, query, current_page=1, number_of_videos =
     items_html = list_items_html(items)

     return yt_channel_items_template.substitute(
-        header = common.get_header(),
+        header = html_common.get_header(),
         channel_title = html.escape(microformat['title']),
         channel_tabs = channel_tabs_html(channel_id, '', query),
         avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
         page_title = html.escape(query + ' - Channel search'),
         items = items_html,
-        page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/29), URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string),
+        page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/29), util.URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string),
         number_of_results = '',
         sort_buttons = '',
     )
@@ -367,7 +367,7 @@ def get_channel_search_json(channel_id, query, page):
     ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
     ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

-    polymer_json = common.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, common.desktop_ua + headers_1)
+    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
     '''with open('debug/channel_search_debug', 'wb') as f:
         f.write(polymer_json)'''
     polymer_json = json.loads(polymer_json)
@@ -384,10 +384,10 @@ def get_channel_page(env, start_response):
         tab = 'videos'

     parameters = env['parameters']
-    page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
-    sort = common.default_multi_get(parameters, 'sort', 0, default='3')
-    view = common.default_multi_get(parameters, 'view', 0, default='1')
-    query = common.default_multi_get(parameters, 'query', 0, default='')
+    page_number = int(util.default_multi_get(parameters, 'page', 0, default='1'))
+    sort = util.default_multi_get(parameters, 'sort', 0, default='3')
+    view = util.default_multi_get(parameters, 'view', 0, default='1')
+    query = util.default_multi_get(parameters, 'query', 0, default='')

     if tab == 'videos':
         tasks = (
@@ -399,11 +399,11 @@ def get_channel_page(env, start_response):

         result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING'])
     elif tab == 'about':
-        polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', common.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
         polymer_json = json.loads(polymer_json)
         result = channel_about_page(polymer_json)
     elif tab == 'playlists':
-        polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], common.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
         '''with open('debug/channel_playlists_debug', 'wb') as f:
             f.write(polymer_json)'''
         polymer_json = json.loads(polymer_json)
@@ -443,22 +443,22 @@ def get_channel_page_general_url(env, start_response):
         return b'Invalid channel url'

     if page == 'videos':
-        polymer_json = common.fetch_url(base_url + '/videos?pbj=1&view=0', common.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
         '''with open('debug/user_page_videos', 'wb') as f:
             f.write(polymer_json)'''
         polymer_json = json.loads(polymer_json)
         result = channel_videos_html(polymer_json)
     elif page == 'about':
-        polymer_json = common.fetch_url(base_url + '/about?pbj=1', common.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
         polymer_json = json.loads(polymer_json)
         result = channel_about_page(polymer_json)
     elif page == 'playlists':
-        polymer_json = common.fetch_url(base_url+ '/playlists?pbj=1&view=1', common.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
         polymer_json = json.loads(polymer_json)
         result = channel_playlists_html(polymer_json)
     elif page == 'search':
         raise NotImplementedError()
-        '''polymer_json = common.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, common.desktop_ua + headers_1)
+        '''polymer_json = util.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, util.desktop_ua + headers_1)
         polymer_json = json.loads(polymer_json)
         return channel_search_page('''
     else:
@@ -1,13 +1,14 @@
+from youtube import proto, util, html_common, yt_data_extract, accounts
+import settings
+
 import json
-from youtube import proto, common, accounts
 import base64
-from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url
 from string import Template
 import urllib.request
 import urllib
 import html
-import settings
 import re

 comment_area_template = Template('''
 <section class="comment-area">
 $video-metadata
@@ -130,7 +131,7 @@ def request_comments(ctoken, replies=False):
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

     for i in range(0,8): # don't retry more than 8 times
-        content = fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
+        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
         if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
             content = content[4:]
         elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
@@ -151,10 +152,10 @@ def single_comment_ctoken(video_id, comment_id):

 def parse_comments_ajax(content, replies=False):
     try:
-        content = json.loads(uppercase_escape(content.decode('utf-8')))
+        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
         #print(content)
         comments_raw = content['content']['continuation_contents']['contents']
-        ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
+        ctoken = util.default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')

         comments = []
         for comment_raw in comments_raw:
@@ -163,7 +164,7 @@ def parse_comments_ajax(content, replies=False):
             if comment_raw['replies'] is not None:
                 reply_ctoken = comment_raw['replies']['continuations'][0]['continuation']
                 comment_id, video_id = get_ids(reply_ctoken)
-                replies_url = URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
+                replies_url = util.URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
             comment_raw = comment_raw['comment']
             comment = {
                 'author': comment_raw['author']['runs'][0]['text'],
@@ -189,7 +190,7 @@ reply_count_regex = re.compile(r'(\d+)')
 def parse_comments_polymer(content, replies=False):
     try:
         video_title = ''
-        content = json.loads(uppercase_escape(content.decode('utf-8')))
+        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
         url = content[1]['url']
         ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
         video_id = ctoken_metadata(ctoken)['video_id']
@@ -200,7 +201,7 @@ def parse_comments_polymer(content, replies=False):
             comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents']
             replies = True

-        ctoken = default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')
+        ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')

         comments = []
         for comment_raw in comments_raw:
@@ -219,8 +220,8 @@ def parse_comments_polymer(content, replies=False):
             if 'replies' in comment_raw:
                 #reply_ctoken = comment_raw['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']['continuation']
                 #comment_id, video_id = get_ids(reply_ctoken)
-                replies_url = URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
-                view_replies_text = common.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText'])
+                replies_url = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
+                view_replies_text = yt_data_extract.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText'])
                 match = reply_count_regex.search(view_replies_text)
                 if match is None:
                     view_replies_text = '1 reply'
@@ -228,18 +229,18 @@ def parse_comments_polymer(content, replies=False):
                     view_replies_text = match.group(1) + " replies"
             elif not replies:
                 view_replies_text = "Reply"
-                replies_url = URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id
+                replies_url = util.URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id
             comment_raw = comment_raw['comment']

             comment_raw = comment_raw['commentRenderer']
             comment = {
-                'author': common.get_plain_text(comment_raw['authorText']),
+                'author': yt_data_extract.get_plain_text(comment_raw['authorText']),
                 'author_url': comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                 'author_channel_id': comment_raw['authorEndpoint']['browseEndpoint']['browseId'],
                 'author_id': comment_raw['authorId'],
                 'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'],
                 'likes': comment_raw['likeCount'],
-                'published': common.get_plain_text(comment_raw['publishedTimeText']),
+                'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']),
                 'text': comment_raw['contentText'].get('runs', ''),
                 'view_replies_text': view_replies_text,
                 'replies_url': replies_url,
@@ -264,13 +265,13 @@ def get_comments_html(comments):
         replies = reply_link_template.substitute(url=comment['replies_url'], view_replies_text=html.escape(comment['view_replies_text']))
         if settings.enable_comment_avatars:
             avatar = comment_avatar_template.substitute(
-                author_url = URL_ORIGIN + comment['author_url'],
+                author_url = util.URL_ORIGIN + comment['author_url'],
                 author_avatar = '/' + comment['author_avatar'],
             )
         else:
             avatar = ''
         if comment['author_channel_id'] in accounts.accounts:
-            delete_url = (URL_ORIGIN + '/delete_comment?video_id='
+            delete_url = (util.URL_ORIGIN + '/delete_comment?video_id='
                 + comment['video_id']
                 + '&channel_id='+ comment['author_channel_id']
                 + '&author_id=' + comment['author_id']
@@ -280,14 +281,14 @@ def get_comments_html(comments):
         else:
             action_buttons = ''

-        permalink = URL_ORIGIN + '/watch?v=' + comment['video_id'] + '&lc=' + comment['comment_id']
+        permalink = util.URL_ORIGIN + '/watch?v=' + comment['video_id'] + '&lc=' + comment['comment_id']
         html_result += comment_template.substitute(
             author=comment['author'],
-            author_url = URL_ORIGIN + comment['author_url'],
+            author_url = util.URL_ORIGIN + comment['author_url'],
             avatar = avatar,
             likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '',
             published = comment['published'],
-            text = format_text_runs(comment['text']),
+            text = yt_data_extract.format_text_runs(comment['text']),
             datetime = '', #TODO
             replies = replies,
             action_buttons = action_buttons,
@@ -297,10 +298,10 @@ def get_comments_html(comments):

 def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
     if settings.enable_comments:
-        post_comment_url = common.URL_ORIGIN + "/post_comment?video_id=" + video_id
+        post_comment_url = util.URL_ORIGIN + "/post_comment?video_id=" + video_id
         post_comment_link = '''<a class="sort-button" href="''' + post_comment_url + '''">Post comment</a>'''

-        other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
+        other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
         other_sort_name = 'newest' if sort == 0 else 'top'
         other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>'''

@@ -314,7 +315,7 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
         if ctoken == '':
             more_comments_button = ''
         else:
-            more_comments_button = more_comments_template.substitute(url = common.URL_ORIGIN + '/comments?ctoken=' + ctoken)
+            more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken)

         result = '''<section class="comments-area">\n'''
         result += comment_links + '\n'
@@ -350,7 +351,7 @@ comment_box_template = Template('''
         <select id="account-selection" name="channel_id">
 $options
         </select>
-        <a href="''' + common.URL_ORIGIN + '''/login" target="_blank">Add account</a>
+        <a href="''' + util.URL_ORIGIN + '''/login" target="_blank">Add account</a>
     </div>
     <textarea name="comment_text"></textarea>
 $video_id_input
@@ -359,7 +360,7 @@ $options
 def get_comments_page(env, start_response):
     start_response('200 OK', [('Content-type','text/html'),] )
     parameters = env['parameters']
-    ctoken = default_multi_get(parameters, 'ctoken', 0, default='')
+    ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='')
     replies = False
     if not ctoken:
         video_id = parameters['video_id'][0]
@@ -384,17 +385,17 @@ def get_comments_page(env, start_response):
             page_number = page_number,
             sort = 'top' if metadata['sort'] == 0 else 'newest',
             title = html.escape(comment_info['video_title']),
-            url = common.URL_ORIGIN + '/watch?v=' + metadata['video_id'],
+            url = util.URL_ORIGIN + '/watch?v=' + metadata['video_id'],
             thumbnail = '/i.ytimg.com/vi/'+ metadata['video_id'] + '/mqdefault.jpg',
         )
         comment_box = comment_box_template.substitute(
-            form_action= common.URL_ORIGIN + '/post_comment',
+            form_action= util.URL_ORIGIN + '/post_comment',
             video_id_input='''<input type="hidden" name="video_id" value="''' + metadata['video_id'] + '''">''',
             post_text='Post comment',
             options=comment_box_account_options(),
         )

-        other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort'])
+        other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort'])
         other_sort_name = 'newest' if metadata['sort'] == 0 else 'top'
         other_sort_link = '''<a class="sort-button" href="''' + other_sort_url + '''">Sort by ''' + other_sort_name + '''</a>'''

@@ -408,7 +409,7 @@ def get_comments_page(env, start_response):
     if ctoken == '':
         more_comments_button = ''
     else:
-        more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
+        more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken)
     comments_area = '<section class="comments-area">\n'
     comments_area += video_metadata + comment_box + comment_links + '\n'
     comments_area += '<div class="comments">\n'
@@ -417,7 +418,7 @@ def get_comments_page(env, start_response):
     comments_area += more_comments_button + '\n'
     comments_area += '</section>\n'
     return yt_comments_template.substitute(
-        header = common.get_header(),
+        header = html_common.get_header(),
         comments_area = comments_area,
         page_title = page_title,
     ).encode('utf-8')
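The next file carries the bulk of the split: the HTTP machinery (HTTPAsymmetricCookieProcessor, decode_content, fetch_url, the mobile/desktop user-agent tuples), the generic helpers (dict_add, video_id, uppercase_escape, default_multi_get, seconds_to_timestamp, update_query_string), and the JSON-extraction layer (get_text, get_formatted_text, renderer_info, ajax_info) are all deleted here, while call sites elsewhere in this commit reach them through util and yt_data_extract instead. As one illustration, default_multi_get (its docstring is visible in the deleted block below) reads a deep path out of nested dicts and lists, returning a default instead of raising; this usage sketch repeats a call from the comments changes above, assuming the helper now lives in util:

    # Returns '' instead of raising if any key or index along the path is missing.
    ctoken = util.default_multi_get(
        content, 1, 'response', 'continuationContents',
        'commentSectionContinuation', 'continuations', 0,
        'nextContinuationData', 'continuation',
        default='')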
@ -1,46 +1,8 @@
|
|||||||
from youtube.template import Template
|
from youtube.template import Template
|
||||||
from youtube import local_playlist
|
from youtube import local_playlist, yt_data_extract, util
|
||||||
import settings
|
|
||||||
import html
|
|
||||||
import json
|
import json
|
||||||
import re
|
import html
|
||||||
import urllib.parse
|
|
||||||
import gzip
|
|
||||||
import brotli
|
|
||||||
import time
|
|
||||||
import socks, sockshandler
|
|
||||||
|
|
||||||
URL_ORIGIN = "/https://www.youtube.com"
|
|
||||||
|
|
||||||
|
|
||||||
# videos (all of type str):
|
|
||||||
|
|
||||||
# id
|
|
||||||
# title
|
|
||||||
# url
|
|
||||||
# author
|
|
||||||
# author_url
|
|
||||||
# thumbnail
|
|
||||||
# description
|
|
||||||
# published
|
|
||||||
# duration
|
|
||||||
# likes
|
|
||||||
# dislikes
|
|
||||||
# views
|
|
||||||
# playlist_index
|
|
||||||
|
|
||||||
# playlists:
|
|
||||||
|
|
||||||
# id
|
|
||||||
# title
|
|
||||||
# url
|
|
||||||
# author
|
|
||||||
# author_url
|
|
||||||
# thumbnail
|
|
||||||
# description
|
|
||||||
# updated
|
|
||||||
# size
|
|
||||||
# first_video_id
|
|
||||||
|
|
||||||
|
|
||||||
with open('yt_basic_template.html', 'r', encoding='utf-8') as file:
|
with open('yt_basic_template.html', 'r', encoding='utf-8') as file:
|
||||||
@ -139,153 +101,6 @@ medium_channel_item_template = Template('''
|
|||||||
''')
|
''')
|
||||||
|
|
||||||
|
|
||||||
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
|
|
||||||
'''Separate cookiejars for receiving and sending'''
|
|
||||||
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
|
|
||||||
import http.cookiejar
|
|
||||||
self.cookiejar_send = cookiejar_send
|
|
||||||
self.cookiejar_receive = cookiejar_receive
|
|
||||||
|
|
||||||
def http_request(self, request):
|
|
||||||
if self.cookiejar_send is not None:
|
|
||||||
self.cookiejar_send.add_cookie_header(request)
|
|
||||||
return request
|
|
||||||
|
|
||||||
def http_response(self, request, response):
|
|
||||||
if self.cookiejar_receive is not None:
|
|
||||||
self.cookiejar_receive.extract_cookies(response, request)
|
|
||||||
return response
|
|
||||||
|
|
||||||
https_request = http_request
|
|
||||||
https_response = http_response
|
|
||||||
|
|
||||||
|
|
||||||
def decode_content(content, encoding_header):
|
|
||||||
encodings = encoding_header.replace(' ', '').split(',')
|
|
||||||
for encoding in reversed(encodings):
|
|
||||||
if encoding == 'identity':
|
|
||||||
continue
|
|
||||||
if encoding == 'br':
|
|
||||||
content = brotli.decompress(content)
|
|
||||||
elif encoding == 'gzip':
|
|
||||||
content = gzip.decompress(content)
|
|
||||||
return content
|
|
||||||
|
|
||||||
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True):
|
|
||||||
'''
|
|
||||||
When cookiejar_send is set to a CookieJar object,
|
|
||||||
those cookies will be sent in the request (but cookies in response will not be merged into it)
|
|
||||||
When cookiejar_receive is set to a CookieJar object,
|
|
||||||
cookies received in the response will be merged into the object (nothing will be sent from it)
|
|
||||||
When both are set to the same object, cookies will be sent from the object,
|
|
||||||
and response cookies will be merged into it.
|
|
||||||
'''
|
|
||||||
headers = dict(headers) # Note: Calling dict() on a dict will make a copy
|
|
||||||
headers['Accept-Encoding'] = 'gzip, br'
|
|
||||||
|
|
||||||
# prevent python version being leaked by urllib if User-Agent isn't provided
|
|
||||||
# (urllib will use ex. Python-urllib/3.6 otherwise)
|
|
||||||
if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
|
|
||||||
headers['User-Agent'] = 'Python-urllib'
|
|
||||||
|
|
||||||
if data is not None:
|
|
||||||
if isinstance(data, str):
|
|
||||||
data = data.encode('ascii')
|
|
||||||
elif not isinstance(data, bytes):
|
|
||||||
data = urllib.parse.urlencode(data).encode('ascii')
|
|
||||||
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
|
|
||||||
req = urllib.request.Request(url, data=data, headers=headers)
|
|
||||||
|
|
||||||
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
|
|
||||||
|
|
||||||
if use_tor and settings.route_tor:
|
|
||||||
opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
|
|
||||||
else:
|
|
||||||
opener = urllib.request.build_opener(cookie_processor)
|
|
||||||
|
|
||||||
response = opener.open(req, timeout=timeout)
|
|
||||||
response_time = time.time()
|
|
||||||
|
|
||||||
|
|
||||||
content = response.read()
|
|
||||||
read_finish = time.time()
|
|
||||||
if report_text:
|
|
||||||
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
|
|
||||||
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
|
|
||||||
return content
|
|
||||||
|
|
||||||
mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
|
|
||||||
mobile_ua = (('User-Agent', mobile_user_agent),)
|
|
||||||
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
|
|
||||||
desktop_ua = (('User-Agent', desktop_user_agent),)
|
|
||||||
|
|
||||||
def dict_add(*dicts):
|
|
||||||
for dictionary in dicts[1:]:
|
|
||||||
dicts[0].update(dictionary)
|
|
||||||
return dicts[0]
|
|
||||||
|
|
||||||
def video_id(url):
|
|
||||||
url_parts = urllib.parse.urlparse(url)
|
|
||||||
return urllib.parse.parse_qs(url_parts.query)['v'][0]
|
|
||||||
|
|
||||||
def uppercase_escape(s):
|
|
||||||
return re.sub(
|
|
||||||
r'\\U([0-9a-fA-F]{8})',
|
|
||||||
lambda m: chr(int(m.group(1), base=16)), s)
|
|
||||||
|
|
||||||
def default_multi_get(object, *keys, default):
|
|
||||||
''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
|
|
||||||
try:
|
|
||||||
for key in keys:
|
|
||||||
object = object[key]
|
|
||||||
return object
|
|
||||||
except (IndexError, KeyError):
|
|
||||||
return default
|
|
||||||
|
|
||||||
def get_plain_text(node):
|
|
||||||
try:
|
|
||||||
return html.escape(node['simpleText'])
|
|
||||||
except KeyError:
|
|
||||||
return unformmated_text_runs(node['runs'])
|
|
||||||
|
|
||||||
def unformmated_text_runs(runs):
|
|
||||||
result = ''
|
|
||||||
for text_run in runs:
|
|
||||||
result += html.escape(text_run["text"])
|
|
||||||
return result
|
|
||||||
|
|
||||||
def format_text_runs(runs):
|
|
||||||
if isinstance(runs, str):
|
|
||||||
return runs
|
|
||||||
result = ''
|
|
||||||
for text_run in runs:
|
|
||||||
if text_run.get("bold", False):
|
|
||||||
result += "<b>" + html.escape(text_run["text"]) + "</b>"
|
|
||||||
elif text_run.get('italics', False):
|
|
||||||
result += "<i>" + html.escape(text_run["text"]) + "</i>"
|
|
||||||
else:
|
|
||||||
result += html.escape(text_run["text"])
|
|
||||||
return result
|
|
||||||
|
|
||||||
# default, sddefault, mqdefault, hqdefault, hq720
|
|
||||||
def get_thumbnail_url(video_id):
|
|
||||||
return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
|
|
||||||
|
|
||||||
def seconds_to_timestamp(seconds):
|
|
||||||
seconds = int(seconds)
|
|
||||||
hours, seconds = divmod(seconds,3600)
|
|
||||||
minutes, seconds = divmod(seconds,60)
|
|
||||||
if hours != 0:
|
|
||||||
timestamp = str(hours) + ":"
|
|
||||||
timestamp += str(minutes).zfill(2) # zfill pads with zeros
|
|
||||||
else:
|
|
||||||
timestamp = str(minutes)
|
|
||||||
|
|
||||||
timestamp += ":" + str(seconds).zfill(2)
|
|
||||||
return timestamp
|
|
||||||
|
|
||||||
|
|
||||||
# -----
|
# -----
|
||||||
@ -299,8 +114,8 @@ def small_video_item_html(item):
|
|||||||
views = item["views"],
|
views = item["views"],
|
||||||
author = html.escape(item["author"]),
|
author = html.escape(item["author"]),
|
||||||
duration = item["duration"],
|
duration = item["duration"],
|
||||||
url = URL_ORIGIN + "/watch?v=" + item["id"],
|
url = util.URL_ORIGIN + "/watch?v=" + item["id"],
|
||||||
thumbnail = get_thumbnail_url(item['id']),
|
thumbnail = util.get_thumbnail_url(item['id']),
|
||||||
video_info = html.escape(video_info),
|
video_info = html.escape(video_info),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -309,8 +124,8 @@ def small_playlist_item_html(item):
|
|||||||
title=html.escape(item["title"]),
|
title=html.escape(item["title"]),
|
||||||
size = item['size'],
|
size = item['size'],
|
||||||
author="",
|
author="",
|
||||||
url = URL_ORIGIN + "/playlist?list=" + item["id"],
|
url = util.URL_ORIGIN + "/playlist?list=" + item["id"],
|
||||||
thumbnail= get_thumbnail_url(item['first_video_id']),
|
thumbnail= util.get_thumbnail_url(item['first_video_id']),
|
||||||
)
|
)
|
||||||
|
|
||||||
def medium_playlist_item_html(item):
|
def medium_playlist_item_html(item):
|
||||||
@ -318,8 +133,8 @@ def medium_playlist_item_html(item):
|
|||||||
title=html.escape(item["title"]),
|
title=html.escape(item["title"]),
|
||||||
size = item['size'],
|
size = item['size'],
|
||||||
author=item['author'],
|
author=item['author'],
|
||||||
author_url= URL_ORIGIN + item['author_url'],
|
author_url= util.URL_ORIGIN + item['author_url'],
|
||||||
url = URL_ORIGIN + "/playlist?list=" + item["id"],
|
url = util.URL_ORIGIN + "/playlist?list=" + item["id"],
|
||||||
thumbnail= item['thumbnail'],
|
thumbnail= item['thumbnail'],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -330,11 +145,11 @@ def medium_video_item_html(medium_video_info):
|
|||||||
title=html.escape(info["title"]),
|
title=html.escape(info["title"]),
|
||||||
views=info["views"],
|
views=info["views"],
|
||||||
published = info["published"],
|
published = info["published"],
|
||||||
description = format_text_runs(info["description"]),
|
description = yt_data_extract.format_text_runs(info["description"]),
|
||||||
author=html.escape(info["author"]),
|
author=html.escape(info["author"]),
|
||||||
author_url=info["author_url"],
|
author_url=info["author_url"],
|
||||||
duration=info["duration"],
|
duration=info["duration"],
|
||||||
url = URL_ORIGIN + "/watch?v=" + info["id"],
|
url = util.URL_ORIGIN + "/watch?v=" + info["id"],
|
||||||
thumbnail=info['thumbnail'],
|
thumbnail=info['thumbnail'],
|
||||||
datetime='', # TODO
|
datetime='', # TODO
|
||||||
)
|
)
|
||||||
@ -440,158 +255,28 @@ def get_header(search_box_value=""):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_url(node):
|
|
||||||
try:
|
|
||||||
return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
|
|
||||||
except KeyError:
|
|
||||||
return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
|
|
||||||
|
|
||||||
|
|
||||||
def get_text(node):
|
|
||||||
try:
|
|
||||||
return node['simpleText']
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
return node['runs'][0]['text']
|
|
||||||
except IndexError: # empty text runs
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def get_formatted_text(node):
|
|
||||||
try:
|
|
||||||
return node['runs']
|
|
||||||
except KeyError:
|
|
||||||
return node['simpleText']
|
|
||||||
|
|
||||||
def get_badges(node):
|
|
||||||
badges = []
|
|
||||||
for badge_node in node:
|
|
||||||
badge = badge_node['metadataBadgeRenderer']['label']
|
|
||||||
if badge.lower() != 'new':
|
|
||||||
badges.append(badge)
|
|
||||||
return badges
|
|
||||||
|
|
||||||
def get_thumbnail(node):
|
|
||||||
try:
|
|
||||||
return node['thumbnails'][0]['url'] # polymer format
|
|
||||||
except KeyError:
|
|
||||||
return node['url'] # ajax format
|
|
||||||
|
|
||||||
dispatch = {
|
|
||||||
|
|
||||||
# polymer format
|
|
||||||
'title': ('title', get_text),
|
|
||||||
'publishedTimeText': ('published', get_text),
|
|
||||||
'videoId': ('id', lambda node: node),
|
|
||||||
'descriptionSnippet': ('description', get_formatted_text),
|
|
||||||
'lengthText': ('duration', get_text),
|
|
||||||
'thumbnail': ('thumbnail', get_thumbnail),
|
|
||||||
'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
|
|
||||||
|
|
||||||
'viewCountText': ('views', get_text),
|
|
||||||
'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]), # the format is "324 videos"
|
|
||||||
'videoCountText': ('size', get_text),
|
|
||||||
'playlistId': ('id', lambda node: node),
|
|
||||||
'descriptionText': ('description', get_formatted_text),
|
|
||||||
|
|
||||||
'subscriberCountText': ('subscriber_count', get_text),
|
|
||||||
'channelId': ('id', lambda node: node),
|
|
||||||
'badges': ('badges', get_badges),
|
|
||||||
|
|
||||||
# ajax format
|
|
||||||
'view_count_text': ('views', get_text),
|
|
||||||
'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
|
|
||||||
'owner_text': ('author', get_text),
|
|
||||||
'owner_endpoint': ('author_url', lambda node: node['url']),
|
|
||||||
'description': ('description', get_formatted_text),
|
|
||||||
'index': ('playlist_index', get_text),
|
|
||||||
'short_byline': ('author', get_text),
|
|
||||||
'length': ('duration', get_text),
|
|
||||||
'video_id': ('id', lambda node: node),
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
def renderer_info(renderer):
|
|
||||||
try:
|
|
||||||
info = {}
|
|
||||||
if 'viewCountText' in renderer: # prefer this one as it contains all the digits
|
|
||||||
info['views'] = get_text(renderer['viewCountText'])
|
|
||||||
elif 'shortViewCountText' in renderer:
|
|
||||||
info['views'] = get_text(renderer['shortViewCountText'])
|
|
||||||
|
|
||||||
if 'ownerText' in renderer:
|
|
||||||
info['author'] = renderer['ownerText']['runs'][0]['text']
|
|
||||||
info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
|
|
||||||
try:
|
|
||||||
overlays = renderer['thumbnailOverlays']
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
for overlay in overlays:
|
|
||||||
if 'thumbnailOverlayTimeStatusRenderer' in overlay:
|
|
||||||
info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text'])
|
|
||||||
# show renderers don't have videoCountText
|
|
||||||
elif 'thumbnailOverlayBottomPanelRenderer' in overlay:
|
|
||||||
info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text'])
|
|
||||||
|
|
||||||
# show renderers don't have playlistId, have to dig into the url to get it
|
|
||||||
try:
|
|
||||||
info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId']
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
for key, node in renderer.items():
|
|
||||||
if key in ('longBylineText', 'shortBylineText'):
|
|
||||||
info['author'] = get_text(node)
|
|
||||||
try:
|
|
||||||
info['author_url'] = get_url(node)
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# show renderers don't have thumbnail key at top level, dig into thumbnailRenderer
|
|
||||||
elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node:
|
|
||||||
info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url']
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
simple_key, function = dispatch[key]
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
info[simple_key] = function(node)
|
|
||||||
return info
|
|
||||||
except KeyError:
|
|
||||||
print(renderer)
|
|
||||||
raise
|
|
||||||
|
|
||||||
def ajax_info(item_json):
|
|
||||||
try:
|
|
||||||
info = {}
|
|
||||||
for key, node in item_json.items():
|
|
||||||
try:
|
|
||||||
simple_key, function = dispatch[key]
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
info[simple_key] = function(node)
|
|
||||||
return info
|
|
||||||
except KeyError:
|
|
||||||
print(item_json)
|
|
||||||
raise
|
|
||||||
|
|
||||||
def badges_html(badges):
|
def badges_html(badges):
|
||||||
return ' | '.join(map(html.escape, badges))
|
return ' | '.join(map(html.escape, badges))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
html_transform_dispatch = {
|
html_transform_dispatch = {
|
||||||
'title': html.escape,
|
'title': html.escape,
|
||||||
'published': html.escape,
|
'published': html.escape,
|
||||||
'id': html.escape,
|
'id': html.escape,
|
||||||
'description': format_text_runs,
|
'description': yt_data_extract.format_text_runs,
|
||||||
'duration': html.escape,
|
'duration': html.escape,
|
||||||
'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
|
'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
|
||||||
'size': html.escape,
|
'size': html.escape,
|
||||||
'author': html.escape,
|
'author': html.escape,
|
||||||
'author_url': lambda url: html.escape(URL_ORIGIN + url),
|
'author_url': lambda url: html.escape(util.URL_ORIGIN + url),
|
||||||
'views': html.escape,
|
'views': html.escape,
|
||||||
'subscriber_count': html.escape,
|
'subscriber_count': html.escape,
|
||||||
'badges': badges_html,
|
'badges': badges_html,
|
||||||
@ -645,7 +330,7 @@ def video_item_html(item, template, html_exclude=set()):
|
|||||||
html_ready = get_html_ready(item)
|
html_ready = get_html_ready(item)
|
||||||
|
|
||||||
html_ready['video_info'] = html.escape(json.dumps(video_info) )
|
html_ready['video_info'] = html.escape(json.dumps(video_info) )
|
||||||
html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
|
html_ready['url'] = util.URL_ORIGIN + "/watch?v=" + html_ready['id']
|
||||||
html_ready['datetime'] = '' #TODO
|
html_ready['datetime'] = '' #TODO
|
||||||
|
|
||||||
for key in html_exclude:
|
for key in html_exclude:
|
||||||
@@ -658,7 +343,7 @@ def playlist_item_html(item, template, html_exclude=set()):
 def playlist_item_html(item, template, html_exclude=set()):
     html_ready = get_html_ready(item)

-    html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
+    html_ready['url'] = util.URL_ORIGIN + "/playlist?list=" + html_ready['id']
     html_ready['datetime'] = '' #TODO

     for key in html_exclude:
@@ -672,10 +357,6 @@ def playlist_item_html(item, template, html_exclude=set()):



-def update_query_string(query_string, items):
-    parameters = urllib.parse.parse_qs(query_string)
-    parameters.update(items)
-    return urllib.parse.urlencode(parameters, doseq=True)

 page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
 current_page_button_template = Template('''<div class="page-button">$page</div>''')
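The removed update_query_string helper reappears unchanged in youtube/util.py below. For reference, a quick usage sketch (assumed inputs; output parameter order follows the parsed dict):

import urllib.parse

def update_query_string(query_string, items):
    parameters = urllib.parse.parse_qs(query_string)
    parameters.update(items)
    return urllib.parse.urlencode(parameters, doseq=True)

print(update_query_string('page=2&sort=3', {'page': ['5']}))
# -> page=5&sort=3  (existing parameters preserved, 'page' replaced)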
@@ -694,7 +375,7 @@ def page_buttons_html(current_page, estimated_pages, url, current_query_string):
             template = current_page_button_template
         else:
             template = page_button_template
-        result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
+        result += template.substitute(page=page, href = url + "?" + util.update_query_string(current_query_string, {'page': [str(page)]}) )
     return result
@@ -723,15 +404,15 @@ def renderer_html(renderer, additional_info={}, current_query_string=''):
         return renderer_html(renderer['contents'][0], additional_info, current_query_string)

     if type == 'channelRenderer':
-        info = renderer_info(renderer)
+        info = yt_data_extract.renderer_info(renderer)
         html_ready = get_html_ready(info)
-        html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
+        html_ready['url'] = util.URL_ORIGIN + "/channel/" + html_ready['id']
         return medium_channel_item_template.substitute(html_ready)

     if type in ('movieRenderer', 'clarificationRenderer'):
         return ''

-    info = renderer_info(renderer)
+    info = yt_data_extract.renderer_info(renderer)
     info.update(additional_info)
     html_exclude = set(additional_info.keys())
     if type == 'compactVideoRenderer':
@@ -1,11 +1,12 @@
+from youtube.template import Template
+from youtube import util, html_common
+import settings
+
 import os
 import json
-from youtube.template import Template
-from youtube import common
 import html
 import gevent
 import urllib
-import settings

 playlists_directory = os.path.join(settings.data_dir, "playlists")
 thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails")
@@ -38,7 +39,7 @@ def download_thumbnail(playlist_name, video_id):
     url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
     save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg")
     try:
-        thumbnail = common.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
+        thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
     except urllib.error.HTTPError as e:
         print("Failed to download thumbnail for " + video_id + ": " + str(e))
         return
@@ -78,15 +79,15 @@ def get_local_playlist_page(name):
             if info['id'] + ".jpg" in thumbnails:
                 info['thumbnail'] = "/youtube.com/data/playlist_thumbnails/" + name + "/" + info['id'] + ".jpg"
             else:
-                info['thumbnail'] = common.get_thumbnail_url(info['id'])
+                info['thumbnail'] = util.get_thumbnail_url(info['id'])
                 missing_thumbnails.append(info['id'])
-            videos_html += common.video_item_html(info, common.small_video_item_template)
+            videos_html += html_common.video_item_html(info, html_common.small_video_item_template)
         except json.decoder.JSONDecodeError:
             pass
     gevent.spawn(download_thumbnails, name, missing_thumbnails)
     return local_playlist_template.substitute(
         page_title = name + ' - Local playlist',
-        header = common.get_header(),
+        header = html_common.get_header(),
         videos = videos_html,
         title = name,
         page_buttons = ''
@@ -127,11 +128,11 @@ def get_playlists_list_page():
     page = '''<ul>\n'''
     list_item_template = Template('''    <li><a href="$url">$name</a></li>\n''')
     for name in get_playlist_names():
-        page += list_item_template.substitute(url = html.escape(common.URL_ORIGIN + '/playlists/' + name), name = html.escape(name))
+        page += list_item_template.substitute(url = html.escape(util.URL_ORIGIN + '/playlists/' + name), name = html.escape(name))
     page += '''</ul>\n'''
-    return common.yt_basic_template.substitute(
+    return html_common.yt_basic_template.substitute(
         page_title = "Local playlists",
-        header = common.get_header(),
+        header = html_common.get_header(),
         style = '',
         page = page,
     )
@@ -151,7 +152,7 @@ def path_edit_playlist(env, start_response):
     if parameters['action'][0] == 'remove':
         playlist_name = env['path_parts'][1]
         remove_from_playlist(playlist_name, parameters['video_info_list'])
-        start_response('303 See Other', [('Location', common.URL_ORIGIN + env['PATH_INFO']),] )
+        start_response('303 See Other', [('Location', util.URL_ORIGIN + env['PATH_INFO']),] )
         return b''

     else:
@@ -1,10 +1,9 @@
+from youtube import util, yt_data_extract, html_common, template, proto
+
 import base64
-import youtube.common as common
 import urllib
 import json
 import string
-from youtube import template
-import youtube.proto as proto
 import gevent
 import math
@@ -49,10 +48,10 @@ headers_1 = (

 def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
     url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = common.fetch_url(url, common.mobile_ua + headers_1, report_text=report_text)
+    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
     '''with open('debug/playlist_debug', 'wb') as f:
         f.write(content)'''
-    content = json.loads(common.uppercase_escape(content.decode('utf-8')))
+    content = json.loads(util.uppercase_escape(content.decode('utf-8')))

     return content
@@ -69,11 +68,11 @@ def get_videos(playlist_id, page):
         'X-YouTube-Client-Version': '2.20180508',
     }

-    content = common.fetch_url(url, headers, report_text="Retrieved playlist")
+    content = util.fetch_url(url, headers, report_text="Retrieved playlist")
     '''with open('debug/playlist_debug', 'wb') as f:
         f.write(content)'''

-    info = json.loads(common.uppercase_escape(content.decode('utf-8')))
+    info = json.loads(util.uppercase_escape(content.decode('utf-8')))
     return info
@@ -101,22 +100,22 @@ def get_playlist_page(env, start_response):
         video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents']
     videos_html = ''
     for video_json in video_list:
-        info = common.renderer_info(video_json['playlistVideoRenderer'])
-        videos_html += common.video_item_html(info, common.small_video_item_template)
+        info = yt_data_extract.renderer_info(video_json['playlistVideoRenderer'])
+        videos_html += html_common.video_item_html(info, html_common.small_video_item_template)


-    metadata = common.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer'])
+    metadata = yt_data_extract.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer'])
     video_count = int(metadata['size'].replace(',', ''))
-    page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", env['QUERY_STRING'])
+    page_buttons = html_common.page_buttons_html(int(page), math.ceil(video_count/20), util.URL_ORIGIN + "/playlist", env['QUERY_STRING'])

-    html_ready = common.get_html_ready(metadata)
+    html_ready = html_common.get_html_ready(metadata)
     html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)

     stats = ''
     stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
     stats += playlist_stat_template.substitute(stat=html_ready['views'])
     return yt_playlist_template.substitute(
-        header = common.get_header(),
+        header = html_common.get_header(),
         videos = videos_html,
         page_buttons = page_buttons,
         stats = stats,
@@ -1,11 +1,11 @@
 # Contains functions having to do with posting/editing/deleting comments
+from youtube import util, html_common, proto, comments, accounts
+import settings
+
 import urllib
 import json
-from youtube import common, proto, comments, accounts
 import re
 import traceback
-import settings
 import os

 def _post_comment(text, video_id, session_token, cookiejar):
@@ -31,7 +31,7 @@ def _post_comment(text, video_id, session_token, cookiejar):
     data = urllib.parse.urlencode(data_dict).encode()


-    content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)

     code = json.loads(content)['code']
     print("Comment posting code: " + code)
@@ -62,7 +62,7 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
     }
     data = urllib.parse.urlencode(data_dict).encode()

-    content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)

     code = json.loads(content)['code']
     print("Comment posting code: " + code)
@@ -90,7 +90,7 @@ def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar):
     }
     data = urllib.parse.urlencode(data_dict).encode()

-    content = common.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
     code = json.loads(content)['code']
     print("Comment deletion code: " + code)
     return code
@@ -101,8 +101,8 @@ def get_session_token(video_id, cookiejar):
     # youtube-dl uses disable_polymer=1 which uses a different request format which has an obfuscated javascript algorithm to generate a parameter called "bgr"
     # Tokens retrieved from disable_polymer pages only work with that format. Tokens retrieved on mobile only work using mobile requests
     # Additionally, tokens retrieved without sending the same cookie won't work. So this is necessary even if the bgr and stuff was reverse engineered.
-    headers = {'User-Agent': common.mobile_user_agent}
-    mobile_page = common.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode()
+    headers = {'User-Agent': util.mobile_user_agent}
+    mobile_page = util.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode()
     match = xsrf_token_regex.search(mobile_page)
     if match:
         return match.group(1).replace("%3D", "=")
@@ -118,9 +118,9 @@ def delete_comment(env, start_response):
     code = _delete_comment(video_id, parameters['comment_id'][0], parameters['author_id'][0], token, cookiejar)

     if code == "SUCCESS":
-        start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_success'),] )
+        start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_success'),] )
     else:
-        start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_fail'),] )
+        start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_fail'),] )

 def post_comment(env, start_response):
     parameters = env['parameters']
@@ -131,11 +131,11 @@ def post_comment(env, start_response):

     if 'parent_id' in parameters:
         code = _post_comment_reply(parameters['comment_text'][0], parameters['video_id'][0], parameters['parent_id'][0], token, cookiejar)
-        start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) )
+        start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) )

     else:
         code = _post_comment(parameters['comment_text'][0], parameters['video_id'][0], token, cookiejar)
-        start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) )
+        start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) )

     return b''
@@ -163,10 +163,10 @@ def get_delete_comment_page(env, start_response):
     page += '''
 <input type="submit" value="Yes, delete it">
 </form>'''
-    return common.yt_basic_template.substitute(
+    return html_common.yt_basic_template.substitute(
         page_title = "Delete comment?",
         style = style,
-        header = common.get_header(),
+        header = html_common.get_header(),
         page = page,
     ).encode('utf-8')
@@ -174,7 +174,7 @@ def get_post_comment_page(env, start_response):
     start_response('200 OK', [('Content-type','text/html'),])
     parameters = env['parameters']
     video_id = parameters['video_id'][0]
-    parent_id = common.default_multi_get(parameters, 'parent_id', 0, default='')
+    parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='')

     style = ''' main{
     display: grid;
@@ -194,23 +194,23 @@ textarea{
 }'''
     if parent_id: # comment reply
         comment_box = comments.comment_box_template.substitute(
-            form_action = common.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id,
+            form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id,
             video_id_input = '',
             post_text = "Post reply",
             options=comments.comment_box_account_options(),
         )
     else:
         comment_box = comments.comment_box_template.substitute(
-            form_action = common.URL_ORIGIN + '/post_comment',
+            form_action = util.URL_ORIGIN + '/post_comment',
             video_id_input = '''<input type="hidden" name="video_id" value="''' + video_id + '''">''',
             post_text = "Post comment",
             options=comments.comment_box_account_options(),
         )

     page = '''<div class="left">\n''' + comment_box + '''</div>\n'''
-    return common.yt_basic_template.substitute(
+    return html_common.yt_basic_template.substitute(
         page_title = "Post comment reply" if parent_id else "Post a comment",
         style = style,
-        header = common.get_header(),
+        header = html_common.get_header(),
         page = page,
     ).encode('utf-8')
@@ -1,11 +1,12 @@
+from youtube import util, html_common, yt_data_extract, proto
+
 import json
 import urllib
 import html
 from string import Template
 import base64
 from math import ceil
-from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
-from youtube import common, proto

 with open("yt_search_results_template.html", "r") as file:
     yt_search_results_template = file.read()
@@ -54,7 +55,7 @@ def get_search_json(query, page, autocorrect, sort, filters):
         'X-YouTube-Client-Version': '2.20180418',
     }
     url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
-    content = common.fetch_url(url, headers=headers, report_text="Got search results")
+    content = util.fetch_url(url, headers=headers, report_text="Got search results")
     info = json.loads(content)
     return info
@@ -70,9 +71,9 @@ def get_search_page(env, start_response):
     start_response('200 OK', [('Content-type','text/html'),])
     parameters = env['parameters']
     if len(parameters) == 0:
-        return common.yt_basic_template.substitute(
+        return html_common.yt_basic_template.substitute(
             page_title = "Search",
-            header = common.get_header(),
+            header = html_common.get_header(),
             style = '',
             page = '',
         ).encode('utf-8')
@@ -100,24 +101,24 @@ def get_search_page(env, start_response):
             renderer = renderer[type]
             corrected_query_string = parameters.copy()
             corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
-            corrected_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
+            corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
             corrections = did_you_mean.substitute(
                 corrected_query_url = corrected_query_url,
-                corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
+                corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
             )
             continue
         if type == 'showingResultsForRenderer':
             renderer = renderer[type]
             no_autocorrect_query_string = parameters.copy()
             no_autocorrect_query_string['autocorrect'] = ['0']
-            no_autocorrect_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
+            no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
             corrections = showing_results_for.substitute(
-                corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
+                corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
                 original_query_url = no_autocorrect_query_url,
                 original_query = html.escape(renderer['originalQuery']['simpleText']),
             )
             continue
-        result_list_html += common.renderer_html(renderer, current_query_string=env['QUERY_STRING'])
+        result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING'])

     page = int(page)
     if page <= 5:
@@ -129,13 +130,13 @@ def get_search_page(env, start_response):


     result = Template(yt_search_results_template).substitute(
-        header = common.get_header(query),
+        header = html_common.get_header(query),
         results = result_list_html,
         page_title = query + " - Search",
         search_box_value = html.escape(query),
         number_of_results = '{:,}'.format(estimated_results),
         number_of_pages = '{:,}'.format(estimated_pages),
-        page_buttons = common.page_buttons_html(page, estimated_pages, URL_ORIGIN + "/search", env['QUERY_STRING']),
+        page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']),
         corrections = corrections
     )
     return result.encode('utf-8')
youtube/util.py (new file, 153 lines)
@@ -0,0 +1,153 @@
+import socks, sockshandler
+import gzip
+import brotli
+import urllib.parse
+import re
+import time
+import settings
+
+
+URL_ORIGIN = "/https://www.youtube.com"
+
+
+class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
+    '''Separate cookiejars for receiving and sending'''
+    def __init__(self, cookiejar_send=None, cookiejar_receive=None):
+        import http.cookiejar
+        self.cookiejar_send = cookiejar_send
+        self.cookiejar_receive = cookiejar_receive
+
+    def http_request(self, request):
+        if self.cookiejar_send is not None:
+            self.cookiejar_send.add_cookie_header(request)
+        return request
+
+    def http_response(self, request, response):
+        if self.cookiejar_receive is not None:
+            self.cookiejar_receive.extract_cookies(response, request)
+        return response
+
+    https_request = http_request
+    https_response = http_response
+
+
+def decode_content(content, encoding_header):
+    encodings = encoding_header.replace(' ', '').split(',')
+    for encoding in reversed(encodings):
+        if encoding == 'identity':
+            continue
+        if encoding == 'br':
+            content = brotli.decompress(content)
+        elif encoding == 'gzip':
+            content = gzip.decompress(content)
+    return content
+
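decode_content walks the Content-Encoding header from right to left, undoing each compression layer in turn. A quick behavioral sketch (illustration only):

import gzip
body = gzip.compress(b'hello')
decode_content(body, 'identity, gzip')  # -> b'hello'
# 'gzip' (rightmost) is decompressed first; 'identity' is skipped.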
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True):
+    '''
+    When cookiejar_send is set to a CookieJar object,
+     those cookies will be sent in the request (but cookies in response will not be merged into it)
+    When cookiejar_receive is set to a CookieJar object,
+     cookies received in the response will be merged into the object (nothing will be sent from it)
+    When both are set to the same object, cookies will be sent from the object,
+     and response cookies will be merged into it.
+    '''
+    headers = dict(headers)     # Note: Calling dict() on a dict will make a copy
+    headers['Accept-Encoding'] = 'gzip, br'
+
+    # prevent python version being leaked by urllib if User-Agent isn't provided
+    #  (urllib will use ex. Python-urllib/3.6 otherwise)
+    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
+        headers['User-Agent'] = 'Python-urllib'
+
+    if data is not None:
+        if isinstance(data, str):
+            data = data.encode('ascii')
+        elif not isinstance(data, bytes):
+            data = urllib.parse.urlencode(data).encode('ascii')
+
+    start_time = time.time()
+
+    req = urllib.request.Request(url, data=data, headers=headers)
+
+    cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
+
+    if use_tor and settings.route_tor:
+        opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
+    else:
+        opener = urllib.request.build_opener(cookie_processor)
+
+    response = opener.open(req, timeout=timeout)
+    response_time = time.time()
+
+    content = response.read()
+    read_finish = time.time()
+    if report_text:
+        print(report_text, '    Latency:', round(response_time - start_time,3), '    Read time:', round(read_finish - response_time,3))
+    content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
+    return content
+
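A usage sketch for fetch_url's cookie handling (an assumed call, not part of the commit): passing the same CookieJar as both cookiejar_send and cookiejar_receive gives normal session behavior, while passing it only as cookiejar_send keeps the jar read-only.

import http.cookiejar
from youtube import util

cookiejar = http.cookiejar.CookieJar()
# Send existing cookies and merge any Set-Cookie responses back into the jar:
page = util.fetch_url('https://m.youtube.com/watch?v=dQw4w9WgXcQ',
                      headers=util.mobile_ua,
                      report_text='Fetched watch page',
                      cookiejar_send=cookiejar, cookiejar_receive=cookiejar,
                      use_tor=False)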
+mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
+mobile_ua = (('User-Agent', mobile_user_agent),)
+desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
+desktop_ua = (('User-Agent', desktop_user_agent),)
+
+
+def dict_add(*dicts):
+    for dictionary in dicts[1:]:
+        dicts[0].update(dictionary)
+    return dicts[0]
+
+
+def video_id(url):
+    url_parts = urllib.parse.urlparse(url)
+    return urllib.parse.parse_qs(url_parts.query)['v'][0]
+
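video_id pulls the v parameter out of a full watch URL, e.g. (illustration):

util.video_id('https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42')  # -> 'dQw4w9WgXcQ'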
+def default_multi_get(object, *keys, default):
+    ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
+    try:
+        for key in keys:
+            object = object[key]
+        return object
+    except (IndexError, KeyError):
+        return default
+
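default_multi_get behavior, illustrated with a hypothetical nested structure:

response = {'items': [{'snippet': {'title': 'Example'}}]}
util.default_multi_get(response, 'items', 0, 'snippet', 'title', default='')  # -> 'Example'
util.default_multi_get(response, 'items', 5, 'snippet', 'title', default='')  # -> '' (IndexError swallowed)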
+# default, sddefault, mqdefault, hqdefault, hq720
+def get_thumbnail_url(video_id):
+    return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
+
+
+def seconds_to_timestamp(seconds):
+    seconds = int(seconds)
+    hours, seconds = divmod(seconds,3600)
+    minutes, seconds = divmod(seconds,60)
+    if hours != 0:
+        timestamp = str(hours) + ":"
+        timestamp += str(minutes).zfill(2)  # zfill pads with zeros
+    else:
+        timestamp = str(minutes)
+
+    timestamp += ":" + str(seconds).zfill(2)
+    return timestamp
+
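seconds_to_timestamp, worked through the code above:

util.seconds_to_timestamp(71)    # -> '1:11'
util.seconds_to_timestamp(3671)  # -> '1:01:11'  (hours appear only when nonzero)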
+
+def update_query_string(query_string, items):
+    parameters = urllib.parse.parse_qs(query_string)
+    parameters.update(items)
+    return urllib.parse.urlencode(parameters, doseq=True)
+
+
+def uppercase_escape(s):
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: chr(int(m.group(1), base=16)), s)
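uppercase_escape undoes YouTube's \UXXXXXXXX escape sequences, e.g.:

util.uppercase_escape('\\U0001F600')  # -> '😀' (chr(0x1F600))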
@@ -1,12 +1,12 @@
+from youtube import util, html_common, comments
+
 from youtube_dl.YoutubeDL import YoutubeDL
 from youtube_dl.extractor.youtube import YoutubeError
 import json
 import urllib
 from string import Template
 import html
-import youtube.common as common
-from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN
-import youtube.comments as comments
 import gevent
 import settings
 import os
@@ -127,9 +127,9 @@ def get_related_items_html(info):
     result = ""
     for item in info['related_vids']:
         if 'list' in item: # playlist:
-            result += common.small_playlist_item_html(watch_page_related_playlist_info(item))
+            result += html_common.small_playlist_item_html(watch_page_related_playlist_info(item))
         else:
-            result += common.small_video_item_html(watch_page_related_video_info(item))
+            result += html_common.small_video_item_html(watch_page_related_video_info(item))
     return result
@@ -137,7 +137,7 @@ def get_related_items_html(info):
 # converts these to standard names
 def watch_page_related_video_info(item):
     result = {key: item[key] for key in ('id', 'title', 'author')}
-    result['duration'] = common.seconds_to_timestamp(item['length_seconds'])
+    result['duration'] = util.seconds_to_timestamp(item['length_seconds'])
     try:
         result['views'] = item['short_view_count_text']
     except KeyError:
@@ -155,9 +155,9 @@ def watch_page_related_playlist_info(item):

 def sort_formats(info):
     sorted_formats = info['formats'].copy()
-    sorted_formats.sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0))
+    sorted_formats.sort(key=lambda x: util.default_multi_get(_formats, x['format_id'], 'height', default=0))
     for index, format in enumerate(sorted_formats):
-        if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
+        if util.default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
             break
     sorted_formats = sorted_formats[index:] + sorted_formats[0:index]
     sorted_formats = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none']
@@ -236,7 +236,7 @@ def get_watch_page(env, start_response):

     start_response('200 OK', [('Content-type','text/html'),])

-    lc = common.default_multi_get(env['parameters'], 'lc', 0, default='')
+    lc = util.default_multi_get(env['parameters'], 'lc', 0, default='')
     if settings.route_tor:
         proxy = 'socks5://127.0.0.1:9150/'
     else:
@@ -256,17 +256,17 @@ def get_watch_page(env, start_response):
     #chosen_format = choose_format(info)

     if isinstance(info, str): # youtube error
-        return common.yt_basic_template.substitute(
+        return html_common.yt_basic_template.substitute(
             page_title = "Error",
             style = "",
-            header = common.get_header(),
+            header = html_common.get_header(),
             page = html.escape(info),
         ).encode('utf-8')

     sorted_formats = sort_formats(info)

     video_info = {
-        "duration": common.seconds_to_timestamp(info["duration"]),
+        "duration": util.seconds_to_timestamp(info["duration"]),
         "id": info['id'],
         "title": info['title'],
         "author": info['uploader'],
@@ -338,7 +338,7 @@ def get_watch_page(env, start_response):
     page = yt_watch_template.substitute(
         video_title = html.escape(info["title"]),
         page_title = html.escape(info["title"]),
-        header = common.get_header(),
+        header = html_common.get_header(),
         uploader = html.escape(info["uploader"]),
         uploader_channel_url = '/' + info["uploader_url"],
         upload_date = upload_date,
@@ -1,7 +1,7 @@
 import mimetypes
 import urllib.parse
 import os
-from youtube import local_playlist, watch, search, playlist, channel, comments, common, post_comment, accounts
+from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util
 import settings
 YOUTUBE_FILES = (
     "/shared.css",
@@ -64,7 +64,7 @@ def youtube(env, start_response):

     elif path.startswith("/api/"):
         start_response('200 OK', [('Content-type', 'text/vtt'),] )
-        result = common.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
+        result = util.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
         result = result.replace(b"align:start position:0%", b"")
         return result
youtube/yt_data_extract.py (new file, 205 lines)
@@ -0,0 +1,205 @@
+import html
+
+# videos (all of type str):
+
+# id
+# title
+# url
+# author
+# author_url
+# thumbnail
+# description
+# published
+# duration
+# likes
+# dislikes
+# views
+# playlist_index
+
+# playlists:
+
+# id
+# title
+# url
+# author
+# author_url
+# thumbnail
+# description
+# updated
+# size
+# first_video_id
+
+
+def get_plain_text(node):
+    try:
+        return html.escape(node['simpleText'])
+    except KeyError:
+        return unformmated_text_runs(node['runs'])
+
+def unformmated_text_runs(runs):
+    result = ''
+    for text_run in runs:
+        result += html.escape(text_run["text"])
+    return result
+
+def format_text_runs(runs):
+    if isinstance(runs, str):
+        return runs
+    result = ''
+    for text_run in runs:
+        if text_run.get("bold", False):
+            result += "<b>" + html.escape(text_run["text"]) + "</b>"
+        elif text_run.get('italics', False):
+            result += "<i>" + html.escape(text_run["text"]) + "</i>"
+        else:
+            result += html.escape(text_run["text"])
+    return result
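format_text_runs converts YouTube's "runs" lists into escaped HTML; a small illustration with made-up runs:

runs = [{'text': 'A '}, {'text': 'bold', 'bold': True}, {'text': ' <claim>'}]
format_text_runs(runs)  # -> 'A <b>bold</b> &lt;claim&gt;'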
+
+
+def get_url(node):
+    try:
+        return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+    except KeyError:
+        return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+
+
+def get_text(node):
+    try:
+        return node['simpleText']
+    except KeyError:
+        pass
+    try:
+        return node['runs'][0]['text']
+    except IndexError: # empty text runs
+        return ''
+
+def get_formatted_text(node):
+    try:
+        return node['runs']
+    except KeyError:
+        return node['simpleText']
+
+def get_badges(node):
+    badges = []
+    for badge_node in node:
+        badge = badge_node['metadataBadgeRenderer']['label']
+        if badge.lower() != 'new':
+            badges.append(badge)
+    return badges
+
+def get_thumbnail(node):
+    try:
+        return node['thumbnails'][0]['url']  # polymer format
+    except KeyError:
+        return node['url']  # ajax format
+
+dispatch = {
+
+    # polymer format
+    'title': ('title', get_text),
+    'publishedTimeText': ('published', get_text),
+    'videoId': ('id', lambda node: node),
+    'descriptionSnippet': ('description', get_formatted_text),
+    'lengthText': ('duration', get_text),
+    'thumbnail': ('thumbnail', get_thumbnail),
+    'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
+
+    'viewCountText': ('views', get_text),
+    'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]),  # the format is "324 videos"
+    'videoCountText': ('size', get_text),
+    'playlistId': ('id', lambda node: node),
+    'descriptionText': ('description', get_formatted_text),
+
+    'subscriberCountText': ('subscriber_count', get_text),
+    'channelId': ('id', lambda node: node),
+    'badges': ('badges', get_badges),
+
+    # ajax format
+    'view_count_text': ('views', get_text),
+    'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
+    'owner_text': ('author', get_text),
+    'owner_endpoint': ('author_url', lambda node: node['url']),
+    'description': ('description', get_formatted_text),
+    'index': ('playlist_index', get_text),
+    'short_byline': ('author', get_text),
+    'length': ('duration', get_text),
+    'video_id': ('id', lambda node: node),
+
+}
+
+def renderer_info(renderer):
+    try:
+        info = {}
+        if 'viewCountText' in renderer:  # prefer this one as it contains all the digits
+            info['views'] = get_text(renderer['viewCountText'])
+        elif 'shortViewCountText' in renderer:
+            info['views'] = get_text(renderer['shortViewCountText'])
+
+        if 'ownerText' in renderer:
+            info['author'] = renderer['ownerText']['runs'][0]['text']
+            info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+        try:
+            overlays = renderer['thumbnailOverlays']
+        except KeyError:
+            pass
+        else:
+            for overlay in overlays:
+                if 'thumbnailOverlayTimeStatusRenderer' in overlay:
+                    info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text'])
+                # show renderers don't have videoCountText
+                elif 'thumbnailOverlayBottomPanelRenderer' in overlay:
+                    info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text'])
+
+        # show renderers don't have playlistId, have to dig into the url to get it
+        try:
+            info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId']
+        except KeyError:
+            pass
+        for key, node in renderer.items():
+            if key in ('longBylineText', 'shortBylineText'):
+                info['author'] = get_text(node)
+                try:
+                    info['author_url'] = get_url(node)
+                except KeyError:
+                    pass
+
+            # show renderers don't have thumbnail key at top level, dig into thumbnailRenderer
+            elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node:
+                info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url']
+            else:
+                try:
+                    simple_key, function = dispatch[key]
+                except KeyError:
+                    continue
+                info[simple_key] = function(node)
+        return info
+    except KeyError:
+        print(renderer)
+        raise
+
+def ajax_info(item_json):
+    try:
+        info = {}
+        for key, node in item_json.items():
+            try:
+                simple_key, function = dispatch[key]
+            except KeyError:
+                continue
+            info[simple_key] = function(node)
+        return info
+    except KeyError:
+        print(item_json)
+        raise
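renderer_info applied to a minimal polymer-style renderer (hypothetical input, for illustration):

renderer = {
    'videoId': 'dQw4w9WgXcQ',
    'title': {'runs': [{'text': 'Example video'}]},
    'lengthText': {'simpleText': '3:33'},
    'viewCountText': {'simpleText': '1,234 views'},
}
renderer_info(renderer)
# -> {'views': '1,234 views', 'id': 'dQw4w9WgXcQ', 'title': 'Example video', 'duration': '3:33'}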