Remove ad-hoc response saving from code, create a debug setting for fetch_url

James Taylor 2019-07-23 23:53:04 -07:00
parent cb1c899a45
commit e00c3cf99f
8 changed files with 33 additions and 46 deletions

View File

@@ -24,9 +24,12 @@ default_comment_sorting = 0
 # developer use to debug 403s
 gather_googlevideo_domains = False
+# save all responses from youtube for debugging
+debugging_save_responses = False
 '''
 exec(default_settings)
-allowed_targets = set(("route_tor", "port_number", "allow_foreign_addresses", "subtitles_mode", "subtitles_language", "enable_related_videos", "enable_comments", "enable_comment_avatars", "default_comment_sorting", "gather_googlevideo_domains"))
+allowed_targets = set(("route_tor", "port_number", "allow_foreign_addresses", "subtitles_mode", "subtitles_language", "enable_related_videos", "enable_comments", "enable_comment_avatars", "default_comment_sorting", "gather_googlevideo_domains", "debugging_save_responses"))
 def log_ignored_line(line_number, message):
     print("settings.txt: Ignoring line " + str(node.lineno) + " (" + message + ")")

View File

@@ -162,10 +162,8 @@ def _login(username, password, cookiejar, use_tor):
     Taken from youtube-dl
     """
-    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-    '''with open('debug/login_page', 'w', encoding='utf-8') as f:
-        f.write(login_page)'''
-    #print(cookiejar.as_lwp_str())
+    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='login_page').decode('utf-8')
     if login_page is False:
         return
@@ -189,10 +187,7 @@ def _login(username, password, cookiejar, use_tor):
            'Google-Accounts-XSRF': 1,
        }
        headers.update(yt_dl_headers)
-       result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-       #print(cookiejar.as_lwp_str())
-       '''with open('debug/' + note, 'w', encoding='utf-8') as f:
-           f.write(result)'''
+       result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name=note).decode('utf-8')
        result = re.sub(r'^[^\[]*', '', result)
        return json.loads(result)
@@ -321,12 +316,10 @@ def _login(username, password, cookiejar, use_tor):
         return False
     try:
-        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='check_cookie_results').decode('utf-8')
     except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
         return False
-    '''with open('debug/check_cookie_results', 'w', encoding='utf-8') as f:
-        f.write(check_cookie_results)'''
     if 'https://myaccount.google.com/' not in check_cookie_results:
         warn('Unable to log in')

View File

@@ -88,11 +88,9 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
     url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
     print("Sending channel tab ajax request")
-    content = util.fetch_url(url, util.desktop_ua + headers_1)
+    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
     print("Finished recieving channel tab response")
-    '''with open('debug/channel_debug', 'wb') as f:
-        f.write(content)'''
     return content

 def get_number_of_videos(channel_id):
@@ -103,15 +101,13 @@ def get_number_of_videos(channel_id):
     # Sometimes retrieving playlist info fails with 403 for no discernable reason
     try:
-        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj, debug_name='number_of_videos')
     except urllib.error.HTTPError as e:
         if e.code != 403:
             raise
         print("Couldn't retrieve number of videos")
         return 1000
-    '''with open('debug/playlist_debug_metadata', 'wb') as f:
-        f.write(response)'''
     response = response.decode('utf-8')
     print("Got response for number of videos")
@@ -135,9 +131,7 @@ def get_channel_search_json(channel_id, query, page):
     ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
     ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
-    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
-    '''with open('debug/channel_search_debug', 'wb') as f:
-        f.write(polymer_json)'''
+    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1, debug_name='channel_search')
     return polymer_json
@@ -293,9 +287,9 @@ def get_channel_page(channel_id, tab='videos'):
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'about':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists')
     elif tab == 'search':
         tasks = (
             gevent.spawn(get_number_of_videos, channel_id ),
@@ -336,13 +330,11 @@ def get_channel_page_general_url(base_url, tab, request):
     query = request.args.get('query', '')
     if tab == 'videos':
-        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
-        with open('debug/channel_debug', 'wb') as f:
-            f.write(polymer_json)
+        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
     elif tab == 'about':
-        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
     elif tab == 'search':
         raise NotImplementedError()
     else:

View File

@@ -83,7 +83,7 @@ def request_comments(ctoken, replies=False):
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
     for i in range(0,8): # don't retry more than 8 times
-        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
+        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments')
         if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
             content = content[4:]
         elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
@@ -91,8 +91,6 @@ def request_comments(ctoken, replies=False):
             print("got <!DOCTYPE>, retrying")
             continue
         break
-    '''with open('debug/comments_debug', 'wb') as f:
-        f.write(content)'''
     return content

View File

@@ -47,9 +47,7 @@ headers_1 = (
 def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
     url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
     content = json.loads(util.uppercase_escape(content.decode('utf-8')))
     return content
@@ -67,9 +65,7 @@ def get_videos(playlist_id, page):
         'X-YouTube-Client-Version': '2.20180508',
     }
-    content = util.fetch_url(url, headers, report_text="Retrieved playlist")
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')
     info = json.loads(util.uppercase_escape(content.decode('utf-8')))
     return info

View File

@@ -35,13 +35,11 @@ def _post_comment(text, video_id, session_token, cookiejar):
     data = urllib.parse.urlencode(data_dict).encode()
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_comment')
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''

 def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
@@ -66,13 +64,11 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookie
     }
     data = urllib.parse.urlencode(data_dict).encode()
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_reply')
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''

 def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar):
     headers = {

View File

@@ -53,7 +53,7 @@ def get_search_json(query, page, autocorrect, sort, filters):
         'X-YouTube-Client-Version': '2.20180418',
     }
     url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
-    content = util.fetch_url(url, headers=headers, report_text="Got search results")
+    content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
     info = json.loads(content)
     return info

View File

@@ -5,6 +5,7 @@ import brotli
 import urllib.parse
 import re
 import time
+import os

 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
@@ -103,7 +104,7 @@ def decode_content(content, encoding_header):
         content = gzip.decompress(content)
     return content

-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
     '''
     When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
@@ -160,6 +161,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
+
+    if settings.debugging_save_responses and debug_name is not None:
+        save_dir = os.path.join(settings.data_dir, 'debug')
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+
+        with open(os.path.join(save_dir, debug_name), 'wb') as f:
+            f.write(content)
+
     if return_response:
         return content, response
     return content
@@ -226,4 +235,4 @@ def update_query_string(query_string, items):
 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
-        lambda m: chr(int(m.group(1), base=16)), s)
+        lambda m: chr(int(m.group(1), base=16)), s)
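
With the fetch_url hunk above in place, a caller opts into response capture simply by passing debug_name. A minimal usage sketch; the URL, headers, and file name here are hypothetical and not taken from the commit:

    # hypothetical call site: when debugging_save_responses is True in settings,
    # the decompressed response body is written to <data_dir>/debug/example_page
    content = util.fetch_url('https://m.youtube.com/example_page?pbj=1',
                              headers=util.mobile_ua,
                              report_text='Retrieved example page',
                              debug_name='example_page')

With the default debugging_save_responses = False the new branch is a no-op, so normal use pays no extra cost.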