Remove ad-hoc response saving from code, create a debug setting for fetch_url
commit e00c3cf99f
parent cb1c899a45
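util.fetch_url now takes a debug_name parameter. When the new debugging_save_responses setting is enabled, the decoded response body is written to data_dir/debug/<debug_name>. The ad-hoc, mostly commented-out with open('debug/...') writes scattered through the login, channel, comments, playlist, comment-posting, and search code are removed in favor of passing debug_name.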
@@ -24,9 +24,12 @@ default_comment_sorting = 0
 
 # developer use to debug 403s
 gather_googlevideo_domains = False
+
+# save all responses from youtube for debugging
+debugging_save_responses = False
 '''
 exec(default_settings)
 
-allowed_targets = set(("route_tor", "port_number", "allow_foreign_addresses", "subtitles_mode", "subtitles_language", "enable_related_videos", "enable_comments", "enable_comment_avatars", "default_comment_sorting", "gather_googlevideo_domains"))
+allowed_targets = set(("route_tor", "port_number", "allow_foreign_addresses", "subtitles_mode", "subtitles_language", "enable_related_videos", "enable_comments", "enable_comment_avatars", "default_comment_sorting", "gather_googlevideo_domains", "debugging_save_responses"))
 
 def log_ignored_line(line_number, message):
     print("settings.txt: Ignoring line " + str(node.lineno) + " (" + message + ")")
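Once this lands, a developer chasing 403s opts in from the settings file instead of editing code. A minimal sketch of the settings.txt line (the block above defaults it to False):

    # save all responses from youtube for debugging
    debugging_save_responses = True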
@@ -162,10 +162,8 @@ def _login(username, password, cookiejar, use_tor):
     Taken from youtube-dl
     """
 
-    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-    '''with open('debug/login_page', 'w', encoding='utf-8') as f:
-        f.write(login_page)'''
-    #print(cookiejar.as_lwp_str())
+    login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='login_page').decode('utf-8')
+
     if login_page is False:
         return
 
@@ -189,10 +187,7 @@ def _login(username, password, cookiejar, use_tor):
             'Google-Accounts-XSRF': 1,
         }
         headers.update(yt_dl_headers)
-        result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
-        #print(cookiejar.as_lwp_str())
-        '''with open('debug/' + note, 'w', encoding='utf-8') as f:
-            f.write(result)'''
+        result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name=note).decode('utf-8')
        result = re.sub(r'^[^\[]*', '', result)
        return json.loads(result)
 
@@ -321,12 +316,10 @@ def _login(username, password, cookiejar, use_tor):
         return False
 
     try:
-        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8')
+        check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor, debug_name='check_cookie_results').decode('utf-8')
     except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err:
         return False
 
-    '''with open('debug/check_cookie_results', 'w', encoding='utf-8') as f:
-        f.write(check_cookie_results)'''
 
     if 'https://myaccount.google.com/' not in check_cookie_results:
         warn('Unable to log in')
@@ -88,11 +88,9 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
     url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
 
     print("Sending channel tab ajax request")
-    content = util.fetch_url(url, util.desktop_ua + headers_1)
+    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
     print("Finished recieving channel tab response")
 
-    '''with open('debug/channel_debug', 'wb') as f:
-        f.write(content)'''
     return content
 
 def get_number_of_videos(channel_id):
@@ -103,15 +101,13 @@ def get_number_of_videos(channel_id):
 
     # Sometimes retrieving playlist info fails with 403 for no discernable reason
     try:
-        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj, debug_name='number_of_videos')
     except urllib.error.HTTPError as e:
         if e.code != 403:
             raise
         print("Couldn't retrieve number of videos")
         return 1000
 
-    '''with open('debug/playlist_debug_metadata', 'wb') as f:
-        f.write(response)'''
     response = response.decode('utf-8')
     print("Got response for number of videos")
 
@@ -135,9 +131,7 @@ def get_channel_search_json(channel_id, query, page):
     ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
     ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
 
-    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
-    '''with open('debug/channel_search_debug', 'wb') as f:
-        f.write(polymer_json)'''
+    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1, debug_name='channel_search')
 
     return polymer_json
 
@@ -293,9 +287,9 @@ def get_channel_page(channel_id, tab='videos'):
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
 
     elif tab == 'about':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists')
     elif tab == 'search':
         tasks = (
             gevent.spawn(get_number_of_videos, channel_id ),
@@ -336,13 +330,11 @@ def get_channel_page_general_url(base_url, tab, request):
     query = request.args.get('query', '')
 
     if tab == 'videos':
-        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
-        with open('debug/channel_debug', 'wb') as f:
-            f.write(polymer_json)
+        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
     elif tab == 'about':
-        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
     elif tab == 'playlists':
-        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
+        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
     elif tab == 'search':
         raise NotImplementedError()
     else:
@@ -83,7 +83,7 @@ def request_comments(ctoken, replies=False):
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
 
     for i in range(0,8):    # don't retry more than 8 times
-        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
+        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments", debug_name='request_comments')
         if content[0:4] == b")]}'":             # random closing characters included at beginning of response for some reason
             content = content[4:]
         elif content[0:10] == b'\n<!DOCTYPE':   # occasionally returns html instead of json for no reason
@@ -91,8 +91,6 @@ def request_comments(ctoken, replies=False):
             print("got <!DOCTYPE>, retrying")
             continue
         break
-    '''with open('debug/comments_debug', 'wb') as f:
-        f.write(content)'''
     return content
 
 
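Note that every retry in the loop above passes the same debug_name, and the save in fetch_url (see the util hunks below) opens the file in 'wb' mode, so only the last attempt's body survives on disk. A hypothetical snippet for inspecting it, assuming the setting is enabled and that a top-level settings module exposes data_dir (util.py references settings.data_dir the same way):

    import os
    import settings  # import path assumed

    path = os.path.join(settings.data_dir, 'debug', 'request_comments')
    with open(path, 'rb') as f:
        print(f.read(200))  # peek at the start of the last saved comments response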
@@ -47,9 +47,7 @@ headers_1 = (
 
 def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
     url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
     content = json.loads(util.uppercase_escape(content.decode('utf-8')))
 
     return content
@@ -67,9 +65,7 @@ def get_videos(playlist_id, page):
         'X-YouTube-Client-Version': '2.20180508',
     }
 
-    content = util.fetch_url(url, headers, report_text="Retrieved playlist")
-    '''with open('debug/playlist_debug', 'wb') as f:
-        f.write(content)'''
+    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')
 
     info = json.loads(util.uppercase_escape(content.decode('utf-8')))
     return info
@@ -35,13 +35,11 @@ def _post_comment(text, video_id, session_token, cookiejar):
     data = urllib.parse.urlencode(data_dict).encode()
 
 
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_comment')
 
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''
 
 
 def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
@@ -66,13 +64,11 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookie
     }
     data = urllib.parse.urlencode(data_dict).encode()
 
-    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar)
+    content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar, debug_name='post_reply')
 
     code = json.loads(content)['code']
     print("Comment posting code: " + code)
     return code
-    '''with open('debug/post_comment_response', 'wb') as f:
-        f.write(content)'''
 
 def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar):
     headers = {
@@ -53,7 +53,7 @@ def get_search_json(query, page, autocorrect, sort, filters):
         'X-YouTube-Client-Version': '2.20180418',
     }
     url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
-    content = util.fetch_url(url, headers=headers, report_text="Got search results")
+    content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
     info = json.loads(content)
     return info
 
@@ -5,6 +5,7 @@ import brotli
 import urllib.parse
 import re
 import time
+import os
 
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 # instead of using the system certificate store, meaning self-signed certificates
@@ -103,7 +104,7 @@ def decode_content(content, encoding_header):
         content = gzip.decompress(content)
     return content
 
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
     '''
     When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
@@ -160,6 +161,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
 
+    if settings.debugging_save_responses and debug_name is not None:
+        save_dir = os.path.join(settings.data_dir, 'debug')
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+
+        with open(os.path.join(save_dir, debug_name), 'wb') as f:
+            f.write(content)
+
     if return_response:
         return content, response
     return content
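Taken together, the util changes mean a caller only has to pass debug_name; when debugging_save_responses is enabled, the decompressed body is mirrored to data_dir/debug/ with no ad-hoc writes at the call site. A hypothetical interactive use (the 'from youtube import util' import path is assumed from this diff's function names, not shown in it):

    from youtube import util  # import path assumed

    url = 'https://www.youtube.com/results?search_query=test&pbj=1'
    # desktop_ua is a tuple of header pairs in this codebase, as the channel
    # hunks above suggest, so it can be passed straight through
    content = util.fetch_url(url, headers=util.desktop_ua, debug_name='search_results')
    # with debugging_save_responses = True, the body is also written to
    # <data_dir>/debug/search_results, overwritten on each call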
@@ -226,4 +235,4 @@ def update_query_string(query_string, items):
 def uppercase_escape(s):
     return re.sub(
         r'\\U([0-9a-fA-F]{8})',
-        lambda m: chr(int(m.group(1), base=16)), s)
+        lambda m: chr(int(m.group(1), base=16)), s)