Remove ad-hoc response saving from code, create a debug setting for fetch_url

This commit is contained in:
James Taylor
2019-07-23 23:53:04 -07:00
parent cb1c899a45
commit e00c3cf99f
8 changed files with 33 additions and 46 deletions

View File

@@ -5,6 +5,7 @@ import brotli
import urllib.parse
import re
import time
import os
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
@@ -103,7 +104,7 @@ def decode_content(content, encoding_header):
content = gzip.decompress(content)
return content
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
'''
When cookiejar_send is set to a CookieJar object,
those cookies will be sent in the request (but cookies in response will not be merged into it)
@@ -160,6 +161,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
# Debug aid: when the debugging_save_responses setting is on and the caller
# supplied a debug_name, dump the decoded response body to
# <settings.data_dir>/debug/<debug_name>.
if settings.debugging_save_responses and debug_name is not None:
    save_dir = os.path.join(settings.data_dir, 'debug')
    # exist_ok=True creates the directory only if needed, without the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(save_dir, exist_ok=True)
    # content is written in binary mode, exactly as decoded above.
    with open(os.path.join(save_dir, debug_name), 'wb') as f:
        f.write(content)
if return_response:
return content, response
return content
@@ -226,4 +235,4 @@ def update_query_string(query_string, items):
def uppercase_escape(s):
    r'''
    Replace every literal \UXXXXXXXX sequence in s (a backslash, a capital U
    and exactly eight hexadecimal digits) with the character at that code point.

    Escapes whose value is above 0x10FFFF do not correspond to any character
    that chr() can produce; they are left in the text unchanged instead of
    raising and aborting the whole substitution.

    Returns the resulting string. Text without such escapes is returned as is.
    '''
    def replace_escape(match):
        code_point = int(match.group(1), base=16)
        # chr() only accepts values up to 0x10FFFF
        if code_point > 0x10FFFF:
            return match.group(0)
        return chr(code_point)

    return re.sub(r'\\U([0-9a-fA-F]{8})', replace_escape, s)