Add tor video routing

Video routing is non-tor by default, so there is no more chance
of the browser leaking headers or its user agent to googlevideo.
Adjust the settings upgrade system to facilitate the change to the
route_tor setting.
Add some more space on the settings page for dropdown settings so
they do not overflow due to options with long names.
Closes #7
James Taylor 2020-09-18 14:37:24 -07:00
parent 1ff97bfde1
commit e9989af03a
5 changed files with 99 additions and 34 deletions
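
For reference, route_tor changes from a bool to a three-level int. A sketch of how the upgraded entries would look in settings.txt (which save_settings writes out and the loader parses as Python literals; the values here are illustrative):

route_tor = 1  # 0 - Off; 1 - On, except video; 2 - On, including video (see warnings)
settings_version = 3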

View File

@@ -32,24 +32,48 @@ def youtu_be(env, start_response):
     env['QUERY_STRING'] += '&v=' + id
     yield from yt_app(env, start_response)
 
-def proxy_site(env, start_response):
+def proxy_site(env, start_response, video=False):
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
         'Accept': '*/*',
     }
     if 'HTTP_RANGE' in env:
         headers['Range'] = env['HTTP_RANGE']
 
     url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
     if env['QUERY_STRING']:
         url += '?' + env['QUERY_STRING']
-    content, response = util.fetch_url(url, headers, return_response=True)
+
+    if video and settings.route_tor == 1:
+        response, cleanup_func = util.fetch_url_response(url, headers,
+                                                         use_tor=False)
+    else:
+        response, cleanup_func = util.fetch_url_response(url, headers)
+
     headers = response.getheaders()
     if isinstance(headers, urllib3._collections.HTTPHeaderDict):
         headers = headers.items()
-    start_response('200 OK', headers )
-    yield content
+    start_response(str(response.status) + ' ' + response.reason, headers)
+
+    while True:
+        # Read a bit over 3 seconds of 360p video per iteration.
+        # We want each TCP packet to transmit in large multiples,
+        # such as 65,536, so we shouldn't read in small chunks such
+        # as 8192, lest that cause the socket library to limit the
+        # TCP window size. Might need fine-tuning; this gives 4*65536.
+        # The tradeoff is that larger values (such as 6 seconds) only
+        # allow video to buffer in those increments, meaning the user
+        # must wait for the entire chunk before the video starts playing.
+        content_part = response.read(32*8192)
+        if not content_part:
+            break
+        yield content_part
+
+    cleanup_func(response)
+
+def proxy_video(env, start_response):
+    yield from proxy_site(env, start_response, video=True)
 
 
 site_handlers = {
     'youtube.com':yt_app,
@@ -57,7 +81,7 @@ site_handlers = {
     'ytimg.com': proxy_site,
     'yt3.ggpht.com': proxy_site,
     'lh3.googleusercontent.com': proxy_site,
-    'googlevideo.com': proxy_site,
+    'googlevideo.com': proxy_video,
 }
 
 def split_url(url):
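
The new proxy streams the body instead of buffering it: proxy_site forwards the upstream status line, then yields 256 KiB chunks from the WSGI generator. A minimal standalone sketch of the same pattern, assuming a plain urllib3 PoolManager in place of the project's get_pool:

import urllib3

def stream_body(url, chunk_size=32*8192):
    # 32*8192 = 262144 bytes (4*65536); large reads keep TCP throughput
    # high, while tiny reads (e.g. 8192) risk limiting the window size
    pool = urllib3.PoolManager()
    response = pool.request('GET', url, preload_content=False,
                            decode_content=False)
    try:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            yield chunk
    finally:
        response.release_conn()  # hand the connection back to the pool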

View File

@@ -9,10 +9,17 @@ from flask import request
 
 SETTINGS_INFO = collections.OrderedDict([
     ('route_tor', {
-        'type': bool,
-        'default': False,
+        'type': int,
+        'default': 0,
         'label': 'Route Tor',
-        'comment': '',
+        'comment': '''0 - Off
+1 - On, except video
+2 - On, including video (see warnings)''',
+        'options': [
+            (0, 'Off'),
+            (1, 'On, except video'),
+            (2, 'On, including video (see warnings)'),
+        ],
     }),
 
     ('port_number', {
@@ -148,7 +155,7 @@ For security reasons, enabling this is not recommended.''',
 
     ('settings_version', {
         'type': int,
-        'default': 2,
+        'default': 3,
         'comment': '''Do not change, remove, or comment out this value, or else your settings may be lost or corrupted''',
         'hidden': True,
     }),
@@ -186,8 +193,21 @@ def upgrade_to_2(settings_dict):
     if 'enable_related_videos' in settings_dict:
         new_settings['related_videos_mode'] = int(settings_dict['enable_related_videos'])
         del new_settings['enable_related_videos']
+    new_settings['settings_version'] = 2
     return new_settings
 
+def upgrade_to_3(settings_dict):
+    new_settings = settings_dict.copy()
+    if 'route_tor' in settings_dict:
+        new_settings['route_tor'] = int(settings_dict['route_tor'])
+    new_settings['settings_version'] = 3
+    return new_settings
+
+upgrade_functions = {
+    1: upgrade_to_2,
+    2: upgrade_to_3,
+}
+
 def log_ignored_line(line_number, message):
     print("WARNING: Ignoring settings.txt line " + str(line_number) + " (" + message + ")")
@@ -251,14 +271,20 @@ else:
                 current_settings_dict[target.id] = node.value.__getattribute__(attributes[type(node.value)])
 
-    if 'settings_version' not in current_settings_dict:
-        print('Upgrading settings.txt')
-        current_settings_dict = add_missing_settings(upgrade_to_2(current_settings_dict))
+    # upgrades
+    latest_version = SETTINGS_INFO['settings_version']['default']
+    while current_settings_dict.get('settings_version', 1) < latest_version:
+        current_version = current_settings_dict.get('settings_version', 1)
+        print('Upgrading settings.txt to version', current_version+1)
+        upgrade_func = upgrade_functions[current_version]
+        # Must add missing settings here rather than below because
+        # save_settings needs all settings to be present
+        current_settings_dict = add_missing_settings(
+            upgrade_func(current_settings_dict))
         save_settings(current_settings_dict)
+
     # some settings not in the file, add those missing settings to the file
-    elif not current_settings_dict.keys() >= SETTINGS_INFO.keys():
+    if not current_settings_dict.keys() >= SETTINGS_INFO.keys():
         print('Adding missing settings to settings.txt')
         current_settings_dict = add_missing_settings(current_settings_dict)
         save_settings(current_settings_dict)
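
The old code could only jump from an unversioned file to version 2; the loop above instead walks settings.txt through every intermediate version. The pattern in isolation (a toy sketch; upgrade_to_2 is stubbed down to the part relevant here):

def upgrade_to_2(settings_dict):
    new_settings = settings_dict.copy()
    # (the real function also renames some version-1 fields)
    new_settings['settings_version'] = 2
    return new_settings

def upgrade_to_3(settings_dict):
    new_settings = settings_dict.copy()
    if 'route_tor' in settings_dict:
        new_settings['route_tor'] = int(settings_dict['route_tor'])
    new_settings['settings_version'] = 3
    return new_settings

upgrade_functions = {1: upgrade_to_2, 2: upgrade_to_3}

def upgrade(settings_dict, latest_version=3):
    # apply the next upgrade step repeatedly until the dict is current
    while settings_dict.get('settings_version', 1) < latest_version:
        current_version = settings_dict.get('settings_version', 1)
        settings_dict = upgrade_functions[current_version](settings_dict)
    return settings_dict

# int(True) == 1, so an old route_tor = True maps to 1 ('On, except video'),
# the non-tor video default described in the commit message
assert upgrade({'route_tor': True})['route_tor'] == 1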

View File

@@ -4,7 +4,7 @@
 {% block style %}
     .settings-form {
         margin: auto;
-        width: 500px;
+        width: 600px;
         margin-top:10px;
         padding: 10px;
         display: block;

View File

@@ -119,8 +119,11 @@ def decode_content(content, encoding_header):
         content = gzip.decompress(content)
     return content
 
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
+def fetch_url_response(url, headers=(), timeout=15, data=None,
+                       cookiejar_send=None, cookiejar_receive=None,
+                       use_tor=True):
     '''
+    returns response, cleanup_function
     When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
     When cookiejar_receive is set to a CookieJar object,
@@ -147,8 +150,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
     elif not isinstance(data, bytes):
         data = urllib.parse.urlencode(data).encode('ascii')
 
-    start_time = time.time()
-
     if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
@@ -160,19 +161,30 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
         opener = urllib.request.build_opener(cookie_processor)
         response = opener.open(req, timeout=timeout)
         response_time = time.time()
-        content = response.read()
+        cleanup_func = (lambda r: None)
     else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         pool = get_pool(use_tor and settings.route_tor)
         response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
         response_time = time.time()
-        content = response.read()
-        response.release_conn()
+        cleanup_func = (lambda r: r.release_conn())
 
+    return response, cleanup_func
+
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
+              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
+              debug_name=None):
+    start_time = time.time()
+
+    response, cleanup_func = fetch_url_response(
+        url, headers, timeout=timeout,
+        cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
+        use_tor=use_tor)
+    response_time = time.time()
+
+    content = response.read()
+    read_finish = time.time()
+
+    cleanup_func(response) # release_connection for urllib3
 
     if (response.status == 429
             and content.startswith(b'<!DOCTYPE')
@@ -185,7 +197,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
     elif response.status >= 400:
         raise FetchError(str(response.status), reason=response.reason, ip=None)
 
-    read_finish = time.time()
     if report_text:
         print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
 
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
@@ -198,8 +209,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
         with open(os.path.join(save_dir, debug_name), 'wb') as f:
             f.write(content)
 
-    if return_response:
-        return content, response
     return content
 
 def head(url, use_tor=False, report_text=None, max_redirects=10):
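
The refactor leaves two entry points: fetch_url_response hands back the live response plus a cleanup function (a no-op for urllib, release_conn for urllib3), while fetch_url keeps the old buffer-everything behavior on top of it. A usage sketch of the new contract (the URL and handle_part are placeholders):

response, cleanup_func = fetch_url_response(
    'https://example.com/stream', {'Accept': '*/*'}, use_tor=False)
try:
    while True:
        part = response.read(32*8192)
        if not part:
            break
        handle_part(part)  # placeholder for whatever consumes the stream
finally:
    cleanup_func(response)  # safe for both urllib and urllib3 responses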

View File

@@ -24,7 +24,7 @@ except FileNotFoundError:
 
 def get_video_sources(info):
     video_sources = []
-    if not settings.theater_mode:
+    if (not settings.theater_mode) or settings.route_tor == 2:
         max_resolution = 360
     else:
         max_resolution = settings.default_resolution
@@ -270,10 +270,11 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     else:
         info['hls_formats'] = []
 
-    # check for 403
+    # check for 403. Unnecessary for tor video routing b/c ip address is same
     info['invidious_used'] = False
     info['invidious_reload_button'] = False
-    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+    if (settings.route_tor == 1
+            and info['formats'] and info['formats'][0]['url']):
         try:
             response = util.head(info['formats'][0]['url'],
                                  report_text='Checked for URL access')
@@ -408,10 +409,10 @@ def get_watch_page(video_id=None):
         "author": info['author'],
     }
 
     # prefix urls, and other post-processing not handled by yt_data_extract
     for item in info['related_videos']:
         util.prefix_urls(item)
         util.add_extra_html_info(item)
     if info['playlist']:
         playlist_id = info['playlist']['id']
         for item in info['playlist']['items']:
@@ -423,6 +424,11 @@ def get_watch_page(video_id=None):
             item['url'] += '&index=' + str(item['index'])
         info['playlist']['author_url'] = util.prefix_url(
             info['playlist']['author_url'])
 
+    # Don't prefix hls_formats for now because the urls inside the manifest
+    # would need to be prefixed as well.
+    for fmt in info['formats']:
+        fmt['url'] = util.prefix_url(fmt['url'])
+
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
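
Prefixing is what actually routes the video traffic through the local server: each format URL becomes a local path, and the WSGI app dispatches googlevideo.com hosts to proxy_video. A sketch of the idea, assuming prefix_url simply prepends a slash (illustrative, not copied from util.py):

def prefix_url(url):
    # '/https://host/path' is a path the local WSGI server can dispatch on;
    # lstrip guards against protocol-relative '//host/path' urls
    if url is None:
        return None
    return '/' + url.lstrip('/')

assert prefix_url('https://r1.googlevideo.com/videoplayback?id=x') == \
    '/https://r1.googlevideo.com/videoplayback?id=x'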