Add tor video routing

Includes non-tor video routing by default, so there is no more chance
of the browser leaking headers or its user agent to googlevideo.
Adjust the settings upgrade system to facilitate the change to the
route_tor setting.
Add some more space on the settings page for dropdown settings so they
do not overflow due to options with long names.
Closes #7
James Taylor 2020-09-18 14:37:24 -07:00
parent 1ff97bfde1
commit e9989af03a
5 changed files with 99 additions and 34 deletions
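In effect, route_tor becomes a three-way switch. A minimal sketch of the intended routing decision (hypothetical helper for illustration only; the real logic lives in proxy_site and fetch_url_response below):

    def use_tor_for_request(route_tor, is_video):
        # 0 - Off: nothing goes through Tor
        # 1 - On, except video: pages and images via Tor, video direct
        # 2 - On, including video: everything via Tor (see warnings)
        if route_tor == 0:
            return False
        if is_video:
            return route_tor == 2
        return True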

View File

@@ -32,24 +32,48 @@ def youtu_be(env, start_response):
     env['QUERY_STRING'] += '&v=' + id
     yield from yt_app(env, start_response)

-def proxy_site(env, start_response):
+def proxy_site(env, start_response, video=False):
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
         'Accept': '*/*',
     }
+    if 'HTTP_RANGE' in env:
+        headers['Range'] = env['HTTP_RANGE']
+
     url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
     if env['QUERY_STRING']:
         url += '?' + env['QUERY_STRING']
-    content, response = util.fetch_url(url, headers, return_response=True)
+
+    if video and settings.route_tor == 1:
+        response, cleanup_func = util.fetch_url_response(url, headers,
+                                                         use_tor=False)
+    else:
+        response, cleanup_func = util.fetch_url_response(url, headers)
+
     headers = response.getheaders()
     if isinstance(headers, urllib3._collections.HTTPHeaderDict):
         headers = headers.items()
-    start_response('200 OK', headers )
-    yield content
+
+    start_response(str(response.status) + ' ' + response.reason, headers)
+    while True:
+        # A bit over 3 seconds of 360p video. We want each TCP packet to
+        # transmit in large multiples, such as 65,536, so we shouldn't read
+        # in small chunks such as 8192, lest that cause the socket library
+        # to limit the TCP window size. Might need fine-tuning, since this
+        # gives us 4*65536. The tradeoff is that larger values (such as
+        # 6 seconds) only allow video to buffer in those increments, meaning
+        # the user must wait until the entire chunk is downloaded before the
+        # video starts playing.
+        content_part = response.read(32*8192)
+        if not content_part:
+            break
+        yield content_part
+
+    cleanup_func(response)
+
+def proxy_video(env, start_response):
+    yield from proxy_site(env, start_response, video=True)

 site_handlers = {
     'youtube.com':yt_app,

@@ -57,7 +81,7 @@ site_handlers = {
     'ytimg.com': proxy_site,
     'yt3.ggpht.com': proxy_site,
     'lh3.googleusercontent.com': proxy_site,
+    'googlevideo.com': proxy_video,
 }

 def split_url(url):

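The 32*8192 read size in proxy_site works out as follows; a quick check (the 360p bitrate figure is a rough estimate, not from this commit):

    chunk_size = 32 * 8192   # 262,144 bytes = 4 * 65,536
    # At roughly 0.6-0.7 Mbit/s for 360p video (~80-90 kB/s), 262,144 bytes
    # covers a bit over 3 seconds, matching the comment in proxy_site above.
    assert chunk_size == 4 * 65536 == 262144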
View File

@@ -9,10 +9,17 @@ from flask import request

 SETTINGS_INFO = collections.OrderedDict([
     ('route_tor', {
-        'type': bool,
-        'default': False,
+        'type': int,
+        'default': 0,
         'label': 'Route Tor',
-        'comment': '',
+        'comment': '''0 - Off
+1 - On, except video
+2 - On, including video (see warnings)''',
+        'options': [
+            (0, 'Off'),
+            (1, 'On, except video'),
+            (2, 'On, including video (see warnings)'),
+        ],
     }),

     ('port_number', {

@@ -148,7 +155,7 @@ For security reasons, enabling this is not recommended.''',
     ('settings_version', {
         'type': int,
-        'default': 2,
+        'default': 3,
         'comment': '''Do not change, remove, or comment out this value, or else your settings may be lost or corrupted''',
         'hidden': True,
     }),
@@ -186,8 +193,21 @@ def upgrade_to_2(settings_dict):
     if 'enable_related_videos' in settings_dict:
         new_settings['related_videos_mode'] = int(settings_dict['enable_related_videos'])
         del new_settings['enable_related_videos']
+    new_settings['settings_version'] = 2
     return new_settings

+def upgrade_to_3(settings_dict):
+    new_settings = settings_dict.copy()
+    if 'route_tor' in settings_dict:
+        new_settings['route_tor'] = int(settings_dict['route_tor'])
+    new_settings['settings_version'] = 3
+    return new_settings
+
+upgrade_functions = {
+    1: upgrade_to_2,
+    2: upgrade_to_3,
+}

 def log_ignored_line(line_number, message):
     print("WARNING: Ignoring settings.txt line " + str(node.lineno) + " (" + message + ")")
@@ -251,14 +271,20 @@ else:
             current_settings_dict[target.id] = node.value.__getattribute__(attributes[type(node.value)])

-    if 'settings_version' not in current_settings_dict:
-        print('Upgrading settings.txt')
-        current_settings_dict = add_missing_settings(upgrade_to_2(current_settings_dict))
+    # upgrades
+    latest_version = SETTINGS_INFO['settings_version']['default']
+    while current_settings_dict.get('settings_version', 1) < latest_version:
+        current_version = current_settings_dict.get('settings_version', 1)
+        print('Upgrading settings.txt to version', current_version+1)
+        upgrade_func = upgrade_functions[current_version]
+
+        # Must add missing settings here rather than below because
+        # save_settings needs all settings to be present
+        current_settings_dict = add_missing_settings(
+            upgrade_func(current_settings_dict))
         save_settings(current_settings_dict)

     # some settings not in the file, add those missing settings to the file
-    elif not current_settings_dict.keys() >= SETTINGS_INFO.keys():
+    if not current_settings_dict.keys() >= SETTINGS_INFO.keys():
         print('Adding missing settings to settings.txt')
         current_settings_dict = add_missing_settings(current_settings_dict)
         save_settings(current_settings_dict)

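The upgrade loop applies one function per version step, so an old settings.txt is migrated 1 -> 2 -> 3 in order. A standalone sketch of the chain on a toy dict (reproducing the logic above, not importing the real module):

    def upgrade_to_2(settings_dict):
        new_settings = settings_dict.copy()
        if 'enable_related_videos' in settings_dict:
            new_settings['related_videos_mode'] = int(
                settings_dict['enable_related_videos'])
            del new_settings['enable_related_videos']
        new_settings['settings_version'] = 2
        return new_settings

    def upgrade_to_3(settings_dict):
        new_settings = settings_dict.copy()
        if 'route_tor' in settings_dict:
            # bool -> int: False -> 0 (Off), True -> 1 (On, except video)
            new_settings['route_tor'] = int(settings_dict['route_tor'])
        new_settings['settings_version'] = 3
        return new_settings

    upgrade_functions = {1: upgrade_to_2, 2: upgrade_to_3}

    settings_dict = {'route_tor': True}   # version-1 file: no settings_version
    while settings_dict.get('settings_version', 1) < 3:
        current_version = settings_dict.get('settings_version', 1)
        settings_dict = upgrade_functions[current_version](settings_dict)
    print(settings_dict)   # {'route_tor': 1, 'settings_version': 3}

Note that int(True) == 1, so an existing route_tor = True lands on mode 1 ('On, except video'), which is the non-tor video routing by default described in the commit message.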
View File

@@ -4,7 +4,7 @@
 {% block style %}
     .settings-form {
         margin: auto;
-        width: 500px;
+        width: 600px;
         margin-top:10px;
         padding: 10px;
         display: block;

View File

@@ -119,8 +119,11 @@ def decode_content(content, encoding_header):
         content = gzip.decompress(content)
     return content

-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
+def fetch_url_response(url, headers=(), timeout=15, data=None,
+                       cookiejar_send=None, cookiejar_receive=None,
+                       use_tor=True):
     '''
+    returns response, cleanup_function

     When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
     When cookiejar_receive is set to a CookieJar object,
@@ -147,8 +150,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
     elif not isinstance(data, bytes):
         data = urllib.parse.urlencode(data).encode('ascii')

-    start_time = time.time()
-
     if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
@@ -160,19 +161,30 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
             opener = urllib.request.build_opener(cookie_processor)

         response = opener.open(req, timeout=timeout)
-        response_time = time.time()
-
-        content = response.read()
+        cleanup_func = (lambda r: None)

     else:  # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         pool = get_pool(use_tor and settings.route_tor)

         response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
-        response_time = time.time()
+        cleanup_func = (lambda r: r.release_conn())

-        content = response.read()
-        response.release_conn()
+    return response, cleanup_func
+
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
+              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
+              debug_name=None):
+    start_time = time.time()
+
+    response, cleanup_func = fetch_url_response(
+        url, headers, timeout=timeout, data=data,
+        cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
+        use_tor=use_tor)
+    response_time = time.time()
+
+    content = response.read()
+    read_finish = time.time()
+
+    cleanup_func(response)  # release_conn for urllib3

     if (response.status == 429
             and content.startswith(b'<!DOCTYPE')
@@ -185,7 +197,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
     elif response.status >= 400:
         raise FetchError(str(response.status), reason=response.reason, ip=None)

-    read_finish = time.time()
     if report_text:
         print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))

     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
@@ -198,8 +209,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         with open(os.path.join(save_dir, debug_name), 'wb') as f:
             f.write(content)

-    if return_response:
-        return content, response
     return content

 def head(url, use_tor=False, report_text=None, max_redirects=10):

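Streaming callers such as proxy_site now use the response/cleanup pair directly instead of fetch_url. A minimal usage sketch (the import path is assumed from the repo layout, and the URL is a placeholder):

    from youtube import util   # module path assumed

    response, cleanup_func = util.fetch_url_response(
        'https://www.youtube.com/', headers={'Accept': '*/*'}, use_tor=False)
    try:
        total = 0
        while True:
            part = response.read(32*8192)
            if not part:
                break
            total += len(part)   # stand-in for streaming each part to a client
    finally:
        cleanup_func(response)   # no-op for urllib, release_conn for urllib3
    print(total, 'bytes read')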
View File

@@ -24,7 +24,7 @@ except FileNotFoundError:

 def get_video_sources(info):
     video_sources = []
-    if not settings.theater_mode:
+    if (not settings.theater_mode) or settings.route_tor == 2:
         max_resolution = 360
     else:
         max_resolution = settings.default_resolution
@@ -270,10 +270,11 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     else:
         info['hls_formats'] = []

-    # check for 403
+    # check for 403. Unnecessary for tor video routing b/c the ip address is the same
     info['invidious_used'] = False
     info['invidious_reload_button'] = False
-    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+    if (settings.route_tor == 1
+            and info['formats'] and info['formats'][0]['url']):
         try:
             response = util.head(info['formats'][0]['url'],
                 report_text='Checked for URL access')
@@ -408,10 +409,10 @@ def get_watch_page(video_id=None):
         "author": info['author'],
     }

+    # prefix urls, and other post-processing not handled by yt_data_extract
     for item in info['related_videos']:
         util.prefix_urls(item)
         util.add_extra_html_info(item)

     if info['playlist']:
         playlist_id = info['playlist']['id']
         for item in info['playlist']['items']:
@@ -423,6 +424,11 @@ def get_watch_page(video_id=None):
             item['url'] += '&index=' + str(item['index'])
         info['playlist']['author_url'] = util.prefix_url(
             info['playlist']['author_url'])

+    # Don't prefix hls_formats for now because the urls inside the manifest
+    # would need to be prefixed as well.
+    for fmt in info['formats']:
+        fmt['url'] = util.prefix_url(fmt['url'])
+
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
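With format URLs prefixed, the browser fetches video from this local server instead of contacting googlevideo directly, and the site_handlers entry added above dispatches the googlevideo.com host to proxy_video. A rough illustration (the exact output of util.prefix_url is an assumption here; the hostname is a placeholder):

    fmt_url = 'https://example.googlevideo.com/videoplayback?id=...'
    proxied = util.prefix_url(fmt_url)
    # assumed result: '/https://example.googlevideo.com/videoplayback?id=...'
    # The browser requests this path from the local server, which recovers the
    # original host (split_url) and routes googlevideo.com to proxy_video.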