Give a proper error message for 429 errors

These occur when too many requests are coming from a Tor exit node.
Previously, the error page showed a raw exception and instructed users to report the issue.
But this is an expected and persistent condition, not a bug worth reporting.
James Taylor 2020-01-31 20:06:15 -08:00
parent cd4a2fb0eb
commit f787e4e202
6 changed files with 49 additions and 3 deletions


@@ -1,6 +1,8 @@
from youtube import util
import flask
import settings
import traceback
from sys import exc_info

yt_app = flask.Flask(__name__)
yt_app.url_map.strict_slashes = False
@@ -34,4 +36,14 @@ def commatize(num):
@yt_app.errorhandler(500)
def error_page(e):
    if (exc_info()[0] == util.FetchError
        and exc_info()[1].code == '429'
        and settings.route_tor
    ):
        error_message = ('Error: Youtube blocked the request because the Tor'
            ' exit node is overcrowded. Try getting a new exit node by'
            ' restarting the Tor Browser.')
        if exc_info()[1].ip:
            error_message += ' Exit node IP address: ' + exc_info()[1].ip
        return flask.render_template('error.html', error_message=error_message), 502

    return flask.render_template('error.html', traceback=traceback.format_exc()), 500
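
Note on the handler above: Flask invokes a registered 500 handler while it is still inside the except block that caught the view's unhandled exception, so sys.exc_info() still points at the original util.FetchError rather than a generic internal-server error. A minimal, self-contained sketch of that pattern (hypothetical route name and plain-string responses instead of the project's error.html template):

import sys
import flask

app = flask.Flask(__name__)

class FetchError(Exception):
    # Stand-in for util.FetchError: carries the HTTP status code and optional exit-node IP.
    def __init__(self, code, ip=None):
        super().__init__(code)
        self.code = code
        self.ip = ip

@app.route('/boom')  # hypothetical route used only to trigger the handler
def boom():
    raise FetchError('429', ip='203.0.113.5')  # simulate a blocked request

@app.errorhandler(500)
def error_page(e):
    exc_type, exc_value, _ = sys.exc_info()
    if exc_type is FetchError and exc_value.code == '429':
        return 'Tor exit node is overcrowded (%s)' % exc_value.ip, 502
    return 'internal error', 500

# app.test_client().get('/boom') -> 502 with the friendly message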


@@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'videos':
        tasks = (
@@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
            gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')


@@ -88,6 +88,7 @@ def get_playlist_page():
        gevent.spawn(get_videos, playlist_id, page)
    )
    gevent.joinall(tasks)
    util.check_gevent_exceptions(*tasks)
    first_page_json, this_page_json = tasks[0].value, tasks[1].value

    info = yt_data_extract.extract_playlist_info(this_page_json)


@@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids):
            checking_channels.add(channel_id)
            check_channels_queue.put(channel_id)

def _get_atoma_feed(channel_id):
    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
    try:
        return util.fetch_url(url).decode('utf-8')
    except util.FetchError as e:
        if e.code == '404': # 404 is expected for terminated channels
            return ''
        raise

def _get_upstream_videos(channel_id):
    try:
@@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id):
    tasks = (
        gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration
        gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time
        gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time
    )
    gevent.joinall(tasks)
@@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id):
                return element
        return None

    root = defusedxml.ElementTree.fromstring(feed.decode('utf-8'))
    root = defusedxml.ElementTree.fromstring(feed)
    assert remove_bullshit(root.tag) == 'feed'
    for entry in root:
        if (remove_bullshit(entry.tag) != 'entry'):


@@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    https_request = http_request
    https_response = http_response

class FetchError(Exception):
    def __init__(self, code, reason='', ip=None):
        Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
        self.code = code
        self.reason = reason
        self.ip = ip

def decode_content(content, encoding_header):
    encodings = encoding_header.replace(' ', '').split(',')
@@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
    content = response.read()
    response.release_conn()

    if (response.status == 429
        and content.startswith(b'<!DOCTYPE')
        and b'Our systems have detected unusual traffic' in content):
        ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
            content)
        ip = ip.group(1).decode('ascii') if ip else None
        raise FetchError('429', reason=response.reason, ip=ip)
    elif response.status >= 400:
        raise FetchError(str(response.status), reason=response.reason, ip=None)

    read_finish = time.time()
    if report_text:
        print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
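
The 429 branch above recognizes Google's "unusual traffic" block page and pulls the reported address out of it; the regex accepts both dotted IPv4 and colon-separated IPv6 forms. A standalone check of that pattern against made-up page fragments (the sample text and addresses are illustrative, not captured responses):

import re

# Same pattern as in fetch_url above: IPv6 ("hex:hex:...") or IPv4 ("d.d.d.d").
IP_PATTERN = br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)'

samples = [
    b'<!DOCTYPE html>... Our systems have detected unusual traffic ... IP address: 198.51.100.7 ...',
    b'<!DOCTYPE html>... Our systems have detected unusual traffic ... IP address: 2001:db8::1a2b ...',
]
for content in samples:
    match = re.search(IP_PATTERN, content)
    print(match.group(1).decode('ascii') if match else None)
# -> 198.51.100.7
# -> 2001:db8::1a2b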
@@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
    add_extra_html_info(item)

    return item

def check_gevent_exceptions(*tasks):
    for task in tasks:
        if task.exception:
            raise task.exception
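
gevent.joinall only waits for the greenlets; with the default raise_error=False it does not re-raise an exception that killed one of them, so a failed fetch previously surfaced only indirectly when the greenlet's None value was used later. check_gevent_exceptions re-raises the stored exception so it reaches the Flask error handler. A small standalone sketch of that behavior (not project code):

import gevent

def check_gevent_exceptions(*tasks):
    for task in tasks:
        if task.exception:
            raise task.exception

def works():
    return 'ok'

def fails():
    raise ValueError('simulated 429')  # stand-in for util.FetchError

tasks = (gevent.spawn(works), gevent.spawn(fails))
gevent.joinall(tasks)         # returns normally even though fails() raised
print(tasks[1].value)         # None: the exception is stored on the greenlet, not raised
try:
    check_gevent_exceptions(*tasks)
except ValueError as e:
    print('re-raised:', e)    # re-raised: simulated 429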


@@ -287,6 +287,7 @@ def get_watch_page(video_id=None):
        gevent.spawn(extract_info, video_id)
    )
    gevent.joinall(tasks)
    util.check_gevent_exceptions(tasks[1])
    comments_info, info = tasks[0].value, tasks[1].value

    if info['error']: