Exit node retrying: Retry 3 times. Also add tests for it.

Closes #20

Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
James Taylor 2020-12-21 11:59:35 -08:00 committed by Jesús
parent 574cb2dae8
commit b11120d000
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
6 changed files with 140 additions and 4 deletions

4
pytest.ini Normal file
View File

@ -0,0 +1,4 @@
# pytest.ini
[pytest]
testpaths =
tests

1
requirements-dev.txt Normal file
View File

@ -0,0 +1 @@
pytest>=6.2.1

14
tests/conftest.py Normal file
View File

@ -0,0 +1,14 @@
import pytest
import urllib3
import urllib
import urllib.request
import socket
# https://realpython.com/pytest-python-testing/
@pytest.fixture(autouse=True)
def disable_network_calls(monkeypatch):
    """Replace the usual network entry points with a callable that raises.

    The fixture is autouse, so it applies without being requested. It patches
    ``urllib.request.Request``, ``urllib3.PoolManager.request`` and
    ``socket.socket``; calling any of them raises ``RuntimeError``.
    """
    def refuse_network(*_args, **_kwargs):
        raise RuntimeError('Network access not allowed during testing!')

    patch_points = (
        (urllib.request, 'Request'),
        (urllib3.PoolManager, 'request'),
        (socket, 'socket'),
    )
    for owner, attribute in patch_points:
        monkeypatch.setattr(owner, attribute, refuse_network)

View File

@ -0,0 +1,28 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head><meta http-equiv="content-type" content="text/html; charset=utf-8"><meta name="viewport" content="initial-scale=1"><title>https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999</title></head>
<body style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();}">
<div style="max-width:400px;">
<hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br>
<form id="captcha-form" action="index" method="post">
<script src="https://www.google.com/recaptcha/api.js" async defer></script>
<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="vJ20x5QPFGCo8r3XkMznOwMTCK8wPW_bLLhPDgo_I1cwF6xLuYZlq2G2wZPaSJiE8zx5YnaxJzFQGsyhY6NHQKMAaUTtSP6GAbPtueM35Jq3Hmk-gEAozXvvF0HIjK5oONT7F-06MwXDxA4HOqZyOEbsUG_8JjFcCklQjUNUVVItgyLpIbZ1dQ-IEtCXY5E3KDcgHGznfAyMGk_bby9uCpfxNTQwljGippKv1PIU7dI4d5LLpgBPWF0"></div>
<input type='hidden' name='q' value='EhAgAUug_-oCrgAAAAAAAAoQGPe-9u8FIhkA8aeDS_-EXvhS86PaeaDvps8cqCssFqOzMgFy'><input type="hidden" name="continue" value="https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999">
</form>
<hr noshade size="1" style="color:#ccc; background-color:#ccc;">
<div style="font-size:13px;">
<b>About this page</b><br><br>
Our systems have detected unusual traffic from your computer network. This page checks to see if it&#39;s really you sending the requests, and not a robot. <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br>
<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help &mdash; a different computer using the same IP address may be responsible. <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
</div>
IP address: 2001:4ba0:ffea:2ae::a10<br>Time: 2019-12-21T04:28:41Z<br>URL: https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999<br>
</div>
</div>
</body>
</html>

76
tests/test_util.py Normal file
View File

@ -0,0 +1,76 @@
from youtube import util
import settings
import pytest # overview: https://realpython.com/pytest-python-testing/
import urllib3
import io
import os
import stem
def load_test_page(name):
    """Return the raw bytes of a saved response fixture.

    The file is looked up in the ``test_responses`` directory located next to
    this module, so the lookup does not depend on the directory the test run
    was started from.

    Args:
        name: File name of the fixture inside ``test_responses``.

    Returns:
        The file contents as ``bytes``.

    Raises:
        OSError: If the fixture file cannot be opened.
    """
    # Anchor on this module's location instead of the current working
    # directory; a cwd-relative path only works when run from the repo root.
    responses_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'test_responses')
    with open(os.path.join(responses_dir, name), 'rb') as f:
        return f.read()
# Raw bytes of the saved '429.html' fixture, read once at import time and
# used further down as the body of the mocked status-429 responses.
html429 = load_test_page('429.html')
class MockResponse(urllib3.response.HTTPResponse):
    """Canned ``urllib3`` response whose ``read`` is served from memory.

    Args:
        body: Response payload; a ``str`` is encoded as UTF-8.
        headers: Header mapping; a falsy value becomes an empty dict.
        status: HTTP status code passed to the base class.
        reason: Reason phrase passed to the base class.
    """

    def __init__(self, body='success', headers=None, status=200, reason=''):
        # Debug aid: echo the leading slice of the body as it was passed in.
        print(body[:10])
        if not headers:
            headers = {}
        if isinstance(body, str):
            body = body.encode('utf-8')
        # read() is redirected to an in-memory buffer holding the payload.
        self.body_io = io.BytesIO(body)
        self.read = self.body_io.read
        super().__init__(
            body=body,
            headers=headers,
            status=status,
            reason=reason,
            preload_content=False,
            decode_content=False,
        )
class NewIdentityState():
    """Scripted stand-in for identity changes and URL fetches.

    Holds a counter that ``new_identity`` decrements by one per call.
    ``fetch_url_response`` returns a default (successful) ``MockResponse``
    only while that counter is exactly zero; otherwise it returns the saved
    429 page with status 429.
    """
    MAX_TRIES = util.TorManager.MAX_TRIES

    def __init__(self, new_identities_till_success):
        self.new_identities_till_success = new_identities_till_success

    def new_identity(self, *args, **kwargs):
        """Record one identity change; all arguments are ignored."""
        print('newidentity')
        self.new_identities_till_success = (
            self.new_identities_till_success - 1)

    def fetch_url_response(self, *args, **kwargs):
        """Return a ``(response, cleanup_callable)`` pair; arguments ignored."""
        def no_cleanup(_response):
            return None

        still_blocked = self.new_identities_till_success != 0
        if still_blocked:
            return MockResponse(body=html429, status=429), no_cleanup
        return MockResponse(), no_cleanup
class MockController():
    """Do-nothing controller usable as a context manager.

    Every method accepts and ignores arbitrary arguments.
    """

    @classmethod
    def from_port(cls, *args, **kwargs):
        """Build and return a new instance; arguments are ignored."""
        controller = cls()
        return controller

    def authenticate(self, *args, **kwargs):
        """Accept anything and do nothing."""
        return None

    def __enter__(self, *args, **kwargs):
        return self

    def __exit__(self, *args, **kwargs):
        # Returns None, so an exception raised inside the ``with`` body
        # is not suppressed.
        return None
@pytest.mark.parametrize('new_identities_till_success',
                         list(range(NewIdentityState.MAX_TRIES + 2)))
def test_exit_node_retry(monkeypatch, new_identities_till_success):
    """Check how ``util.fetch_url`` behaves against repeated 429 responses.

    The mocked fetch keeps returning the 429 page until the mocked controller
    has received ``new_identities_till_success`` signals. For values up to
    ``MAX_TRIES`` the fetch is expected to end with ``b'success'``; for
    ``MAX_TRIES + 1`` it is expected to raise ``util.FetchError`` whose code
    is 429.
    """
    new_identity_state = NewIdentityState(new_identities_till_success)
    # https://docs.pytest.org/en/stable/monkeypatch.html
    monkeypatch.setattr(settings, 'route_tor', 1)
    monkeypatch.setattr(util, 'tor_manager', util.TorManager())  # fresh one
    # Patch through monkeypatch (not a bare class assignment) so the attribute
    # is removed again after the test instead of leaking into later tests.
    # raising=False is required because MockController defines no ``signal``.
    monkeypatch.setattr(MockController, 'signal',
                        new_identity_state.new_identity, raising=False)
    monkeypatch.setattr(stem.control, 'Controller', MockController)
    monkeypatch.setattr(util, 'fetch_url_response',
                        new_identity_state.fetch_url_response)
    if new_identities_till_success <= NewIdentityState.MAX_TRIES:
        assert util.fetch_url('url') == b'success'
    else:
        with pytest.raises(util.FetchError) as excinfo:
            util.fetch_url('url')
        # Checked after the ``with`` block: code inside it that follows the
        # raising call would never run.
        assert int(excinfo.value.code) == 429

View File

@ -60,6 +60,9 @@ connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
class TorManager: class TorManager:
MAX_TRIES = 3
COOLDOWN_TIME = 5
def __init__(self): def __init__(self):
self.old_tor_connection_pool = None self.old_tor_connection_pool = None
self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager( self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager(
@ -69,6 +72,7 @@ class TorManager:
self.new_identity_lock = gevent.lock.BoundedSemaphore(1) self.new_identity_lock = gevent.lock.BoundedSemaphore(1)
self.last_new_identity_time = time.monotonic() - 20 self.last_new_identity_time = time.monotonic() - 20
self.try_num = 1
def refresh_tor_connection_pool(self): def refresh_tor_connection_pool(self):
self.tor_connection_pool.clear() self.tor_connection_pool.clear()
@ -108,9 +112,14 @@ class TorManager:
return None return None
delta = time.monotonic() - self.last_new_identity_time delta = time.monotonic() - self.last_new_identity_time
if delta < 20: if delta < self.COOLDOWN_TIME and self.try_num == 1:
print('new_identity: Retried already within last 20 seconds') err = ('Retried with new circuit %d times (max) within last '
return 'Retried with new circuit once (max) within last 20 seconds.' '%d seconds.' % (self.MAX_TRIES, self.COOLDOWN_TIME))
print('new_identity:', err)
return err
elif delta >= self.COOLDOWN_TIME:
self.try_num = 1
try: try:
port = settings.tor_control_port port = settings.tor_control_port
with stem.control.Controller.from_port(port=port) as controller: with stem.control.Controller.from_port(port=port) as controller:
@ -120,10 +129,14 @@ class TorManager:
print('new_identity: NEWNYM signal sent') print('new_identity: NEWNYM signal sent')
self.last_new_identity_time = time.monotonic() self.last_new_identity_time = time.monotonic()
self.refresh_tor_connection_pool() self.refresh_tor_connection_pool()
return None
except stem.SocketError: except stem.SocketError:
traceback.print_exc() traceback.print_exc()
return 'Failed to connect to Tor control port.' return 'Failed to connect to Tor control port.'
finally:
self.try_num += 1
if self.try_num > self.MAX_TRIES:
self.try_num = 1
return None
finally: finally:
self.new_identity_lock.release() self.new_identity_lock.release()