fetch_url: 429: get new identity regardless of page content
The new 429 captcha page does not include the IP address. This new page appears to correspond to the 429 status code combined with the JSON body {"redirect": ...} that was occasionally received in the past when the pbj JSON endpoint was used. Closes #22 Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
parent
8cc721b4ec
commit
52ca49628f
@ -292,9 +292,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
|||||||
content,
|
content,
|
||||||
response.getheader('Content-Encoding', default='identity'))
|
response.getheader('Content-Encoding', default='identity'))
|
||||||
|
|
||||||
if (response.status == 429
|
if response.status == 429:
|
||||||
and content.startswith(b'<!DOCTYPE')
|
|
||||||
and b'Our systems have detected unusual traffic' in content):
|
|
||||||
ip = re.search(
|
ip = re.search(
|
||||||
br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
|
br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
|
||||||
content)
|
content)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user