Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integration
Some checks failed
CI / test (push) Failing after 1m19s
Some checks failed
CI / test (push) Failing after 1m19s
Major Features:
- HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities
- HD channel avatars (240x240 instead of 88x88)
- YouTube 2024+ lockupViewModel support for channel playlists
- youtubei/v1/browse API integration for channel playlist tabs
- yt-dlp integration for multi-language audio and subtitles

Bug Fixes:
- Fixed undefined `abort` import in playlist.py
- Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode)
- Fixed missing `traceback` import in proto_debug.py
- Fixed blurry playlist thumbnails using default.jpg instead of HD versions
- Fixed channel playlists page using deprecated pbj=1 format

Improvements:
- Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default)
- JavaScript thumbnail_fallback() handler for 404 errors
- Better thumbnail quality across all pages (watch, channel, playlist, subscriptions)
- Consistent HD avatar display for all channel items
- Settings system automatically adds new settings without breaking user config

Files Modified:
- youtube/watch.py - HD thumbnails for related videos and playlist items
- youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration
- youtube/playlist.py - HD thumbnails, fixed abort import
- youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements
- youtube/comments.py - HD video thumbnail
- youtube/subscriptions.py - HD thumbnails, fixed abort import
- youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info()
- youtube/yt_data_extract/everything_else.py - HD playlist thumbnails
- youtube/proto.py - Fixed undefined function references
- youtube/proto_debug.py - Added traceback import
- youtube/static/js/common.js - thumbnail_fallback() handler
- youtube/templates/*.html - Added onerror handlers for thumbnail fallback
- youtube/version.py - Bump to v0.4.0

Technical Details:
- All thumbnail URLs now use hq720.jpg (1280x720) when available
- Fallback handled client-side via JavaScript onerror handler
- Server-side avatar upgrade via regex in util.prefix_url()
- lockupViewModel parser extracts contentType, metadata, and first_video_id
- Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1
- Settings version system ensures backward compatibility
This commit is contained in:
227
youtube/util.py
227
youtube/util.py
@@ -1,4 +1,5 @@
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import settings
|
||||
import socks
|
||||
import sockshandler
|
||||
@@ -18,6 +19,8 @@ import gevent.queue
|
||||
import gevent.lock
|
||||
import collections
|
||||
import stem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import stem.control
|
||||
import traceback
|
||||
|
||||
@@ -302,73 +305,144 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
|
||||
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              debug_name=None):
    """Fetch ``url`` and return the response body, retrying transient failures.

    Every attempt calls fetch_url_response(), reads the full body, releases
    the connection through the returned cleanup function and passes the body
    through decode_content() together with the Content-Encoding header.

    Retry policy (at most 5 attempts; the wait before retry number ``k`` is
    ``2 ** k`` seconds plus up to one second of random jitter):

    - 429, or a 302 whose Location is ``url`` itself or the Google "sorry"
      page, is treated as rate limiting and retried after a backoff. On the
      final attempt a FetchError('429') is raised; when Tor routing is in
      use a new Tor identity is requested first.
    - 502, 503 and 504 are retried after a backoff.
    - urllib3 MaxRetryError (connection failure) is retried after a backoff.
    - 400, 404 and every other status >= 400 raise immediately.

    Args:
        url: URL to request.
        headers: Request headers, forwarded to fetch_url_response().
        timeout: Timeout forwarded to fetch_url_response().
        report_text: If truthy, logged together with latency and read time
            once the request has succeeded.
        data: Request body forwarded to fetch_url_response().
        cookiejar_send: Forwarded to fetch_url_response().
        cookiejar_receive: Forwarded to fetch_url_response().
        use_tor: Forwarded to fetch_url_response(); also decides, together
            with settings.route_tor, whether a new Tor identity is requested
            when the rate limit persists.
        debug_name: File name under ``<settings.data_dir>/debug`` to which
            the body is written when settings.debugging_save_responses is
            enabled and the body is non-empty.

    Returns:
        The body as returned by decode_content().

    Raises:
        FetchError: On rate limiting that outlasts the retries, on HTTP
            status >= 400, or on a connection failure recognised as a Tor
            proxy error or a urllib3 NewConnectionError.
        urllib3.exceptions.MaxRetryError: On any other connection failure
            that outlasts the retries.
    """
    import random

    max_retries = 5
    base_delay = 1.0  # seconds; doubled after every failed attempt

    def backoff_delay(attempt_index):
        # Exponential backoff with jitter so parallel requests do not retry
        # in lockstep.
        return (base_delay * (2 ** attempt_index)) + random.uniform(0, 1)

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1

        try:
            start_time = time.monotonic()
            response, cleanup_func = fetch_url_response(
                url, headers, timeout=timeout, data=data,
                cookiejar_send=cookiejar_send,
                cookiejar_receive=cookiejar_receive,
                use_tor=use_tor)
            response_time = time.monotonic()

            try:
                content = response.read()
                read_finish = time.monotonic()
            finally:
                # release_connection for urllib3; done even when read()
                # fails so the connection is not leaked.
                cleanup_func(response)
        except urllib3.exceptions.MaxRetryError as e:
            if is_last_attempt:
                context = e.__context__
                # The context chain may be shorter than two levels.
                exception_cause = getattr(context, '__context__', None)
                if (isinstance(exception_cause, socks.ProxyConnectionError)
                        and settings.route_tor):
                    msg = ('Failed to connect to Tor. Check that Tor is open and '
                           'that your internet connection is working.\n\n'
                           + str(e))
                    logger.error('Tor connection failed: %s', msg)
                    raise FetchError('502', reason='Bad Gateway',
                                     error_message=msg)
                elif isinstance(context,
                                urllib3.exceptions.NewConnectionError):
                    msg = 'Failed to establish a connection.\n\n' + str(e)
                    logger.error('Connection failed: %s', msg)
                    raise FetchError(
                        '502', reason='Bad Gateway',
                        error_message=msg)
                else:
                    raise

            # Wait and retry
            delay = backoff_delay(attempt)
            logger.warning(
                'Connection error. Waiting %.1fs before retry %d/%d...',
                delay, attempt + 1, max_retries)
            time.sleep(delay)
            continue

        content = decode_content(
            content,
            response.headers.get('Content-Encoding', default='identity'))

        if (settings.debugging_save_responses
                and debug_name is not None
                and content):
            save_dir = os.path.join(settings.data_dir, 'debug')
            os.makedirs(save_dir, exist_ok=True)
            with open(os.path.join(save_dir, debug_name), 'wb') as f:
                f.write(content)

        # Check for rate limiting (429) or redirect to Google Sorry.
        # A 302 without a Location header is not a rate-limit redirect.
        location = response.getheader('Location') or ''
        rate_limited = response.status == 429 or (
            response.status == 302
            and (location == url
                 or location.startswith('https://www.google.com/sorry/index'))
        )
        if rate_limited:
            logger.info('Rate limit response: %s %s',
                        response.status, response.reason)
            ip = re.search(
                br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                content)
            ip = ip.group(1).decode('ascii') if ip else None
            if not ip:
                ip = re.search(r'IP=((?:\d+\.)+\d+)',
                               response.getheader('Set-Cookie') or '')
                ip = ip.group(1) if ip else None

            if not is_last_attempt:
                delay = backoff_delay(attempt)
                logger.info(
                    'Rate limited (429). Waiting %.1fs before retry %d/%d...',
                    delay, attempt + 1, max_retries)
                time.sleep(delay)
                continue  # retry

            if not use_tor or not settings.route_tor:
                logger.warning('YouTube returned 429 but Tor is not enabled. Consider enabling Tor routing.')
                raise FetchError('429', reason=response.reason, ip=ip)

            logger.error(
                'YouTube blocked request - Tor exit node overutilized. Exit IP: %s',
                ip)

            # get new identity
            error = tor_manager.new_identity(start_time)
            if error:
                raise FetchError(
                    '429', reason=response.reason, ip=ip,
                    error_message='Automatic circuit change: ' + error)
            # No attempts are left, so the rate-limit page must not be
            # returned as if it were a successful response.
            raise FetchError('429', reason=response.reason, ip=ip)

        # Check for client errors (400, 404) - don't retry these
        if response.status == 400:
            logger.error('Bad Request (400) - Invalid parameters or URL: %s',
                         url[:100])
            raise FetchError('400', reason='Bad Request - Invalid parameters or URL format', ip=None)

        if response.status == 404:
            logger.warning('Not Found (404): %s', url[:100])
            raise FetchError('404', reason='Not Found', ip=None)

        # Check for other server errors (503, 502, 504)
        if response.status in (502, 503, 504):
            if is_last_attempt:
                logger.error('Server error %s after %d retries',
                             response.status, max_retries)
                raise FetchError(str(response.status),
                                 reason=response.reason, ip=None)

            # Exponential backoff for server errors
            delay = backoff_delay(attempt)
            logger.warning(
                'Server error (%s). Waiting %.1fs before retry %d/%d...',
                response.status, delay, attempt + 1, max_retries)
            time.sleep(delay)
            continue

        # Any remaining error status (e.g. 403, 500) is not retried and must
        # not be handed back to the caller as page content.
        if response.status >= 400:
            raise FetchError(str(response.status), reason=response.reason,
                             ip=None)

        # Success - break out of retry loop
        break

    if report_text:
        logger.info('%s - Latency: %ss - Read time: %ss',
                    report_text,
                    round(response_time - start_time, 3),
                    round(read_finish - response_time, 3))

    return content
|
||||
|
||||
@@ -462,7 +536,7 @@ class RateLimitedQueue(gevent.queue.Queue):
|
||||
|
||||
|
||||
def download_thumbnail(save_directory, video_id):
|
||||
url = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
|
||||
url = f"https://i.ytimg.com/vi/{video_id}/hq720.jpg"
|
||||
save_location = os.path.join(save_directory, video_id + ".jpg")
|
||||
try:
|
||||
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
|
||||
@@ -502,9 +576,40 @@ def video_id(url):
|
||||
return urllib.parse.parse_qs(url_parts.query)['v'][0]
|
||||
|
||||
|
||||
# default, sddefault, mqdefault, hqdefault, hq720
|
||||
def get_thumbnail_url(video_id, quality='hq720'):
    """Build the thumbnail URL for a video at the preferred quality.

    No request is made and availability is not checked: the URL of the
    first (highest) candidate for ``quality`` is returned as-is. Falling
    back to a lower quality, if required, is left to whoever loads the URL.

    Args:
        video_id: YouTube video ID
        quality: Preferred quality ('maxres', 'hq720', 'sd', 'hq', 'mq',
            'default'). Any other value is treated like 'hq720'.

    Returns:
        Tuple of (url, file_name), where url starts with
        settings.img_prefix and file_name is the image file name used,
        e.g. 'hq720.jpg'.
    """
    # Candidate file names per preference, highest quality first
    candidates_by_quality = {
        'maxres': ['maxresdefault.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'hq720': ['hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'sd': ['sddefault.jpg', 'hqdefault.jpg'],
        'hq': ['hqdefault.jpg', 'mqdefault.jpg'],
        'mq': ['mqdefault.jpg', 'default.jpg'],
        'default': ['default.jpg'],
    }

    try:
        file_names = candidates_by_quality[quality]
    except KeyError:
        file_names = candidates_by_quality['hq720']

    top_choice = file_names[0]
    prefix = f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/"
    return prefix + top_choice, top_choice
|
||||
|
||||
|
||||
def get_best_thumbnail_url(video_id):
    """Return only the URL part of get_thumbnail_url() at 'hq720' quality.

    This is a convenience wrapper for callers that do not need the file
    name; it performs no availability check or fallback of its own.
    """
    url_and_file_name = get_thumbnail_url(video_id, quality='hq720')
    return url_and_file_name[0]
|
||||
|
||||
|
||||
def seconds_to_timestamp(seconds):
|
||||
@@ -538,6 +643,12 @@ def prefix_url(url):
|
||||
if url is None:
|
||||
return None
|
||||
url = url.lstrip('/') # some urls have // before them, which has a special meaning
|
||||
|
||||
# Increase resolution for YouTube channel avatars
|
||||
if url and ('ggpht.com' in url or 'yt3.ggpht.com' in url):
|
||||
# Replace size parameter with higher resolution (s240 instead of s88)
|
||||
url = re.sub(r'=s\d+-c-k', '=s240-c-k-c0x00ffffff-no-rj', url)
|
||||
|
||||
return '/' + url
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user