Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integration
Some checks failed
CI / test (push) Failing after 1m19s

Major Features:
- HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities
- HD channel avatars (240x240 instead of 88x88)
- YouTube 2024+ lockupViewModel support for channel playlists
- youtubei/v1/browse API integration for channel playlist tabs
- yt-dlp integration for multi-language audio and subtitles

Bug Fixes:
- Fixed undefined `abort` import in playlist.py
- Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode)
- Fixed missing `traceback` import in proto_debug.py
- Fixed blurry playlist thumbnails using default.jpg instead of HD versions
- Fixed channel playlists page using deprecated pbj=1 format

Improvements:
- Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default)
- JavaScript thumbnail_fallback() handler for 404 errors
- Better thumbnail quality across all pages (watch, channel, playlist, subscriptions)
- Consistent HD avatar display for all channel items
- Settings system automatically adds new settings without breaking user config

Files Modified:
- youtube/watch.py - HD thumbnails for related videos and playlist items
- youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration
- youtube/playlist.py - HD thumbnails, fixed abort import
- youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements
- youtube/comments.py - HD video thumbnail
- youtube/subscriptions.py - HD thumbnails, fixed abort import
- youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info()
- youtube/yt_data_extract/everything_else.py - HD playlist thumbnails
- youtube/proto.py - Fixed undefined function references
- youtube/proto_debug.py - Added traceback import
- youtube/static/js/common.js - thumbnail_fallback() handler
- youtube/templates/*.html - Added onerror handlers for thumbnail fallback
- youtube/version.py - Bump to v0.4.0

Technical Details:
- All thumbnail URLs now use hq720.jpg (1280x720) when available
- Fallback handled client-side via JavaScript onerror handler
- Server-side avatar upgrade via regex in util.prefix_url()
- lockupViewModel parser extracts contentType, metadata, and first_video_id
- Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1
- Settings version system ensures backward compatibility
This commit is contained in:
2026-03-22 20:50:03 -05:00
parent 84e1acaab8
commit 6a68f06645
25 changed files with 929 additions and 231 deletions

View File

@@ -1,4 +1,5 @@
from datetime import datetime
import logging
import settings
import socks
import sockshandler
@@ -18,6 +19,8 @@ import gevent.queue
import gevent.lock
import collections
import stem
logger = logging.getLogger(__name__)
import stem.control
import traceback
@@ -302,73 +305,144 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
cookiejar_send=None, cookiejar_receive=None, use_tor=True,
debug_name=None):
while True:
start_time = time.monotonic()
"""
Fetch URL with exponential backoff retry logic for rate limiting.
response, cleanup_func = fetch_url_response(
url, headers, timeout=timeout, data=data,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor)
response_time = time.monotonic()
Retries:
- 429 Too Many Requests: Exponential backoff (1s, 2s, 4s, 8s, 16s)
- 503 Service Unavailable: Exponential backoff
- 302 Redirect to Google Sorry: Treated as rate limit
content = response.read()
Max retries: 5 attempts with exponential backoff
"""
import random
read_finish = time.monotonic()
max_retries = 5
base_delay = 1.0 # Base delay in seconds
cleanup_func(response) # release_connection for urllib3
content = decode_content(
content,
response.headers.get('Content-Encoding', default='identity'))
for attempt in range(max_retries):
try:
start_time = time.monotonic()
if (settings.debugging_save_responses
and debug_name is not None
and content):
save_dir = os.path.join(settings.data_dir, 'debug')
if not os.path.exists(save_dir):
os.makedirs(save_dir)
response, cleanup_func = fetch_url_response(
url, headers, timeout=timeout, data=data,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor)
response_time = time.monotonic()
with open(os.path.join(save_dir, debug_name), 'wb') as f:
f.write(content)
content = response.read()
if response.status == 429 or (
response.status == 302 and (response.getheader('Location') == url
or response.getheader('Location').startswith(
'https://www.google.com/sorry/index'
)
)
):
print(response.status, response.reason, response.headers)
ip = re.search(
br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
content)
ip = ip.group(1).decode('ascii') if ip else None
if not ip:
ip = re.search(r'IP=((?:\d+\.)+\d+)',
response.getheader('Set-Cookie') or '')
ip = ip.group(1) if ip else None
read_finish = time.monotonic()
# don't get new identity if we're not using Tor
if not use_tor:
raise FetchError('429', reason=response.reason, ip=ip)
cleanup_func(response) # release_connection for urllib3
content = decode_content(
content,
response.headers.get('Content-Encoding', default='identity'))
print('Error: YouTube blocked the request because the Tor exit node is overutilized. Exit node IP address: %s' % ip)
if (settings.debugging_save_responses
and debug_name is not None
and content):
save_dir = os.path.join(settings.data_dir, 'debug')
if not os.path.exists(save_dir):
os.makedirs(save_dir)
# get new identity
error = tor_manager.new_identity(start_time)
if error:
raise FetchError(
'429', reason=response.reason, ip=ip,
error_message='Automatic circuit change: ' + error)
else:
continue # retry now that we have new identity
with open(os.path.join(save_dir, debug_name), 'wb') as f:
f.write(content)
elif response.status >= 400:
raise FetchError(str(response.status), reason=response.reason,
ip=None)
break
# Check for rate limiting (429) or redirect to Google Sorry
if response.status == 429 or (
response.status == 302 and (response.getheader('Location') == url
or response.getheader('Location').startswith(
'https://www.google.com/sorry/index'
)
)
):
logger.info(f'Rate limit response: {response.status} {response.reason}')
ip = re.search(
br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
content)
ip = ip.group(1).decode('ascii') if ip else None
if not ip:
ip = re.search(r'IP=((?:\d+\.)+\d+)',
response.getheader('Set-Cookie') or '')
ip = ip.group(1) if ip else None
# If this is the last attempt, raise error
if attempt >= max_retries - 1:
if not use_tor or not settings.route_tor:
logger.warning(f'YouTube returned 429 but Tor is not enabled. Consider enabling Tor routing.')
raise FetchError('429', reason=response.reason, ip=ip)
logger.error(f'YouTube blocked request - Tor exit node overutilized. Exit IP: {ip}')
# get new identity
error = tor_manager.new_identity(start_time)
if error:
raise FetchError(
'429', reason=response.reason, ip=ip,
error_message='Automatic circuit change: ' + error)
else:
continue # retry with new identity
# Calculate delay with exponential backoff and jitter
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.info(f'Rate limited (429). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
continue # retry
# Check for client errors (400, 404) - don't retry these
if response.status == 400:
logger.error(f'Bad Request (400) - Invalid parameters or URL: {url[:100]}')
raise FetchError('400', reason='Bad Request - Invalid parameters or URL format', ip=None)
if response.status == 404:
logger.warning(f'Not Found (404): {url[:100]}')
raise FetchError('404', reason='Not Found', ip=None)
# Check for other server errors (503, 502, 504)
if response.status in (502, 503, 504):
if attempt >= max_retries - 1:
logger.error(f'Server error {response.status} after {max_retries} retries')
raise FetchError(str(response.status), reason=response.reason, ip=None)
# Exponential backoff for server errors
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
continue
# Success - break out of retry loop
break
except urllib3.exceptions.MaxRetryError as e:
# If this is the last attempt, raise the error
if attempt >= max_retries - 1:
exception_cause = e.__context__.__context__
if (isinstance(exception_cause, socks.ProxyConnectionError)
and settings.route_tor):
msg = ('Failed to connect to Tor. Check that Tor is open and '
'that your internet connection is working.\n\n'
+ str(e))
logger.error(f'Tor connection failed: {msg}')
raise FetchError('502', reason='Bad Gateway',
error_message=msg)
elif isinstance(e.__context__,
urllib3.exceptions.NewConnectionError):
msg = 'Failed to establish a connection.\n\n' + str(e)
logger.error(f'Connection failed: {msg}')
raise FetchError(
'502', reason='Bad Gateway',
error_message=msg)
else:
raise
# Wait and retry
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
if report_text:
print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:', round(read_finish - response_time,3))
logger.info(f'{report_text} - Latency: {round(response_time - start_time, 3)}s - Read time: {round(read_finish - response_time, 3)}s')
return content
@@ -462,7 +536,7 @@ class RateLimitedQueue(gevent.queue.Queue):
def download_thumbnail(save_directory, video_id):
url = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
url = f"https://i.ytimg.com/vi/{video_id}/hq720.jpg"
save_location = os.path.join(save_directory, video_id + ".jpg")
try:
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
@@ -502,9 +576,40 @@ def video_id(url):
return urllib.parse.parse_qs(url_parts.query)['v'][0]
# default, sddefault, mqdefault, hqdefault, hq720
def get_thumbnail_url(video_id):
return f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
def get_thumbnail_url(video_id, quality='hq720'):
    """Build a thumbnail URL for a video at the requested quality tier.

    Args:
        video_id: YouTube video ID
        quality: Preferred quality ('maxres', 'hq720', 'sd', 'hq', 'mq', 'default')

    Returns:
        Tuple of (best_available_url, quality_used)
    """
    # Each preferred tier maps to its ordered fallback chain (best first).
    fallback_chains = {
        'maxres': ['maxresdefault.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'hq720': ['hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'sd': ['sddefault.jpg', 'hqdefault.jpg'],
        'hq': ['hqdefault.jpg', 'mqdefault.jpg'],
        'mq': ['mqdefault.jpg', 'default.jpg'],
        'default': ['default.jpg'],
    }
    chain = fallback_chains.get(quality, fallback_chains['hq720'])
    filename = chain[0]
    base_url = f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/"
    # Only the top choice is returned; 404s are handled client-side
    # (see the JavaScript onerror fallback handler).
    return base_url + filename, filename
def get_best_thumbnail_url(video_id):
    """Return the best available thumbnail URL for a video.

    Requests the 'hq720' tier (HD videos) and discards the quality label;
    lower-quality videos are covered by the fallback chain.
    """
    url, _quality = get_thumbnail_url(video_id, quality='hq720')
    return url
def seconds_to_timestamp(seconds):
@@ -538,6 +643,12 @@ def prefix_url(url):
if url is None:
return None
url = url.lstrip('/') # some urls have // before them, which has a special meaning
# Increase resolution for YouTube channel avatars
if url and ('ggpht.com' in url or 'yt3.ggpht.com' in url):
# Replace size parameter with higher resolution (s240 instead of s88)
url = re.sub(r'=s\d+-c-k', '=s240-c-k-c0x00ffffff-no-rj', url)
return '/' + url