2026-04-20 01:22:56 -04:00
16 changed files with 237 additions and 146 deletions
--- a/generate_release.py
+++ b/generate_release.py
@@ -44,6 +44,10 @@ def remove_files_with_extensions(path, extensions):

 def download_if_not_exists(file_name, url, sha256=None):
    if not os.path.exists('./' + file_name):
+        # Reject non-https URLs so a mistaken constant cannot cause a
+        # plaintext download (bandit B310 hardening).
+        if not url.startswith('https://'):
+            raise Exception('Refusing to download over non-https URL: ' + url)
        log('Downloading ' + file_name + '..')
        data = urllib.request.urlopen(url).read()
        log('Finished downloading ' + file_name)
@@ -58,12 +62,14 @@ def download_if_not_exists(file_name, url, sha256=None):
        log('Using existing ' + file_name)

 def wine_run_shell(command):
+    import shlex  # local import: argv-style invocation (no shell) that still honours quoted arguments
    if os.name == 'posix':
-        check(os.system('wine ' + command.replace('\\', '/')))
+        cmd = ['wine'] + shlex.split(command.replace('\\', '/'))
    elif os.name == 'nt':
-        check(os.system(command))
+        cmd = command  # Windows: the string is handed to CreateProcess as the command line; no shell is spawned
    else:
        raise Exception('Unsupported OS')
+    check(subprocess.run(cmd).returncode)

 def wine_run(command_parts):
    if os.name == 'posix':
@@ -92,7 +98,20 @@ if os.path.exists('./yt-local'):
 # confused with working directory. I'm calling it the same thing so it will
 # have that name when extracted from the final release zip archive)
 log('Making copy of yt-local files')
-check(os.system('git archive --format tar master | 7z x -si -ttar -oyt-local'))
+# Avoid the shell: pipe `git archive` into 7z directly via subprocess.
+_git_archive = subprocess.Popen(
+    ['git', 'archive', '--format', 'tar', 'master'],
+    stdout=subprocess.PIPE,
+)
+_sevenz = subprocess.Popen(
+    ['7z', 'x', '-si', '-ttar', '-oyt-local'],
+    stdin=_git_archive.stdout,
+)
+_git_archive.stdout.close()
+_sevenz.wait()
+_git_archive.wait()
+check(_sevenz.returncode)
+check(_git_archive.returncode)

 if len(os.listdir('./yt-local')) == 0:
    raise Exception('Failed to copy yt-local files')
@@ -136,7 +155,7 @@ if os.path.exists('./python'):

 log('Extracting python distribution')

-check(os.system(r'7z -y x -opython ' + python_dist_name))
+check_subp(subprocess.run(['7z', '-y', 'x', '-opython', python_dist_name]))

 log('Executing get-pip.py')
 wine_run(['./python/python.exe', '-I', 'get-pip.py'])
@@ -241,7 +260,7 @@ if os.path.exists('./' + output_filename):
    log('Removing previous zipped release')
    os.remove('./' + output_filename)
 log('Zipping release')
-check(os.system(r'7z -mx=9 a ' + output_filename + ' ./yt-local'))
+check_subp(subprocess.run(['7z', '-mx=9', 'a', output_filename, './yt-local']))

 print('\n')
 log('Finished')
--- a/server.py
+++ b/server.py
@@ -1,22 +1,28 @@
 #!/usr/bin/env python3
+# E402 is deliberately ignored in this file: `monkey.patch_all()` must run
+# before any stdlib networking or gevent-dependent modules are imported.
 from gevent import monkey
 monkey.patch_all()
-import gevent.socket

 from youtube import yt_app
 from youtube import util

 # these are just so the files get run - they import yt_app and add routes to it
-from youtube import watch, search, playlist, channel, local_playlist, comments, subscriptions
+from youtube import (
+    watch,
+    search,
+    playlist,
+    channel,
+    local_playlist,
+    comments,
+    subscriptions,
+)

 import settings

 from gevent.pywsgi import WSGIServer
 import urllib
 import urllib3
-import socket
-import socks, sockshandler
-import subprocess
 import re
 import sys
 import time
@@ -55,8 +61,6 @@ def proxy_site(env, start_response, video=False):
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
    }
-    current_range_start = 0
-    range_end = None
    if 'HTTP_RANGE' in env:
        send_headers['Range'] = env['HTTP_RANGE']

@@ -274,6 +278,8 @@ class FilteredRequestLog:

 if __name__ == '__main__':
    if settings.allow_foreign_addresses:
+        # Binding to all interfaces is opt-in via the
+        # `allow_foreign_addresses` setting and documented as discouraged.
        server = WSGIServer(('0.0.0.0', settings.port_number), site_dispatch,
                            log=FilteredRequestLog())
        ip_server = '0.0.0.0'
--- a/settings.py
+++ b/settings.py
@@ -264,7 +264,6 @@ For security reasons, enabling this is not recommended.''',
    ('use_video_download', {
        'type': int,
        'default': 0,
-        'comment': '',
        'options': [
            (0, 'Disabled'),
            (1, 'Enabled'),
@@ -471,7 +470,7 @@ upgrade_functions = {


 def log_ignored_line(line_number, message):
-    print("WARNING: Ignoring settings.txt line " + str(node.lineno) + " (" + message + ")")
+    print('WARNING: Ignoring settings.txt line ' + str(line_number) + ' (' + message + ')')


 if os.path.isfile("settings.txt"):
@@ -511,17 +510,17 @@ else:
            pass  # Removed in Python 3.12+
        module_node = ast.parse(settings_text)
        for node in module_node.body:
-            if type(node) != ast.Assign:
-                log_ignored_line(node.lineno, "only assignments are allowed")
+            if not isinstance(node, ast.Assign):
+                log_ignored_line(node.lineno, 'only assignments are allowed')
                continue

            if len(node.targets) > 1:
-                log_ignored_line(node.lineno, "only simple single-variable assignments allowed")
+                log_ignored_line(node.lineno, 'only simple single-variable assignments allowed')
                continue

            target = node.targets[0]
-            if type(target) != ast.Name:
-                log_ignored_line(node.lineno, "only simple single-variable assignments allowed")
+            if not isinstance(target, ast.Name):
+                log_ignored_line(node.lineno, 'only simple single-variable assignments allowed')
                continue

            if target.id not in acceptable_targets:
--- a/tests/test_shorts.py
+++ b/tests/test_shorts.py
@@ -11,8 +11,7 @@ import pytest
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 import youtube.proto as proto
 from youtube.yt_data_extract.common import (
-    extract_item_info, extract_items, extract_shorts_lockup_view_model_info,
-    extract_approx_int,
+    extract_item_info, extract_items,
 )


--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -39,7 +39,8 @@ class NewIdentityState():
        self.new_identities_till_success -= 1

    def fetch_url_response(self, *args, **kwargs):
-        cleanup_func = (lambda r: None)
+        def cleanup_func(response):
+            return None
        if self.new_identities_till_success == 0:
            return MockResponse(), cleanup_func
        return MockResponse(body=html429, status=429), cleanup_func
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -1,14 +1,17 @@
+import logging
+import os
+import re
+import traceback
+from sys import exc_info
+
+import flask
+import jinja2
+from flask import request
+from flask_babel import Babel
+
 from youtube import util
 from .get_app_version import app_version
-import flask
-from flask import request
-import jinja2
 import settings
-import traceback
-import logging
-import re
-from sys import exc_info
-from flask_babel import Babel

 yt_app = flask.Flask(__name__)
 yt_app.config['TEMPLATES_AUTO_RELOAD'] = True
@@ -26,7 +29,6 @@ yt_app.logger.addFilter(FetchErrorFilter())
 # yt_app.jinja_env.lstrip_blocks = True

 # Configure Babel for i18n
-import os
 yt_app.config['BABEL_DEFAULT_LOCALE'] = 'en'
 # Use absolute path for translations directory to avoid issues with package structure changes
 _app_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -6,9 +6,7 @@ import settings

 import urllib
 import json
-from string import Template
 import youtube.proto as proto
-import html
 import math
 import gevent
 import re
@@ -293,7 +291,7 @@ def get_number_of_videos_channel(channel_id):
    try:
        response = util.fetch_url(url, headers_mobile,
            debug_name='number_of_videos', report_text='Got number of videos')
-    except (urllib.error.HTTPError, util.FetchError) as e:
+    except (urllib.error.HTTPError, util.FetchError):
        traceback.print_exc()
        print("Couldn't retrieve number of videos")
        return 1000
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -155,33 +155,35 @@ def post_process_comments_info(comments_info):


 def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
+    if not settings.comments_mode:
+        return {}
+
+    # Initialize the result dict up-front so that any exception path below
+    # can safely attach an 'error' field without risking UnboundLocalError.
+    comments_info = {'error': None}
    try:
-        if settings.comments_mode:
-            comments_info = {'error': None}
-            other_sort_url = (
-                util.URL_ORIGIN + '/comments?ctoken='
-                + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
-            )
-            other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top')
+        other_sort_url = (
+            util.URL_ORIGIN + '/comments?ctoken='
+            + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
+        )
+        other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top')

-            this_sort_url = (util.URL_ORIGIN
-                             + '/comments?ctoken='
-                             + make_comment_ctoken(video_id, sort=sort, lc=lc))
+        this_sort_url = (util.URL_ORIGIN
+                         + '/comments?ctoken='
+                         + make_comment_ctoken(video_id, sort=sort, lc=lc))

-            comments_info['comment_links'] = [
-                (other_sort_text, other_sort_url),
-                ('Direct link', this_sort_url)
-            ]
+        comments_info['comment_links'] = [
+            (other_sort_text, other_sort_url),
+            ('Direct link', this_sort_url)
+        ]

-            ctoken = make_comment_ctoken(video_id, sort, offset, lc)
-            comments_info.update(yt_data_extract.extract_comments_info(
-                request_comments(ctoken), ctoken=ctoken
-            ))
-            post_process_comments_info(comments_info)
+        ctoken = make_comment_ctoken(video_id, sort, offset, lc)
+        comments_info.update(yt_data_extract.extract_comments_info(
+            request_comments(ctoken), ctoken=ctoken
+        ))
+        post_process_comments_info(comments_info)

-            return comments_info
-        else:
-            return {}
+        return comments_info
    except util.FetchError as e:
        if e.code == '429' and settings.route_tor:
            comments_info['error'] = 'Error: YouTube blocked the request because the Tor exit node is overutilized.'
--- a/youtube/get_app_version/__init__.py
+++ b/youtube/get_app_version/__init__.py
@@ -1 +1,3 @@
-from .get_app_version import *
+from .get_app_version import app_version
+
+__all__ = ['app_version']
--- a/youtube/get_app_version/get_app_version.py
+++ b/youtube/get_app_version/get_app_version.py
@@ -1,47 +1,56 @@
 from __future__ import unicode_literals
-from subprocess import (
-    call,
-    STDOUT
-)
-from ..version import __version__
 import os
+import shutil
 import subprocess

+from ..version import __version__
+

 def app_version():
    def minimal_env_cmd(cmd):
        # make minimal environment
        env = {k: os.environ[k] for k in ['SYSTEMROOT', 'PATH'] if k in os.environ}
        env.update({'LANGUAGE': 'C', 'LANG': 'C', 'LC_ALL': 'C'})
-
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
        return out

    subst_list = {
-        "version": __version__,
-        "branch": None,
-        "commit": None
+        'version': __version__,
+        'branch': None,
+        'commit': None,
    }

-    if os.system("command -v git > /dev/null 2>&1") != 0:
+    # Use shutil.which instead of `command -v`/os.system so we don't spawn a
+    # shell (CWE-78 hardening) and so it works cross-platform.
+    if shutil.which('git') is None:
        return subst_list

-    if call(["git", "branch"], stderr=STDOUT, stdout=open(os.devnull, 'w')) != 0:
+    try:
+        # Check we are inside a git work tree. Using DEVNULL avoids the
+        # file-handle leak from `open(os.devnull, 'w')`.
+        rc = subprocess.call(
+            ['git', 'branch'],
+            stderr=subprocess.DEVNULL,
+            stdout=subprocess.DEVNULL,
+        )
+    except OSError:
+        return subst_list
+    if rc != 0:
        return subst_list

-    describe = minimal_env_cmd(["git", "describe", "--tags", "--always"])
+    describe = minimal_env_cmd(['git', 'describe', '--tags', '--always'])
    git_revision = describe.strip().decode('ascii')

-    branch = minimal_env_cmd(["git", "branch"])
+    branch = minimal_env_cmd(['git', 'branch'])
    git_branch = branch.strip().decode('ascii').replace('* ', '')

    subst_list.update({
-        "branch": git_branch,
-        "commit": git_revision
+        'branch': git_branch,
+        'commit': git_revision,
    })

    return subst_list


-if __name__ == "__main__":
+if __name__ == '__main__':
    app_version()
--- a/youtube/local_playlist.py
+++ b/youtube/local_playlist.py
@@ -1,28 +1,42 @@
-from youtube import util, yt_data_extract
+from youtube import util
 from youtube import yt_app
 import settings

 import os
 import json
-import html
 import gevent
-import urllib
 import math
 import glob
+import re

 import flask
 from flask import request

-playlists_directory = os.path.join(settings.data_dir, "playlists")
-thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails")
+playlists_directory = os.path.join(settings.data_dir, 'playlists')
+thumbnails_directory = os.path.join(settings.data_dir, 'playlist_thumbnails')
+
+# Whitelist accepted playlist names so user input cannot escape
+# `playlists_directory` / `thumbnails_directory` (CWE-22, OWASP A01:2021).
+# Allow letters, digits, spaces, dot, dash and underscore.
+_PLAYLIST_NAME_RE = re.compile(r'^[\w .\-]{1,128}$')
+
+
+def _validate_playlist_name(name):
+    '''Return the stripped name if it is a safe path component, otherwise abort with 400.'''
+    name = name.strip() if name is not None else ''
+    # An all-dot name ('.', '..') passes the character whitelist but is a
+    # directory reference, so refuse it too; a missing name becomes '' and fails the regex.
+    if not _PLAYLIST_NAME_RE.match(name) or not name.strip('.'):
+        flask.abort(400)
+    return name


 def _find_playlist_path(name):
-    """Find playlist file robustly, handling trailing spaces in filenames"""
-    name = name.strip()
-    pattern = os.path.join(playlists_directory, name + "*.txt")
+    '''Find playlist file robustly, handling trailing spaces in filenames'''
+    name = _validate_playlist_name(name)
+    pattern = os.path.join(playlists_directory, name + '*.txt')
    files = glob.glob(pattern)
-    return files[0] if files else os.path.join(playlists_directory, name + ".txt")
+    return files[0] if files else os.path.join(playlists_directory, name + '.txt')


 def _parse_playlist_lines(data):
@@ -179,8 +193,9 @@ def path_edit_playlist(playlist_name):
        redirect_page_number = min(int(request.values.get('page', 1)), math.ceil(number_of_videos_remaining/50))
        return flask.redirect(util.URL_ORIGIN + request.path + '?page=' + str(redirect_page_number))
    elif request.values['action'] == 'remove_playlist':
+        safe_name = _validate_playlist_name(playlist_name)
        try:
-            os.remove(os.path.join(playlists_directory, playlist_name + ".txt"))
+            os.remove(os.path.join(playlists_directory, safe_name + '.txt'))
        except OSError:
            pass
        return flask.redirect(util.URL_ORIGIN + '/playlists')
@@ -220,8 +235,17 @@ def edit_playlist():
        flask.abort(400)


+_THUMBNAIL_RE = re.compile(r'^[A-Za-z0-9_-]{11}\.jpg$')
+
+
@yt_app.route('/data/playlist_thumbnails/<playlist_name>/<thumbnail>')
 def serve_thumbnail(playlist_name, thumbnail):
-    # .. is necessary because flask always uses the application directory at ./youtube, not the working directory
+    # Validate both path components (CWE-22): all-dot names such as `..` are
+    # refused, and fullmatch() is used because `$` tolerates a trailing newline.
+    safe_name = _validate_playlist_name(playlist_name)
+    if not _THUMBNAIL_RE.fullmatch(thumbnail) or not safe_name.strip('.'):
+        flask.abort(400)
+    # .. is necessary because flask always uses the application directory at
+    # ./youtube, not the working directory.
    return flask.send_from_directory(
-        os.path.join('..', thumbnails_directory, playlist_name), thumbnail)
+        os.path.join('..', thumbnails_directory, safe_name), thumbnail)
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -3,9 +3,7 @@ from youtube import yt_app
 import settings

 import base64
-import urllib
 import json
-import string
 import gevent
 import math
 from flask import request, abort
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -5,7 +5,6 @@ import settings
 import json
 import urllib
 import base64
-import mimetypes
 from flask import request
 import flask
 import os
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -292,7 +292,10 @@ def youtube_timestamp_to_posix(dumb_timestamp):
 def posix_to_dumbed_down(posix_time):
    '''Inverse of youtube_timestamp_to_posix.'''
    delta = int(time.time() - posix_time)
-    assert delta >= 0
+    # Guard against future timestamps (clock drift) without relying on
+    # `assert` (which is stripped under `python -O`).
+    if delta < 0:
+        delta = 0

    if delta == 0:
        return '0 seconds ago'
@@ -531,7 +534,8 @@ def _get_upstream_videos(channel_id):
            return None

        root = defusedxml.ElementTree.fromstring(feed)
-        assert remove_bullshit(root.tag) == 'feed'
+        if remove_bullshit(root.tag) != 'feed':
+            raise ValueError('Root element is not <feed>')
        for entry in root:
            if (remove_bullshit(entry.tag) != 'entry'):
                continue
@@ -539,13 +543,13 @@ def _get_upstream_videos(channel_id):
            # it's yt:videoId in the xml but the yt: is turned into a namespace which is removed by remove_bullshit
            video_id_element = find_element(entry, 'videoId')
            time_published_element = find_element(entry, 'published')
-            assert video_id_element is not None
-            assert time_published_element is not None
+            if video_id_element is None or time_published_element is None:
+                raise ValueError('Missing videoId or published element')

            time_published = int(calendar.timegm(time.strptime(time_published_element.text, '%Y-%m-%dT%H:%M:%S+00:00')))
            times_published[video_id_element.text] = time_published

-    except AssertionError:
+    except ValueError:
        print('Failed to read atoma feed for ' + channel_status_name)
        traceback.print_exc()
    except defusedxml.ElementTree.ParseError:
@@ -593,7 +597,10 @@ def _get_upstream_videos(channel_id):
    # Special case: none of the videos have a time published.
    # In this case, make something up
    if videos and videos[0]['time_published'] is None:
-        assert all(v['time_published'] is None for v in videos)
+        # Invariant: if the first video has no timestamp, earlier passes
+        # ensure all of them are unset. Don't rely on `assert`.
+        if not all(v['time_published'] is None for v in videos):
+            raise RuntimeError('Inconsistent time_published state')
        now = time.time()
        for i in range(len(videos)):
            # 1 month between videos
@@ -808,7 +815,8 @@ def import_subscriptions():
        file = file.read().decode('utf-8')
        try:
            root = defusedxml.ElementTree.fromstring(file)
-            assert root.tag == 'opml'
+            if root.tag != 'opml':
+                raise ValueError('Root element is not <opml>')
            channels = []
            for outline_element in root[0][0]:
                if (outline_element.tag != 'outline') or ('xmlUrl' not in outline_element.attrib):
@@ -819,7 +827,7 @@ def import_subscriptions():
                channel_id = channel_rss_url[channel_rss_url.find('channel_id=')+11:].strip()
                channels.append((channel_id, channel_name))

-        except (AssertionError, IndexError, defusedxml.ElementTree.ParseError) as e:
+        except (ValueError, IndexError, defusedxml.ElementTree.ParseError):
            return '400 Bad Request: Unable to read opml xml file, or the file is not the expected format', 400
    elif mime_type in ('text/csv', 'application/vnd.ms-excel'):
        content = file.read().decode('utf-8')
@@ -1071,11 +1079,20 @@ def post_subscriptions_page():
    return '', 204


+# YouTube video IDs are exactly 11 chars from [A-Za-z0-9_-]. Enforce this
+# before using the value in filesystem paths to prevent path traversal
+# (CWE-22, OWASP A01:2021).
+_VIDEO_ID_RE = re.compile(r'^[A-Za-z0-9_-]{11}$')
+
+
@yt_app.route('/data/subscription_thumbnails/<thumbnail>')
 def serve_subscription_thumbnail(thumbnail):
    '''Serves thumbnail from disk if it's been saved already. If not, downloads the thumbnail, saves to disk, and serves it.'''
-    assert thumbnail[-4:] == '.jpg'
+    if not thumbnail.endswith('.jpg'):
+        flask.abort(400)
    video_id = thumbnail[0:-4]
+    if not _VIDEO_ID_RE.match(video_id):
+        flask.abort(400)
    thumbnail_path = os.path.join(thumbnails_directory, thumbnail)

    if video_id in existing_thumbnails:
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 import logging
+import random
 import settings
 import socks
 import sockshandler
@@ -19,11 +20,11 @@ import gevent.queue
 import gevent.lock
 import collections
 import stem
-
-logger = logging.getLogger(__name__)
 import stem.control
 import traceback

+logger = logging.getLogger(__name__)
+
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 #  instead of using the system certificate store, meaning self-signed certificates
 #  configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -54,8 +55,8 @@ import traceback
 #   https://github.com/kennethreitz/requests/issues/2966

 # Until then, I will use a mix of urllib3 and urllib.
-import urllib3
-import urllib3.contrib.socks
+import urllib3  # noqa: E402  (imported here intentionally after the long note above)
+import urllib3.contrib.socks  # noqa: E402

 URL_ORIGIN = "/https://www.youtube.com"

@@ -177,7 +178,6 @@ def get_pool(use_tor):
 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    '''Separate cookiejars for receiving and sending'''
    def __init__(self, cookiejar_send=None, cookiejar_receive=None):
-        import http.cookiejar
        self.cookiejar_send = cookiejar_send
        self.cookiejar_receive = cookiejar_receive

@@ -208,6 +208,16 @@ class FetchError(Exception):
        self.error_message = error_message


+def _noop_cleanup(response):
+    '''Cleanup callback that does nothing; assigned on the urllib opener path of fetch_url_response.'''
+    return None
+
+
+def _release_conn_cleanup(response):
+    '''Cleanup callback that calls response.release_conn(); assigned on the urllib3 pool path of fetch_url_response.'''
+    response.release_conn()
+
+
 def decode_content(content, encoding_header):
    encodings = encoding_header.replace(' ', '').split(',')
    for encoding in reversed(encodings):
@@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
-        cleanup_func = (lambda r: None)
+        cleanup_func = _noop_cleanup

    else:           # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        # default: Retry.DEFAULT = Retry(3)
@@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
                     error_message=msg)
            else:
                raise
-        cleanup_func = (lambda r: r.release_conn())
+        cleanup_func = _release_conn_cleanup

    return response, cleanup_func

@@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,

    Max retries: 5 attempts with exponential backoff
    """
-    import random
-
    max_retries = 5
    base_delay = 1.0  # Base delay in seconds

@@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
                    logger.error(f'Server error {response.status} after {max_retries} retries')
                    raise FetchError(str(response.status), reason=response.reason, ip=None)

-                # Exponential backoff for server errors
+                # Exponential backoff for server errors. Non-crypto jitter.
                delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
                time.sleep(delay)
@@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
                else:
                    raise

-            # Wait and retry
+            # Wait and retry. Non-crypto jitter.
            delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
            logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
            time.sleep(delay)
@@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue):


 def download_thumbnail(save_directory, video_id):
-    save_location = os.path.join(save_directory, video_id + ".jpg")
+    save_location = os.path.join(save_directory, video_id + '.jpg')
    for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'):
-        url = f"https://i.ytimg.com/vi/{video_id}/{quality}"
+        url = f'https://i.ytimg.com/vi/{video_id}/{quality}'
        try:
-            thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
+            thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id)
        except FetchError as e:
            if '404' in str(e):
                continue
-            print("Failed to download thumbnail for " + video_id + ": " + str(e))
+            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False
        except urllib.error.HTTPError as e:
            if e.code == 404:
                continue
-            print("Failed to download thumbnail for " + video_id + ": " + str(e))
+            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False
        try:
-            f = open(save_location, 'wb')
+            with open(save_location, 'wb') as f:
+                f.write(thumbnail)
        except FileNotFoundError:
            os.makedirs(save_directory, exist_ok=True)
-            f = open(save_location, 'wb')
-        f.write(thumbnail)
-        f.close()
+            with open(save_location, 'wb') as f:
+                f.write(thumbnail)
        return True
-    print("No thumbnail available for " + video_id)
+    print('No thumbnail available for ' + video_id)
    return False


--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -1,27 +1,26 @@
+import json
+import logging
+import math
+import os
+import re
+import traceback
+import urllib
+from math import ceil
+from types import SimpleNamespace
+from urllib.parse import parse_qs, urlencode
+
+import flask
+import gevent
+import urllib3.exceptions
+from flask import request
+
 import youtube
 from youtube import yt_app
 from youtube import util, comments, local_playlist, yt_data_extract
-from youtube.util import time_utc_isoformat
 import settings

-from flask import request
-import flask
-import logging
-
 logger = logging.getLogger(__name__)

-import json
-import gevent
-import os
-import math
-import traceback
-import urllib
-import re
-import urllib3.exceptions
-from urllib.parse import parse_qs, urlencode
-from types import SimpleNamespace
-from math import ceil
-

 try:
    with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@@ -62,7 +61,9 @@ def get_video_sources(info, target_resolution):
            continue
        if not (fmt['init_range'] and fmt['index_range']):
            # Allow HLS-backed audio tracks (served locally, no init/index needed)
-            if not fmt.get('url', '').startswith('http://127.') and not '/ytl-api/' in fmt.get('url', ''):
+            url_value = fmt.get('url', '')
+            if (not url_value.startswith('http://127.')
+                    and '/ytl-api/' not in url_value):
                continue
            # Mark as HLS for frontend
            fmt['is_hls'] = True
@@ -222,7 +223,7 @@ def lang_in(lang, sequence):
    if lang is None:
        return False
    lang = lang[0:2]
-    return lang in (l[0:2] for l in sequence)
+    return lang in (item[0:2] for item in sequence)


 def lang_eq(lang1, lang2):
@@ -238,9 +239,9 @@ def equiv_lang_in(lang, sequence):
    e.g. if lang is en, extracts en-GB from sequence.
    Necessary because if only a specific variant like en-GB is available, can't ask YouTube for simply en. Need to get the available variant.'''
    lang = lang[0:2]
-    for l in sequence:
-        if l[0:2] == lang:
-            return l
+    for item in sequence:
+        if item[0:2] == lang:
+            return item
    return None


@@ -310,7 +311,15 @@ def get_subtitle_sources(info):
            sources[-1]['on'] = True

    if len(sources) == 0:
-        assert len(info['automatic_caption_languages']) == 0 and len(info['manual_caption_languages']) == 0
+        # Invariant: with no caption sources there should be no languages
+        # either. Don't rely on `assert` which is stripped under `python -O`.
+        if (len(info['automatic_caption_languages']) != 0
+                or len(info['manual_caption_languages']) != 0):
+            logger.warning(
+                'Unexpected state: no subtitle sources but %d auto / %d manual languages',
+                len(info['automatic_caption_languages']),
+                len(info['manual_caption_languages']),
+            )

    return sources

@@ -669,7 +678,6 @@ def format_bytes(bytes):
@yt_app.route('/ytl-api/audio-track-proxy')
 def audio_track_proxy():
    """Proxy for DASH audio tracks to avoid throttling."""
-    cache_key = request.args.get('id', '')
    audio_url = request.args.get('url', '')

    if not audio_url:
@@ -692,7 +700,7 @@ def audio_track_proxy():
@yt_app.route('/ytl-api/audio-track')
 def get_audio_track():
    """Proxy HLS audio/video: playlist or individual segment."""
-    from youtube.hls_cache import get_hls_url, _tracks
+    from youtube.hls_cache import get_hls_url

    cache_key = request.args.get('id', '')
    seg_url = request.args.get('seg', '')
@@ -916,7 +924,7 @@ def get_hls_manifest():
        flask.abort(404, 'HLS manifest not found')

    try:
-        print(f'[hls-manifest] Fetching HLS manifest...')
+        print('[hls-manifest] Fetching HLS manifest...')
        manifest = util.fetch_url(hls_url,
            headers=(('User-Agent', 'Mozilla/5.0'),),
            debug_name='hls_manifest').decode('utf-8')
@@ -1018,7 +1026,8 @@ def get_storyboard_vtt():
    for i, board in enumerate(boards):
        *t, _, sigh = board.split("#")
        width, height, count, width_cnt, height_cnt, interval = map(int, t)
-        if height != wanted_height: continue
+        if height != wanted_height:
+            continue
        q['sigh'] = [sigh]
        url = f"{base_url}?{urlencode(q, doseq=True)}"
        storyboard = SimpleNamespace(
@@ -1182,7 +1191,6 @@ def get_watch_page(video_id=None):
    uni_sources = video_sources['uni_sources']
    pair_sources = video_sources['pair_sources']
    pair_idx = video_sources['pair_idx']
-    audio_track_sources = video_sources['audio_track_sources']

    # Build audio tracks list from HLS
    audio_tracks = []