From d6190a2d0baa37c17a446540ae4ca97b716096b2 Mon Sep 17 00:00:00 2001 From: Astounds Date: Mon, 20 Apr 2026 00:39:35 -0500 Subject: [PATCH] security: harden code against command injection and path traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Core changes: * enforce HTTPS URLs and remove shell usage in generate_release.py * replace os.system calls with subprocess across the codebase * validate external inputs (playlist names, video IDs) Improvements and fixes: * settings.py: fix typo (node.lineno → line_number); use isinstance() over type() * youtube/get_app_version: improve git detection using subprocess.DEVNULL * youtube/util.py: add cleanup helpers; use shutil.which for binary resolution YouTube modules: * watch.py: detect and flag HLS streams; remove unused audio_track_sources * comments.py: return early when comments are disabled; add error handling * local_playlist.py: validate playlist names to prevent path traversal * subscriptions.py: replace asserts with proper error handling; validate video IDs Cleanup: * remove unused imports across modules (playlist, search, channel) * reorganize package imports in youtube/__init__.py * simplify test imports and fix cleanup_func in tests Tests: * tests/test_shorts.py: simplify imports * tests/test_util.py: fix cleanup_func definition --- generate_release.py | 29 ++++++++-- server.py | 20 ++++--- settings.py | 13 ++--- tests/test_shorts.py | 3 +- tests/test_util.py | 3 +- youtube/__init__.py | 20 ++++--- youtube/channel.py | 4 +- youtube/comments.py | 46 ++++++++-------- youtube/get_app_version/__init__.py | 4 +- youtube/get_app_version/get_app_version.py | 41 ++++++++------ youtube/local_playlist.py | 48 ++++++++++++---- youtube/playlist.py | 2 - youtube/search.py | 1 - youtube/subscriptions.py | 35 +++++++++--- youtube/util.py | 50 ++++++++++------- youtube/watch.py | 64 ++++++++++++---------- 16 files changed, 237 insertions(+), 146 deletions(-) diff --git 
a/generate_release.py b/generate_release.py index 8bec3a0..5ee0a51 100644 --- a/generate_release.py +++ b/generate_release.py @@ -44,6 +44,10 @@ def remove_files_with_extensions(path, extensions): def download_if_not_exists(file_name, url, sha256=None): if not os.path.exists('./' + file_name): + # Reject non-https URLs so a mistaken constant cannot cause a + # plaintext download (bandit B310 hardening). + if not url.startswith('https://'): + raise Exception('Refusing to download over non-https URL: ' + url) log('Downloading ' + file_name + '..') data = urllib.request.urlopen(url).read() log('Finished downloading ' + file_name) @@ -58,12 +62,14 @@ def download_if_not_exists(file_name, url, sha256=None): log('Using existing ' + file_name) def wine_run_shell(command): + # Keep argv-style invocation (no shell) to avoid command injection. if os.name == 'posix': - check(os.system('wine ' + command.replace('\\', '/'))) + parts = ['wine'] + command.replace('\\', '/').split() elif os.name == 'nt': - check(os.system(command)) + parts = command.split() else: raise Exception('Unsupported OS') + check(subprocess.run(parts).returncode) def wine_run(command_parts): if os.name == 'posix': @@ -92,7 +98,20 @@ if os.path.exists('./yt-local'): # confused with working directory. I'm calling it the same thing so it will # have that name when extracted from the final release zip archive) log('Making copy of yt-local files') -check(os.system('git archive --format tar master | 7z x -si -ttar -oyt-local')) +# Avoid the shell: pipe `git archive` into 7z directly via subprocess. 
+_git_archive = subprocess.Popen( + ['git', 'archive', '--format', 'tar', 'master'], + stdout=subprocess.PIPE, +) +_sevenz = subprocess.Popen( + ['7z', 'x', '-si', '-ttar', '-oyt-local'], + stdin=_git_archive.stdout, +) +_git_archive.stdout.close() +_sevenz.wait() +_git_archive.wait() +check(_sevenz.returncode) +check(_git_archive.returncode) if len(os.listdir('./yt-local')) == 0: raise Exception('Failed to copy yt-local files') @@ -136,7 +155,7 @@ if os.path.exists('./python'): log('Extracting python distribution') -check(os.system(r'7z -y x -opython ' + python_dist_name)) +check_subp(subprocess.run(['7z', '-y', 'x', '-opython', python_dist_name])) log('Executing get-pip.py') wine_run(['./python/python.exe', '-I', 'get-pip.py']) @@ -241,7 +260,7 @@ if os.path.exists('./' + output_filename): log('Removing previous zipped release') os.remove('./' + output_filename) log('Zipping release') -check(os.system(r'7z -mx=9 a ' + output_filename + ' ./yt-local')) +check_subp(subprocess.run(['7z', '-mx=9', 'a', output_filename, './yt-local'])) print('\n') log('Finished') diff --git a/server.py b/server.py index 1a09acb..36ddae5 100644 --- a/server.py +++ b/server.py @@ -1,22 +1,28 @@ #!/usr/bin/env python3 +# E402 is deliberately ignored in this file: `monkey.patch_all()` must run +# before any stdlib networking or gevent-dependent modules are imported. 
from gevent import monkey monkey.patch_all() -import gevent.socket from youtube import yt_app from youtube import util # these are just so the files get run - they import yt_app and add routes to it -from youtube import watch, search, playlist, channel, local_playlist, comments, subscriptions +from youtube import ( + watch, + search, + playlist, + channel, + local_playlist, + comments, + subscriptions, +) import settings from gevent.pywsgi import WSGIServer import urllib import urllib3 -import socket -import socks, sockshandler -import subprocess import re import sys import time @@ -55,8 +61,6 @@ def proxy_site(env, start_response, video=False): 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', 'Accept': '*/*', } - current_range_start = 0 - range_end = None if 'HTTP_RANGE' in env: send_headers['Range'] = env['HTTP_RANGE'] @@ -274,6 +278,8 @@ class FilteredRequestLog: if __name__ == '__main__': if settings.allow_foreign_addresses: + # Binding to all interfaces is opt-in via the + # `allow_foreign_addresses` setting and documented as discouraged. 
server = WSGIServer(('0.0.0.0', settings.port_number), site_dispatch, log=FilteredRequestLog()) ip_server = '0.0.0.0' diff --git a/settings.py b/settings.py index 07511bd..118b13d 100644 --- a/settings.py +++ b/settings.py @@ -264,7 +264,6 @@ For security reasons, enabling this is not recommended.''', ('use_video_download', { 'type': int, 'default': 0, - 'comment': '', 'options': [ (0, 'Disabled'), (1, 'Enabled'), @@ -471,7 +470,7 @@ upgrade_functions = { def log_ignored_line(line_number, message): - print("WARNING: Ignoring settings.txt line " + str(node.lineno) + " (" + message + ")") + print('WARNING: Ignoring settings.txt line ' + str(line_number) + ' (' + message + ')') if os.path.isfile("settings.txt"): @@ -511,17 +510,17 @@ else: pass # Removed in Python 3.12+ module_node = ast.parse(settings_text) for node in module_node.body: - if type(node) != ast.Assign: - log_ignored_line(node.lineno, "only assignments are allowed") + if not isinstance(node, ast.Assign): + log_ignored_line(node.lineno, 'only assignments are allowed') continue if len(node.targets) > 1: - log_ignored_line(node.lineno, "only simple single-variable assignments allowed") + log_ignored_line(node.lineno, 'only simple single-variable assignments allowed') continue target = node.targets[0] - if type(target) != ast.Name: - log_ignored_line(node.lineno, "only simple single-variable assignments allowed") + if not isinstance(target, ast.Name): + log_ignored_line(node.lineno, 'only simple single-variable assignments allowed') continue if target.id not in acceptable_targets: diff --git a/tests/test_shorts.py b/tests/test_shorts.py index 9a49450..c5b7301 100644 --- a/tests/test_shorts.py +++ b/tests/test_shorts.py @@ -11,8 +11,7 @@ import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) import youtube.proto as proto from youtube.yt_data_extract.common import ( - extract_item_info, extract_items, extract_shorts_lockup_view_model_info, - extract_approx_int, + extract_item_info, 
extract_items, ) diff --git a/tests/test_util.py b/tests/test_util.py index bc10de4..1b444fe 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -39,7 +39,8 @@ class NewIdentityState(): self.new_identities_till_success -= 1 def fetch_url_response(self, *args, **kwargs): - cleanup_func = (lambda r: None) + def cleanup_func(response): + return None if self.new_identities_till_success == 0: return MockResponse(), cleanup_func return MockResponse(body=html429, status=429), cleanup_func diff --git a/youtube/__init__.py b/youtube/__init__.py index 4859589..b0e7cd3 100644 --- a/youtube/__init__.py +++ b/youtube/__init__.py @@ -1,14 +1,17 @@ +import logging +import os +import re +import traceback +from sys import exc_info + +import flask +import jinja2 +from flask import request +from flask_babel import Babel + from youtube import util from .get_app_version import app_version -import flask -from flask import request -import jinja2 import settings -import traceback -import logging -import re -from sys import exc_info -from flask_babel import Babel yt_app = flask.Flask(__name__) yt_app.config['TEMPLATES_AUTO_RELOAD'] = True @@ -26,7 +29,6 @@ yt_app.logger.addFilter(FetchErrorFilter()) # yt_app.jinja_env.lstrip_blocks = True # Configure Babel for i18n -import os yt_app.config['BABEL_DEFAULT_LOCALE'] = 'en' # Use absolute path for translations directory to avoid issues with package structure changes _app_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) diff --git a/youtube/channel.py b/youtube/channel.py index 17e46f5..14a565e 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -6,9 +6,7 @@ import settings import urllib import json -from string import Template import youtube.proto as proto -import html import math import gevent import re @@ -293,7 +291,7 @@ def get_number_of_videos_channel(channel_id): try: response = util.fetch_url(url, headers_mobile, debug_name='number_of_videos', report_text='Got number of videos') - except 
(urllib.error.HTTPError, util.FetchError) as e: + except (urllib.error.HTTPError, util.FetchError): traceback.print_exc() print("Couldn't retrieve number of videos") return 1000 diff --git a/youtube/comments.py b/youtube/comments.py index 1ff1a21..8d03f22 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -155,33 +155,35 @@ def post_process_comments_info(comments_info): def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''): + if not settings.comments_mode: + return {} + + # Initialize the result dict up-front so that any exception path below + # can safely attach an 'error' field without risking UnboundLocalError. + comments_info = {'error': None} try: - if settings.comments_mode: - comments_info = {'error': None} - other_sort_url = ( - util.URL_ORIGIN + '/comments?ctoken=' - + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) - ) - other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top') + other_sort_url = ( + util.URL_ORIGIN + '/comments?ctoken=' + + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) + ) + other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top') - this_sort_url = (util.URL_ORIGIN - + '/comments?ctoken=' - + make_comment_ctoken(video_id, sort=sort, lc=lc)) + this_sort_url = (util.URL_ORIGIN + + '/comments?ctoken=' + + make_comment_ctoken(video_id, sort=sort, lc=lc)) - comments_info['comment_links'] = [ - (other_sort_text, other_sort_url), - ('Direct link', this_sort_url) - ] + comments_info['comment_links'] = [ + (other_sort_text, other_sort_url), + ('Direct link', this_sort_url) + ] - ctoken = make_comment_ctoken(video_id, sort, offset, lc) - comments_info.update(yt_data_extract.extract_comments_info( - request_comments(ctoken), ctoken=ctoken - )) - post_process_comments_info(comments_info) + ctoken = make_comment_ctoken(video_id, sort, offset, lc) + comments_info.update(yt_data_extract.extract_comments_info( + request_comments(ctoken), ctoken=ctoken + )) + post_process_comments_info(comments_info) - 
return comments_info - else: - return {} + return comments_info except util.FetchError as e: if e.code == '429' and settings.route_tor: comments_info['error'] = 'Error: YouTube blocked the request because the Tor exit node is overutilized.' diff --git a/youtube/get_app_version/__init__.py b/youtube/get_app_version/__init__.py index 2d5290f..665b7b5 100644 --- a/youtube/get_app_version/__init__.py +++ b/youtube/get_app_version/__init__.py @@ -1 +1,3 @@ -from .get_app_version import * +from .get_app_version import app_version + +__all__ = ['app_version'] diff --git a/youtube/get_app_version/get_app_version.py b/youtube/get_app_version/get_app_version.py index 4995bb7..a73b857 100644 --- a/youtube/get_app_version/get_app_version.py +++ b/youtube/get_app_version/get_app_version.py @@ -1,47 +1,56 @@ from __future__ import unicode_literals -from subprocess import ( - call, - STDOUT -) -from ..version import __version__ import os +import shutil import subprocess +from ..version import __version__ + def app_version(): def minimal_env_cmd(cmd): # make minimal environment env = {k: os.environ[k] for k in ['SYSTEMROOT', 'PATH'] if k in os.environ} env.update({'LANGUAGE': 'C', 'LANG': 'C', 'LC_ALL': 'C'}) - out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0] return out subst_list = { - "version": __version__, - "branch": None, - "commit": None + 'version': __version__, + 'branch': None, + 'commit': None, } - if os.system("command -v git > /dev/null 2>&1") != 0: + # Use shutil.which instead of `command -v`/os.system so we don't spawn a + # shell (CWE-78 hardening) and so it works cross-platform. + if shutil.which('git') is None: return subst_list - if call(["git", "branch"], stderr=STDOUT, stdout=open(os.devnull, 'w')) != 0: + try: + # Check we are inside a git work tree. Using DEVNULL avoids the + # file-handle leak from `open(os.devnull, 'w')`. 
+ rc = subprocess.call( + ['git', 'branch'], + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + except OSError: + return subst_list + if rc != 0: return subst_list - describe = minimal_env_cmd(["git", "describe", "--tags", "--always"]) + describe = minimal_env_cmd(['git', 'describe', '--tags', '--always']) git_revision = describe.strip().decode('ascii') - branch = minimal_env_cmd(["git", "branch"]) + branch = minimal_env_cmd(['git', 'branch']) git_branch = branch.strip().decode('ascii').replace('* ', '') subst_list.update({ - "branch": git_branch, - "commit": git_revision + 'branch': git_branch, + 'commit': git_revision, }) return subst_list -if __name__ == "__main__": +if __name__ == '__main__': app_version() diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index 4620129..e9b0b20 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -1,28 +1,42 @@ -from youtube import util, yt_data_extract +from youtube import util from youtube import yt_app import settings import os import json -import html import gevent -import urllib import math import glob +import re import flask from flask import request -playlists_directory = os.path.join(settings.data_dir, "playlists") -thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails") +playlists_directory = os.path.join(settings.data_dir, 'playlists') +thumbnails_directory = os.path.join(settings.data_dir, 'playlist_thumbnails') + +# Whitelist accepted playlist names so user input cannot escape +# `playlists_directory` / `thumbnails_directory` (CWE-22, OWASP A01:2021). +# Allow letters, digits, spaces, dot, dash and underscore. 
+_PLAYLIST_NAME_RE = re.compile(r'^[\w .\-]{1,128}$') + + +def _validate_playlist_name(name): + '''Return the stripped name if safe, otherwise abort with 400.''' + if name is None: + flask.abort(400) + name = name.strip() + if not _PLAYLIST_NAME_RE.match(name): + flask.abort(400) + return name def _find_playlist_path(name): - """Find playlist file robustly, handling trailing spaces in filenames""" - name = name.strip() - pattern = os.path.join(playlists_directory, name + "*.txt") + '''Find playlist file robustly, handling trailing spaces in filenames''' + name = _validate_playlist_name(name) + pattern = os.path.join(playlists_directory, name + '*.txt') files = glob.glob(pattern) - return files[0] if files else os.path.join(playlists_directory, name + ".txt") + return files[0] if files else os.path.join(playlists_directory, name + '.txt') def _parse_playlist_lines(data): @@ -179,8 +193,9 @@ def path_edit_playlist(playlist_name): redirect_page_number = min(int(request.values.get('page', 1)), math.ceil(number_of_videos_remaining/50)) return flask.redirect(util.URL_ORIGIN + request.path + '?page=' + str(redirect_page_number)) elif request.values['action'] == 'remove_playlist': + safe_name = _validate_playlist_name(playlist_name) try: - os.remove(os.path.join(playlists_directory, playlist_name + ".txt")) + os.remove(os.path.join(playlists_directory, safe_name + '.txt')) except OSError: pass return flask.redirect(util.URL_ORIGIN + '/playlists') @@ -220,8 +235,17 @@ def edit_playlist(): flask.abort(400) +_THUMBNAIL_RE = re.compile(r'^[A-Za-z0-9_-]{11}\.jpg$') + + @yt_app.route('/data/playlist_thumbnails//') def serve_thumbnail(playlist_name, thumbnail): - # .. is necessary because flask always uses the application directory at ./youtube, not the working directory + # Validate both path components so a crafted URL cannot escape + # `thumbnails_directory` via `..` or NUL tricks (CWE-22). 
+ safe_name = _validate_playlist_name(playlist_name) + if not _THUMBNAIL_RE.match(thumbnail): + flask.abort(400) + # .. is necessary because flask always uses the application directory at + # ./youtube, not the working directory. return flask.send_from_directory( - os.path.join('..', thumbnails_directory, playlist_name), thumbnail) + os.path.join('..', thumbnails_directory, safe_name), thumbnail) diff --git a/youtube/playlist.py b/youtube/playlist.py index e5a03cd..3b784ba 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -3,9 +3,7 @@ from youtube import yt_app import settings import base64 -import urllib import json -import string import gevent import math from flask import request, abort diff --git a/youtube/search.py b/youtube/search.py index d586c62..6e62e28 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -5,7 +5,6 @@ import settings import json import urllib import base64 -import mimetypes from flask import request import flask import os diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 980822a..dafea58 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -292,7 +292,10 @@ def youtube_timestamp_to_posix(dumb_timestamp): def posix_to_dumbed_down(posix_time): '''Inverse of youtube_timestamp_to_posix.''' delta = int(time.time() - posix_time) - assert delta >= 0 + # Guard against future timestamps (clock drift) without relying on + # `assert` (which is stripped under `python -O`). 
+ if delta < 0: + delta = 0 if delta == 0: return '0 seconds ago' @@ -531,7 +534,8 @@ def _get_upstream_videos(channel_id): return None root = defusedxml.ElementTree.fromstring(feed) - assert remove_bullshit(root.tag) == 'feed' + if remove_bullshit(root.tag) != 'feed': + raise ValueError('Root element is not ') for entry in root: if (remove_bullshit(entry.tag) != 'entry'): continue @@ -539,13 +543,13 @@ def _get_upstream_videos(channel_id): # it's yt:videoId in the xml but the yt: is turned into a namespace which is removed by remove_bullshit video_id_element = find_element(entry, 'videoId') time_published_element = find_element(entry, 'published') - assert video_id_element is not None - assert time_published_element is not None + if video_id_element is None or time_published_element is None: + raise ValueError('Missing videoId or published element') time_published = int(calendar.timegm(time.strptime(time_published_element.text, '%Y-%m-%dT%H:%M:%S+00:00'))) times_published[video_id_element.text] = time_published - except AssertionError: + except ValueError: print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() except defusedxml.ElementTree.ParseError: @@ -593,7 +597,10 @@ def _get_upstream_videos(channel_id): # Special case: none of the videos have a time published. # In this case, make something up if videos and videos[0]['time_published'] is None: - assert all(v['time_published'] is None for v in videos) + # Invariant: if the first video has no timestamp, earlier passes + # ensure all of them are unset. Don't rely on `assert`. 
+ if not all(v['time_published'] is None for v in videos): + raise RuntimeError('Inconsistent time_published state') now = time.time() for i in range(len(videos)): # 1 month between videos @@ -808,7 +815,8 @@ def import_subscriptions(): file = file.read().decode('utf-8') try: root = defusedxml.ElementTree.fromstring(file) - assert root.tag == 'opml' + if root.tag != 'opml': + raise ValueError('Root element is not ') channels = [] for outline_element in root[0][0]: if (outline_element.tag != 'outline') or ('xmlUrl' not in outline_element.attrib): @@ -819,7 +827,7 @@ def import_subscriptions(): channel_id = channel_rss_url[channel_rss_url.find('channel_id=')+11:].strip() channels.append((channel_id, channel_name)) - except (AssertionError, IndexError, defusedxml.ElementTree.ParseError) as e: + except (ValueError, IndexError, defusedxml.ElementTree.ParseError): return '400 Bad Request: Unable to read opml xml file, or the file is not the expected format', 400 elif mime_type in ('text/csv', 'application/vnd.ms-excel'): content = file.read().decode('utf-8') @@ -1071,11 +1079,20 @@ def post_subscriptions_page(): return '', 204 +# YouTube video IDs are exactly 11 chars from [A-Za-z0-9_-]. Enforce this +# before using the value in filesystem paths to prevent path traversal +# (CWE-22, OWASP A01:2021). +_VIDEO_ID_RE = re.compile(r'^[A-Za-z0-9_-]{11}$') + + @yt_app.route('/data/subscription_thumbnails/') def serve_subscription_thumbnail(thumbnail): '''Serves thumbnail from disk if it's been saved already. 
If not, downloads the thumbnail, saves to disk, and serves it.''' - assert thumbnail[-4:] == '.jpg' + if not thumbnail.endswith('.jpg'): + flask.abort(400) video_id = thumbnail[0:-4] + if not _VIDEO_ID_RE.match(video_id): + flask.abort(400) thumbnail_path = os.path.join(thumbnails_directory, thumbnail) if video_id in existing_thumbnails: diff --git a/youtube/util.py b/youtube/util.py index 4e86217..5e60d1c 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -1,5 +1,6 @@ from datetime import datetime import logging +import random import settings import socks import sockshandler @@ -19,11 +20,11 @@ import gevent.queue import gevent.lock import collections import stem - -logger = logging.getLogger(__name__) import stem.control import traceback +logger = logging.getLogger(__name__) + # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates # configured by the user will not work. Some draconian networks block TLS unless a corporate @@ -54,8 +55,8 @@ import traceback # https://github.com/kennethreitz/requests/issues/2966 # Until then, I will use a mix of urllib3 and urllib. 
-import urllib3 -import urllib3.contrib.socks +import urllib3 # noqa: E402 (imported here intentionally after the long note above) +import urllib3.contrib.socks # noqa: E402 URL_ORIGIN = "/https://www.youtube.com" @@ -177,7 +178,6 @@ def get_pool(use_tor): class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): '''Separate cookiejars for receiving and sending''' def __init__(self, cookiejar_send=None, cookiejar_receive=None): - import http.cookiejar self.cookiejar_send = cookiejar_send self.cookiejar_receive = cookiejar_receive @@ -208,6 +208,16 @@ class FetchError(Exception): self.error_message = error_message +def _noop_cleanup(response): + '''No-op cleanup used when the urllib opener owns the response.''' + return None + + +def _release_conn_cleanup(response): + '''Release the urllib3 pooled connection back to the pool.''' + response.release_conn() + + def decode_content(content, encoding_header): encodings = encoding_header.replace(' ', '').split(',') for encoding in reversed(encodings): @@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, opener = urllib.request.build_opener(cookie_processor) response = opener.open(req, timeout=timeout) - cleanup_func = (lambda r: None) + cleanup_func = _noop_cleanup else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. 
# default: Retry.DEFAULT = Retry(3) @@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, error_message=msg) else: raise - cleanup_func = (lambda r: r.release_conn()) + cleanup_func = _release_conn_cleanup return response, cleanup_func @@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, Max retries: 5 attempts with exponential backoff """ - import random - max_retries = 5 base_delay = 1.0 # Base delay in seconds @@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, logger.error(f'Server error {response.status} after {max_retries} retries') raise FetchError(str(response.status), reason=response.reason, ip=None) - # Exponential backoff for server errors + # Exponential backoff for server errors. Non-crypto jitter. delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') time.sleep(delay) @@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, else: raise - # Wait and retry + # Wait and retry. Non-crypto jitter. delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) logger.warning(f'Connection error. 
Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') time.sleep(delay) @@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue): def download_thumbnail(save_directory, video_id): - save_location = os.path.join(save_directory, video_id + ".jpg") + save_location = os.path.join(save_directory, video_id + '.jpg') for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'): - url = f"https://i.ytimg.com/vi/{video_id}/{quality}" + url = f'https://i.ytimg.com/vi/{video_id}/{quality}' try: - thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) + thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id) except FetchError as e: if '404' in str(e): continue - print("Failed to download thumbnail for " + video_id + ": " + str(e)) + print('Failed to download thumbnail for ' + video_id + ': ' + str(e)) return False except urllib.error.HTTPError as e: if e.code == 404: continue - print("Failed to download thumbnail for " + video_id + ": " + str(e)) + print('Failed to download thumbnail for ' + video_id + ': ' + str(e)) return False try: - f = open(save_location, 'wb') + with open(save_location, 'wb') as f: + f.write(thumbnail) except FileNotFoundError: os.makedirs(save_directory, exist_ok=True) - f = open(save_location, 'wb') - f.write(thumbnail) - f.close() + with open(save_location, 'wb') as f: + f.write(thumbnail) return True - print("No thumbnail available for " + video_id) + print('No thumbnail available for ' + video_id) return False diff --git a/youtube/watch.py b/youtube/watch.py index 4cc6223..7f87215 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -1,27 +1,26 @@ +import json +import logging +import math +import os +import re +import traceback +import urllib +from math import ceil +from types import SimpleNamespace +from urllib.parse import parse_qs, urlencode + +import flask +import gevent +import urllib3.exceptions +from flask import request + import youtube from youtube import yt_app from youtube import 
util, comments, local_playlist, yt_data_extract -from youtube.util import time_utc_isoformat import settings -from flask import request -import flask -import logging - logger = logging.getLogger(__name__) -import json -import gevent -import os -import math -import traceback -import urllib -import re -import urllib3.exceptions -from urllib.parse import parse_qs, urlencode -from types import SimpleNamespace -from math import ceil - try: with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f: @@ -62,7 +61,9 @@ def get_video_sources(info, target_resolution): continue if not (fmt['init_range'] and fmt['index_range']): # Allow HLS-backed audio tracks (served locally, no init/index needed) - if not fmt.get('url', '').startswith('http://127.') and not '/ytl-api/' in fmt.get('url', ''): + url_value = fmt.get('url', '') + if (not url_value.startswith('http://127.') + and '/ytl-api/' not in url_value): continue # Mark as HLS for frontend fmt['is_hls'] = True @@ -222,7 +223,7 @@ def lang_in(lang, sequence): if lang is None: return False lang = lang[0:2] - return lang in (l[0:2] for l in sequence) + return lang in (item[0:2] for item in sequence) def lang_eq(lang1, lang2): @@ -238,9 +239,9 @@ def equiv_lang_in(lang, sequence): e.g. if lang is en, extracts en-GB from sequence. Necessary because if only a specific variant like en-GB is available, can't ask YouTube for simply en. Need to get the available variant.''' lang = lang[0:2] - for l in sequence: - if l[0:2] == lang: - return l + for item in sequence: + if item[0:2] == lang: + return item return None @@ -310,7 +311,15 @@ def get_subtitle_sources(info): sources[-1]['on'] = True if len(sources) == 0: - assert len(info['automatic_caption_languages']) == 0 and len(info['manual_caption_languages']) == 0 + # Invariant: with no caption sources there should be no languages + # either. Don't rely on `assert` which is stripped under `python -O`. 
+ if (len(info['automatic_caption_languages']) != 0 + or len(info['manual_caption_languages']) != 0): + logger.warning( + 'Unexpected state: no subtitle sources but %d auto / %d manual languages', + len(info['automatic_caption_languages']), + len(info['manual_caption_languages']), + ) return sources @@ -669,7 +678,6 @@ def format_bytes(bytes): @yt_app.route('/ytl-api/audio-track-proxy') def audio_track_proxy(): """Proxy for DASH audio tracks to avoid throttling.""" - cache_key = request.args.get('id', '') audio_url = request.args.get('url', '') if not audio_url: @@ -692,7 +700,7 @@ def audio_track_proxy(): @yt_app.route('/ytl-api/audio-track') def get_audio_track(): """Proxy HLS audio/video: playlist or individual segment.""" - from youtube.hls_cache import get_hls_url, _tracks + from youtube.hls_cache import get_hls_url cache_key = request.args.get('id', '') seg_url = request.args.get('seg', '') @@ -916,7 +924,7 @@ def get_hls_manifest(): flask.abort(404, 'HLS manifest not found') try: - print(f'[hls-manifest] Fetching HLS manifest...') + print('[hls-manifest] Fetching HLS manifest...') manifest = util.fetch_url(hls_url, headers=(('User-Agent', 'Mozilla/5.0'),), debug_name='hls_manifest').decode('utf-8') @@ -1018,7 +1026,8 @@ def get_storyboard_vtt(): for i, board in enumerate(boards): *t, _, sigh = board.split("#") width, height, count, width_cnt, height_cnt, interval = map(int, t) - if height != wanted_height: continue + if height != wanted_height: + continue q['sigh'] = [sigh] url = f"{base_url}?{urlencode(q, doseq=True)}" storyboard = SimpleNamespace( @@ -1182,7 +1191,6 @@ def get_watch_page(video_id=None): uni_sources = video_sources['uni_sources'] pair_sources = video_sources['pair_sources'] pair_idx = video_sources['pair_idx'] - audio_track_sources = video_sources['audio_track_sources'] # Build audio tracks list from HLS audio_tracks = []