security: harden code against command injection and path traversal

Core changes: * enforce HTTPS URLs and remove shell usage in generate_release.py * replace os.system calls with subprocess across the codebase * validate external inputs (playlist names, video IDs) Improvements and fixes: * settings.py: fix typo (node.lineno → line_number); use isinstance() over type() * youtube/get_app_version: improve git detection using subprocess.DEVNULL * youtube/util.py: add cleanup helpers; use shutil.which for binary resolution YouTube modules: * watch.py: detect and flag HLS streams; remove unused audio_track_sources * comments.py: return early when comments are disabled; add error handling * local_playlist.py: validate playlist names to prevent path traversal * subscriptions.py: replace asserts with proper error handling; validate video IDs Cleanup: * remove unused imports across modules (playlist, search, channel) * reorganize package imports in youtube/__init__.py * simplify test imports and fix cleanup_func in tests Tests: * tests/test_shorts.py: simplify imports * tests/test_util.py: fix cleanup_func definition
2026-04-20 00:39:35 -05:00
parent 155bd4df49
commit d6190a2d0b
16 changed files with 237 additions and 146 deletions
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 import logging
+import random
 import settings
 import socks
 import sockshandler
@@ -19,11 +20,11 @@ import gevent.queue
 import gevent.lock
 import collections
 import stem
-
-logger = logging.getLogger(__name__)
 import stem.control
 import traceback

+logger = logging.getLogger(__name__)
+
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 #  instead of using the system certificate store, meaning self-signed certificates
 #  configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -54,8 +55,8 @@ import traceback
 #   https://github.com/kennethreitz/requests/issues/2966

 # Until then, I will use a mix of urllib3 and urllib.
-import urllib3
-import urllib3.contrib.socks
+import urllib3  # noqa: E402  (imported here intentionally after the long note above)
+import urllib3.contrib.socks  # noqa: E402

 URL_ORIGIN = "/https://www.youtube.com"

@@ -177,7 +178,6 @@ def get_pool(use_tor):
 class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    '''Separate cookiejars for receiving and sending'''
    def __init__(self, cookiejar_send=None, cookiejar_receive=None):
-        import http.cookiejar
        self.cookiejar_send = cookiejar_send
        self.cookiejar_receive = cookiejar_receive

@@ -208,6 +208,16 @@ class FetchError(Exception):
        self.error_message = error_message


+def _noop_cleanup(response):
+    '''No-op cleanup used when the urllib opener owns the response.'''
+    return None
+
+
+def _release_conn_cleanup(response):
+    '''Release the urllib3 pooled connection back to the pool.'''
+    response.release_conn()
+
+
 def decode_content(content, encoding_header):
    encodings = encoding_header.replace(' ', '').split(',')
    for encoding in reversed(encodings):
@@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
-        cleanup_func = (lambda r: None)
+        cleanup_func = _noop_cleanup

    else:           # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        # default: Retry.DEFAULT = Retry(3)
@@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
                     error_message=msg)
            else:
                raise
-        cleanup_func = (lambda r: r.release_conn())
+        cleanup_func = _release_conn_cleanup

    return response, cleanup_func

@@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,

    Max retries: 5 attempts with exponential backoff
    """
-    import random
-
    max_retries = 5
    base_delay = 1.0  # Base delay in seconds

@@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
                    logger.error(f'Server error {response.status} after {max_retries} retries')
                    raise FetchError(str(response.status), reason=response.reason, ip=None)

-                # Exponential backoff for server errors
+                # Exponential backoff for server errors. Non-crypto jitter.
                delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
                time.sleep(delay)
@@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
                else:
                    raise

-            # Wait and retry
+            # Wait and retry. Non-crypto jitter.
            delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
            logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
            time.sleep(delay)
@@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue):


 def download_thumbnail(save_directory, video_id):
-    save_location = os.path.join(save_directory, video_id + ".jpg")
+    save_location = os.path.join(save_directory, video_id + '.jpg')
    for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'):
-        url = f"https://i.ytimg.com/vi/{video_id}/{quality}"
+        url = f'https://i.ytimg.com/vi/{video_id}/{quality}'
        try:
-            thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
+            thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id)
        except FetchError as e:
            if '404' in str(e):
                continue
-            print("Failed to download thumbnail for " + video_id + ": " + str(e))
+            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False
        except urllib.error.HTTPError as e:
            if e.code == 404:
                continue
-            print("Failed to download thumbnail for " + video_id + ": " + str(e))
+            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False
        try:
-            f = open(save_location, 'wb')
+            with open(save_location, 'wb') as f:
+                f.write(thumbnail)
        except FileNotFoundError:
            os.makedirs(save_directory, exist_ok=True)
-            f = open(save_location, 'wb')
-        f.write(thumbnail)
-        f.close()
+            with open(save_location, 'wb') as f:
+                f.write(thumbnail)
        return True
-    print("No thumbnail available for " + video_id)
+    print('No thumbnail available for ' + video_id)
    return False