security: harden code against command injection and path traversal

Core changes:

* enforce HTTPS URLs and remove shell usage in generate_release.py
* replace os.system calls with subprocess across the codebase
* validate external inputs (playlist names, video IDs)

Improvements and fixes:

* settings.py: fix typo (node.lineno → line_number); use isinstance() over type()
* youtube/get_app_version: improve git detection using subprocess.DEVNULL
* youtube/util.py: add cleanup helpers; use shutil.which for binary resolution

YouTube modules:

* watch.py: detect and flag HLS streams; remove unused audio_track_sources
* comments.py: return early when comments are disabled; add error handling
* local_playlist.py: validate playlist names to prevent path traversal
* subscriptions.py: replace asserts with proper error handling; validate video IDs

Cleanup:

* remove unused imports across modules (playlist, search, channel)
* reorganize package imports in youtube/__init__.py
* simplify test imports and fix cleanup_func in tests

Tests:

* tests/test_shorts.py: simplify imports
* tests/test_util.py: fix cleanup_func definition
This commit is contained in:
2026-04-20 00:39:35 -05:00
parent 155bd4df49
commit d6190a2d0b
16 changed files with 237 additions and 146 deletions

View File

@@ -1,5 +1,6 @@
from datetime import datetime
import logging
import random
import settings
import socks
import sockshandler
@@ -19,11 +20,11 @@ import gevent.queue
import gevent.lock
import collections
import stem
logger = logging.getLogger(__name__)
import stem.control
import traceback
logger = logging.getLogger(__name__)
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
# configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -54,8 +55,8 @@ import traceback
# https://github.com/kennethreitz/requests/issues/2966
# Until then, I will use a mix of urllib3 and urllib.
import urllib3
import urllib3.contrib.socks
import urllib3 # noqa: E402 (imported here intentionally after the long note above)
import urllib3.contrib.socks # noqa: E402
URL_ORIGIN = "/https://www.youtube.com"
@@ -177,7 +178,6 @@ def get_pool(use_tor):
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
'''Separate cookiejars for receiving and sending'''
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
import http.cookiejar
self.cookiejar_send = cookiejar_send
self.cookiejar_receive = cookiejar_receive
@@ -208,6 +208,16 @@ class FetchError(Exception):
self.error_message = error_message
def _noop_cleanup(response):
    '''Cleanup callback that intentionally does nothing.

    Used as the cleanup function when the response was obtained through
    the urllib opener, which owns the response, so there is nothing to
    release here.

    response: the response object; accepted only so the signature matches
        the other cleanup callbacks, and otherwise ignored.

    Always returns None.
    '''
    return None
def _release_conn_cleanup(response):
    '''Cleanup callback that returns a pooled connection to its pool.

    response: a response object exposing release_conn() (the urllib3
        response produced by the pooled code path).

    Returns None; the only effect is the call to response.release_conn().
    '''
    response.release_conn()
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
for encoding in reversed(encodings):
@@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout)
cleanup_func = (lambda r: None)
cleanup_func = _noop_cleanup
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
# default: Retry.DEFAULT = Retry(3)
@@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
error_message=msg)
else:
raise
cleanup_func = (lambda r: r.release_conn())
cleanup_func = _release_conn_cleanup
return response, cleanup_func
@@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
Max retries: 5 attempts with exponential backoff
"""
import random
max_retries = 5
base_delay = 1.0 # Base delay in seconds
@@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
logger.error(f'Server error {response.status} after {max_retries} retries')
raise FetchError(str(response.status), reason=response.reason, ip=None)
# Exponential backoff for server errors
# Exponential backoff for server errors. Non-crypto jitter.
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
@@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
else:
raise
# Wait and retry
# Wait and retry. Non-crypto jitter.
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
@@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue):
def download_thumbnail(save_directory, video_id):
    '''Download a thumbnail for video_id and save it as <video_id>.jpg.

    The thumbnail qualities are tried in order (hq720, sddefault,
    hqdefault); a 404 for one quality moves on to the next one, while any
    other fetch failure aborts the download.

    save_directory: directory the image is written into; it is created if
        it does not exist yet.
    video_id: video identifier used both in the thumbnail URL and as the
        output file name.

    Returns True once a thumbnail has been written, False if the fetch
    failed or no quality was available.
    '''
    save_location = os.path.join(save_directory, video_id + '.jpg')
    for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'):
        url = f'https://i.ytimg.com/vi/{video_id}/{quality}'
        try:
            thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id)
        except FetchError as e:
            # A missing quality is expected; fall through to the next one.
            if '404' in str(e):
                continue
            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False
        except urllib.error.HTTPError as e:
            if e.code == 404:
                continue
            print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
            return False

        try:
            with open(save_location, 'wb') as f:
                f.write(thumbnail)
        except FileNotFoundError:
            # The target directory is missing: create it and retry once.
            os.makedirs(save_directory, exist_ok=True)
            with open(save_location, 'wb') as f:
                f.write(thumbnail)
        return True

    print('No thumbnail available for ' + video_id)
    return False