security: harden code against command injection and path traversal
Core changes: * enforce HTTPS URLs and remove shell usage in generate_release.py * replace os.system calls with subprocess across the codebase * validate external inputs (playlist names, video IDs) Improvements and fixes: * settings.py: fix typo (node.lineno → line_number); use isinstance() over type() * youtube/get_app_version: improve git detection using subprocess.DEVNULL * youtube/util.py: add cleanup helpers; use shutil.which for binary resolution YouTube modules: * watch.py: detect and flag HLS streams; remove unused audio_track_sources * comments.py: return early when comments are disabled; add error handling * local_playlist.py: validate playlist names to prevent path traversal * subscriptions.py: replace asserts with proper error handling; validate video IDs Cleanup: * remove unused imports across modules (playlist, search, channel) * reorganize package imports in youtube/__init__.py * simplify test imports and fix cleanup_func in tests Tests: * tests/test_shorts.py: simplify imports * tests/test_util.py: fix cleanup_func definition
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import random
|
||||
import settings
|
||||
import socks
|
||||
import sockshandler
|
||||
@@ -19,11 +20,11 @@ import gevent.queue
|
||||
import gevent.lock
|
||||
import collections
|
||||
import stem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import stem.control
|
||||
import traceback
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# The trouble with the requests library: It ships its own certificate bundle via certifi
|
||||
# instead of using the system certificate store, meaning self-signed certificates
|
||||
# configured by the user will not work. Some draconian networks block TLS unless a corporate
|
||||
@@ -54,8 +55,8 @@ import traceback
|
||||
# https://github.com/kennethreitz/requests/issues/2966
|
||||
|
||||
# Until then, I will use a mix of urllib3 and urllib.
|
||||
import urllib3
|
||||
import urllib3.contrib.socks
|
||||
import urllib3 # noqa: E402 (imported here intentionally after the long note above)
|
||||
import urllib3.contrib.socks # noqa: E402
|
||||
|
||||
URL_ORIGIN = "/https://www.youtube.com"
|
||||
|
||||
@@ -177,7 +178,6 @@ def get_pool(use_tor):
|
||||
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
|
||||
'''Separate cookiejars for receiving and sending'''
|
||||
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
|
||||
import http.cookiejar
|
||||
self.cookiejar_send = cookiejar_send
|
||||
self.cookiejar_receive = cookiejar_receive
|
||||
|
||||
@@ -208,6 +208,16 @@ class FetchError(Exception):
|
||||
self.error_message = error_message
|
||||
|
||||
|
||||
def _noop_cleanup(response):
|
||||
'''No-op cleanup used when the urllib opener owns the response.'''
|
||||
return None
|
||||
|
||||
|
||||
def _release_conn_cleanup(response):
|
||||
'''Release the urllib3 pooled connection back to the pool.'''
|
||||
response.release_conn()
|
||||
|
||||
|
||||
def decode_content(content, encoding_header):
|
||||
encodings = encoding_header.replace(' ', '').split(',')
|
||||
for encoding in reversed(encodings):
|
||||
@@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
|
||||
opener = urllib.request.build_opener(cookie_processor)
|
||||
|
||||
response = opener.open(req, timeout=timeout)
|
||||
cleanup_func = (lambda r: None)
|
||||
cleanup_func = _noop_cleanup
|
||||
|
||||
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
|
||||
# default: Retry.DEFAULT = Retry(3)
|
||||
@@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
|
||||
error_message=msg)
|
||||
else:
|
||||
raise
|
||||
cleanup_func = (lambda r: r.release_conn())
|
||||
cleanup_func = _release_conn_cleanup
|
||||
|
||||
return response, cleanup_func
|
||||
|
||||
@@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
||||
|
||||
Max retries: 5 attempts with exponential backoff
|
||||
"""
|
||||
import random
|
||||
|
||||
max_retries = 5
|
||||
base_delay = 1.0 # Base delay in seconds
|
||||
|
||||
@@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
||||
logger.error(f'Server error {response.status} after {max_retries} retries')
|
||||
raise FetchError(str(response.status), reason=response.reason, ip=None)
|
||||
|
||||
# Exponential backoff for server errors
|
||||
# Exponential backoff for server errors. Non-crypto jitter.
|
||||
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
|
||||
logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
|
||||
time.sleep(delay)
|
||||
@@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
|
||||
else:
|
||||
raise
|
||||
|
||||
# Wait and retry
|
||||
# Wait and retry. Non-crypto jitter.
|
||||
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
|
||||
logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
|
||||
time.sleep(delay)
|
||||
@@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue):
|
||||
|
||||
|
||||
def download_thumbnail(save_directory, video_id):
|
||||
save_location = os.path.join(save_directory, video_id + ".jpg")
|
||||
save_location = os.path.join(save_directory, video_id + '.jpg')
|
||||
for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'):
|
||||
url = f"https://i.ytimg.com/vi/{video_id}/{quality}"
|
||||
url = f'https://i.ytimg.com/vi/{video_id}/{quality}'
|
||||
try:
|
||||
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
|
||||
thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id)
|
||||
except FetchError as e:
|
||||
if '404' in str(e):
|
||||
continue
|
||||
print("Failed to download thumbnail for " + video_id + ": " + str(e))
|
||||
print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
|
||||
return False
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 404:
|
||||
continue
|
||||
print("Failed to download thumbnail for " + video_id + ": " + str(e))
|
||||
print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
|
||||
return False
|
||||
try:
|
||||
f = open(save_location, 'wb')
|
||||
with open(save_location, 'wb') as f:
|
||||
f.write(thumbnail)
|
||||
except FileNotFoundError:
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
f = open(save_location, 'wb')
|
||||
f.write(thumbnail)
|
||||
f.close()
|
||||
with open(save_location, 'wb') as f:
|
||||
f.write(thumbnail)
|
||||
return True
|
||||
print("No thumbnail available for " + video_id)
|
||||
print('No thumbnail available for ' + video_id)
|
||||
return False
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user