This commit is contained in:
2026-03-22 14:17:23 -05:00
parent ed4b05d9b6
commit 84e1acaab8
13 changed files with 1097 additions and 18 deletions

210
Makefile Normal file
View File

@@ -0,0 +1,210 @@
# yt-local Makefile
# Automated tasks for development, translations, and maintenance
.PHONY: help install dev clean test i18n-extract i18n-init i18n-update i18n-compile i18n-stats i18n-clean setup-dev lint format backup restore
# Variables
PYTHON := python3
PIP := pip3
LANG_CODE ?= es
VENV_DIR := venv
PROJECT_NAME := yt-local
## Help
help: ## Show this help message
@echo "$(PROJECT_NAME) - Available tasks:"
@echo ""
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " %-20s %s\n", $$1, $$2}'
@echo ""
@echo "Examples:"
@echo " make install # Install dependencies"
@echo " make dev # Run development server"
@echo " make i18n-extract # Extract strings for translation"
@echo " make i18n-init LANG_CODE=fr # Initialize French"
@echo " make lint # Check code style"
## Installation and Setup
install: ## Install project dependencies
@echo "[INFO] Installing dependencies..."
$(PIP) install -r requirements.txt
@echo "[SUCCESS] Dependencies installed"
setup-dev: ## Complete development setup
@echo "[INFO] Setting up development environment..."
$(PYTHON) -m venv $(VENV_DIR)
./$(VENV_DIR)/bin/pip install -r requirements.txt
@echo "[SUCCESS] Virtual environment created in $(VENV_DIR)"
@echo "[INFO] Activate with: source $(VENV_DIR)/bin/activate"
requirements: ## Update and install requirements
@echo "[INFO] Installing/updating requirements..."
$(PIP) install --upgrade pip
$(PIP) install -r requirements.txt
@echo "[SUCCESS] Requirements installed"
## Development
dev: ## Run development server
@echo "[INFO] Starting development server..."
@echo "[INFO] Server available at: http://localhost:9010"
$(PYTHON) server.py
run: dev ## Alias for dev
## Testing
test: ## Run tests
@echo "[INFO] Running tests..."
@if [ -d "tests" ]; then \
$(PYTHON) -m pytest -v; \
else \
echo "[WARN] No tests directory found"; \
fi
test-cov: ## Run tests with coverage
@echo "[INFO] Running tests with coverage..."
@if command -v pytest-cov >/dev/null 2>&1; then \
$(PYTHON) -m pytest -v --cov=$(PROJECT_NAME) --cov-report=html; \
else \
echo "[WARN] pytest-cov not installed. Run: pip install pytest-cov"; \
fi
## Internationalization (i18n)
i18n-extract: ## Extract strings for translation
@echo "[INFO] Extracting strings for translation..."
$(PYTHON) manage_translations.py extract
@echo "[SUCCESS] Strings extracted to translations/messages.pot"
i18n-init: ## Initialize new language (use LANG_CODE=xx)
@echo "[INFO] Initializing language: $(LANG_CODE)"
$(PYTHON) manage_translations.py init $(LANG_CODE)
@echo "[SUCCESS] Language $(LANG_CODE) initialized"
@echo "[INFO] Edit: translations/$(LANG_CODE)/LC_MESSAGES/messages.po"
i18n-update: ## Update existing translations
@echo "[INFO] Updating existing translations..."
$(PYTHON) manage_translations.py update
@echo "[SUCCESS] Translations updated"
i18n-compile: ## Compile translations to binary .mo files
@echo "[INFO] Compiling translations..."
$(PYTHON) manage_translations.py compile
@echo "[SUCCESS] Translations compiled"
i18n-stats: ## Show translation statistics
@echo "[INFO] Translation statistics:"
@echo ""
@for lang_dir in translations/*/; do \
if [ -d "$$lang_dir" ] && [ "$$lang_dir" != "translations/*/" ]; then \
lang=$$(basename "$$lang_dir"); \
po_file="$$lang_dir/LC_MESSAGES/messages.po"; \
if [ -f "$$po_file" ]; then \
total=$$(grep -c "^msgid " "$$po_file" 2>/dev/null || echo "0"); \
translated=$$(grep -c "^msgstr \"[^\"]\+\"" "$$po_file" 2>/dev/null || echo "0"); \
fuzzy=$$(grep -c "^#, fuzzy" "$$po_file" 2>/dev/null || echo "0"); \
if [ "$$total" -gt 0 ]; then \
percent=$$((translated * 100 / total)); \
echo " [STAT] $$lang: $$translated/$$total ($$percent%) - Fuzzy: $$fuzzy"; \
else \
echo " [STAT] $$lang: No translations yet"; \
fi; \
fi \
fi \
done
@echo ""
i18n-clean: ## Clean compiled translation files
@echo "[INFO] Cleaning compiled .mo files..."
find translations/ -name "*.mo" -delete
@echo "[SUCCESS] .mo files removed"
i18n-workflow: ## Complete workflow: extract → update → compile
@echo "[INFO] Running complete translation workflow..."
@make i18n-extract
@make i18n-update
@make i18n-compile
@make i18n-stats
@echo "[SUCCESS] Translation workflow completed"
## Code Quality
lint: ## Check code with flake8
@echo "[INFO] Checking code style..."
@if command -v flake8 >/dev/null 2>&1; then \
flake8 youtube/ --max-line-length=120 --ignore=E501,W503,E402 --exclude=youtube/ytdlp_service.py,youtube/ytdlp_integration.py,youtube/ytdlp_proxy.py; \
echo "[SUCCESS] Code style check passed"; \
else \
echo "[WARN] flake8 not installed (pip install flake8)"; \
fi
format: ## Format code with black (if available)
@echo "[INFO] Formatting code..."
@if command -v black >/dev/null 2>&1; then \
black youtube/ --line-length=120 --exclude='ytdlp_.*\.py'; \
echo "[SUCCESS] Code formatted"; \
else \
echo "[WARN] black not installed (pip install black)"; \
fi
check-deps: ## Check installed dependencies
@echo "[INFO] Checking dependencies..."
@$(PYTHON) -c "import flask_babel; print('[OK] Flask-Babel:', flask_babel.__version__)" 2>/dev/null || echo "[ERROR] Flask-Babel not installed"
@$(PYTHON) -c "import flask; print('[OK] Flask:', flask.__version__)" 2>/dev/null || echo "[ERROR] Flask not installed"
@$(PYTHON) -c "import yt_dlp; print('[OK] yt-dlp:', yt_dlp.__version__)" 2>/dev/null || echo "[ERROR] yt-dlp not installed"
## Maintenance
backup: ## Create translations backup
@echo "[INFO] Creating translations backup..."
@timestamp=$$(date +%Y%m%d_%H%M%S); \
tar -czf "translations_backup_$$timestamp.tar.gz" translations/ 2>/dev/null || echo "[WARN] No translations to backup"; \
if [ -f "translations_backup_$$timestamp.tar.gz" ]; then \
echo "[SUCCESS] Backup created: translations_backup_$$timestamp.tar.gz"; \
fi
restore: ## Restore translations from backup
@echo "[INFO] Restoring translations from backup..."
@if ls translations_backup_*.tar.gz 1>/dev/null 2>&1; then \
latest_backup=$$(ls -t translations_backup_*.tar.gz | head -1); \
tar -xzf "$$latest_backup"; \
echo "[SUCCESS] Restored from: $$latest_backup"; \
else \
echo "[ERROR] No backup files found"; \
fi
clean: ## Clean temporary files and caches
@echo "[INFO] Cleaning temporary files..."
find . -type f -name "*.pyc" -delete
find . -type d -name "__pycache__" -delete
find . -type f -name "*.mo" -delete
find . -type d -name ".pytest_cache" -delete
find . -type f -name ".coverage" -delete
find . -type d -name "htmlcov" -delete
@echo "[SUCCESS] Temporary files removed"
distclean: clean ## Clean everything including venv
@echo "[INFO] Cleaning everything..."
rm -rf $(VENV_DIR)
@echo "[SUCCESS] Complete cleanup done"
## Project Information
info: ## Show project information
@echo "[INFO] $(PROJECT_NAME) - Project information:"
@echo ""
@echo " [INFO] Directory: $$(pwd)"
@echo " [INFO] Python: $$($(PYTHON) --version)"
@echo " [INFO] Pip: $$($(PIP) --version | cut -d' ' -f1-2)"
@echo ""
@echo " [INFO] Configured languages:"
@for lang_dir in translations/*/; do \
if [ -d "$$lang_dir" ] && [ "$$lang_dir" != "translations/*/" ]; then \
lang=$$(basename "$$lang_dir"); \
echo " - $$lang"; \
fi \
done
@echo ""
@echo " [INFO] Main files:"
@echo " - babel.cfg (i18n configuration)"
@echo " - manage_translations.py (i18n CLI)"
@echo " - youtube/i18n_strings.py (centralized strings)"
@echo " - youtube/ytdlp_service.py (yt-dlp integration)"
@echo ""
# Default target
.DEFAULT_GOAL := help

7
babel.cfg Normal file
View File

@@ -0,0 +1,7 @@
[python: youtube/**.py]
keywords = lazy_gettext:1,2 _l:1,2
[python: server.py]
[python: settings.py]
[jinja2: youtube/templates/**.html]
extensions=jinja2.ext.i18n
encoding = utf-8

95
manage_translations.py Normal file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""
Translation management script for yt-local
Usage:
python manage_translations.py extract # Extract strings to messages.pot
python manage_translations.py init es # Initialize Spanish translation
python manage_translations.py update # Update all translations
python manage_translations.py compile # Compile translations to .mo files
"""
import sys
import os
import subprocess
def run_command(cmd):
"""Run a shell command and print output"""
print(f"Running: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.stdout:
print(result.stdout)
if result.stderr:
print(result.stderr, file=sys.stderr)
return result.returncode
def extract():
"""Extract translatable strings from source code"""
print("Extracting translatable strings...")
return run_command([
'pybabel', 'extract',
'-F', 'babel.cfg',
'-k', 'lazy_gettext',
'-k', '_l',
'-o', 'translations/messages.pot',
'.'
])
def init(language):
"""Initialize a new language translation"""
print(f"Initializing {language} translation...")
return run_command([
'pybabel', 'init',
'-i', 'translations/messages.pot',
'-d', 'translations',
'-l', language
])
def update():
"""Update existing translations with new strings"""
print("Updating translations...")
return run_command([
'pybabel', 'update',
'-i', 'translations/messages.pot',
'-d', 'translations'
])
def compile_translations():
"""Compile .po files to .mo files"""
print("Compiling translations...")
return run_command([
'pybabel', 'compile',
'-d', 'translations'
])
def main():
if len(sys.argv) < 2:
print(__doc__)
sys.exit(1)
command = sys.argv[1]
if command == 'extract':
sys.exit(extract())
elif command == 'init':
if len(sys.argv) < 3:
print("Error: Please specify a language code (e.g., es, fr, de)")
sys.exit(1)
sys.exit(init(sys.argv[2]))
elif command == 'update':
sys.exit(update())
elif command == 'compile':
sys.exit(compile_translations())
else:
print(f"Unknown command: {command}")
print(__doc__)
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -1,4 +1,6 @@
Flask>=1.0.3
Flask-Babel>=4.0.0
Babel>=2.12.0
gevent>=1.2.2
Brotli>=1.0.7
PySocks>=1.6.8
@@ -6,3 +8,5 @@ urllib3>=1.24.1
defusedxml>=0.5.0
cachetools>=4.0.0
stem>=1.8.0
yt-dlp>=2026.01.01
requests>=2.25.0

View File

@@ -296,6 +296,17 @@ Archive: https://archive.ph/OZQbN''',
'category': 'interface',
}),
('language', {
'type': str,
'default': 'en',
'comment': 'Interface language',
'options': [
('en', 'English'),
('es', 'Español'),
],
'category': 'interface',
}),
('embed_page_mode', {
'type': bool,
'label': 'Enable embed page',
@@ -329,6 +340,15 @@ Archive: https://archive.ph/OZQbN''',
'hidden': True,
}),
('ytdlp_enabled', {
'type': bool,
'default': True,
'comment': '''Enable yt-dlp integration for multi-language audio and subtitles''',
'hidden': False,
'label': 'Enable yt-dlp integration',
'category': 'playback',
}),
('settings_version', {
'type': int,
'default': 6,

View File

@@ -7,12 +7,36 @@ import settings
import traceback
import re
from sys import exc_info
from flask_babel import Babel
yt_app = flask.Flask(__name__)
yt_app.config['TEMPLATES_AUTO_RELOAD'] = True
yt_app.url_map.strict_slashes = False
# yt_app.jinja_env.trim_blocks = True
# yt_app.jinja_env.lstrip_blocks = True
# Configure Babel for i18n
import os
yt_app.config['BABEL_DEFAULT_LOCALE'] = 'en'
# Use absolute path for translations directory to avoid issues with package structure changes
_app_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
yt_app.config['BABEL_TRANSLATION_DIRECTORIES'] = os.path.join(_app_root, 'translations')
def get_locale():
"""Determine the best locale based on user preference or browser settings"""
# Check if user has a language preference in settings
if hasattr(settings, 'language') and settings.language:
locale = settings.language
print(f'[i18n] Using user preference: {locale}')
return locale
# Otherwise, use browser's Accept-Language header
# Only match languages with available translations
locale = request.accept_languages.best_match(['en', 'es'])
print(f'[i18n] Using browser language: {locale}')
return locale or 'en'
babel = Babel(yt_app, locale_selector=get_locale)
yt_app.add_url_rule('/settings', 'settings_page', settings.settings_page, methods=['POST', 'GET'])

112
youtube/i18n_strings.py Normal file
View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Centralized i18n strings for yt-local
This file contains static strings that need to be translated but are used
dynamically in templates or generated content. By importing this module,
these strings get extracted by babel for translation.
"""
from flask_babel import lazy_gettext as _l
# Settings categories
CATEGORY_NETWORK = _l('Network')
CATEGORY_PLAYBACK = _l('Playback')
CATEGORY_INTERFACE = _l('Interface')
# Common setting labels
ROUTE_TOR = _l('Route Tor')
DEFAULT_SUBTITLES_MODE = _l('Default subtitles mode')
AV1_CODEC_RANKING = _l('AV1 Codec Ranking')
VP8_VP9_CODEC_RANKING = _l('VP8/VP9 Codec Ranking')
H264_CODEC_RANKING = _l('H.264 Codec Ranking')
USE_INTEGRATED_SOURCES = _l('Use integrated sources')
ROUTE_IMAGES = _l('Route images')
ENABLE_COMMENTS_JS = _l('Enable comments.js')
ENABLE_SPONSORBLOCK = _l('Enable SponsorBlock')
ENABLE_EMBED_PAGE = _l('Enable embed page')
# Setting names (auto-generated from setting keys)
RELATED_VIDEOS_MODE = _l('Related videos mode')
COMMENTS_MODE = _l('Comments mode')
ENABLE_COMMENT_AVATARS = _l('Enable comment avatars')
DEFAULT_COMMENT_SORTING = _l('Default comment sorting')
THEATER_MODE = _l('Theater mode')
AUTOPLAY_VIDEOS = _l('Autoplay videos')
DEFAULT_RESOLUTION = _l('Default resolution')
USE_VIDEO_PLAYER = _l('Use video player')
USE_VIDEO_DOWNLOAD = _l('Use video download')
PROXY_IMAGES = _l('Proxy images')
THEME = _l('Theme')
FONT = _l('Font')
LANGUAGE = _l('Language')
EMBED_PAGE_MODE = _l('Embed page mode')
# Common option values
OFF = _l('Off')
ON = _l('On')
DISABLED = _l('Disabled')
ENABLED = _l('Enabled')
ALWAYS_SHOWN = _l('Always shown')
SHOWN_BY_CLICKING_BUTTON = _l('Shown by clicking button')
NATIVE = _l('Native')
NATIVE_WITH_HOTKEYS = _l('Native with hotkeys')
PLYR = _l('Plyr')
# Theme options
LIGHT = _l('Light')
GRAY = _l('Gray')
DARK = _l('Dark')
# Font options
BROWSER_DEFAULT = _l('Browser default')
LIBERATION_SERIF = _l('Liberation Serif')
ARIAL = _l('Arial')
VERDANA = _l('Verdana')
TAHOMA = _l('Tahoma')
# Search and filter options
SORT_BY = _l('Sort by')
RELEVANCE = _l('Relevance')
UPLOAD_DATE = _l('Upload date')
VIEW_COUNT = _l('View count')
RATING = _l('Rating')
# Time filters
ANY = _l('Any')
LAST_HOUR = _l('Last hour')
TODAY = _l('Today')
THIS_WEEK = _l('This week')
THIS_MONTH = _l('This month')
THIS_YEAR = _l('This year')
# Content types
TYPE = _l('Type')
VIDEO = _l('Video')
CHANNEL = _l('Channel')
PLAYLIST = _l('Playlist')
MOVIE = _l('Movie')
SHOW = _l('Show')
# Duration filters
DURATION = _l('Duration')
SHORT_DURATION = _l('Short (< 4 minutes)')
LONG_DURATION = _l('Long (> 20 minutes)')
# Actions
SEARCH = _l('Search')
DOWNLOAD = _l('Download')
SUBSCRIBE = _l('Subscribe')
UNSUBSCRIBE = _l('Unsubscribe')
IMPORT = _l('Import')
EXPORT = _l('Export')
SAVE = _l('Save')
CHECK = _l('Check')
MUTE = _l('Mute')
UNMUTE = _l('Unmute')
# Common UI elements
OPTIONS = _l('Options')
SETTINGS = _l('Settings')
ERROR = _l('Error')
LOADING = _l('loading...')

View File

@@ -35,57 +35,57 @@
</nav>
<form class="form" id="site-search" action="/youtube.com/results">
<input type="search" name="search_query" class="search-box" value="{{ search_box_value }}"
{{ "autofocus" if (request.path in ("/", "/results") or error_message) else "" }} required placeholder="Type to search...">
<button type="submit" value="Search" class="search-button">Search</button>
{{ "autofocus" if (request.path in ("/", "/results") or error_message) else "" }} required placeholder="{{ _('Type to search...') }}">
<button type="submit" value="Search" class="search-button">{{ _('Search') }}</button>
<!-- options -->
<div class="dropdown">
<!-- hidden box -->
<input id="options-toggle-cbox" class="opt-box" type="checkbox">
<!-- end hidden box -->
<label class="dropdown-label" for="options-toggle-cbox">Options</label>
<label class="dropdown-label" for="options-toggle-cbox">{{ _('Options') }}</label>
<div class="dropdown-content">
<h3>Sort by</h3>
<h3>{{ _('Sort by') }}</h3>
<div class="option">
<input type="radio" id="sort_relevance" name="sort" value="0">
<label for="sort_relevance">Relevance</label>
<label for="sort_relevance">{{ _('Relevance') }}</label>
</div>
<div class="option">
<input type="radio" id="sort_upload_date" name="sort" value="2">
<label for="sort_upload_date">Upload date</label>
<label for="sort_upload_date">{{ _('Upload date') }}</label>
</div>
<div class="option">
<input type="radio" id="sort_view_count" name="sort" value="3">
<label for="sort_view_count">View count</label>
<label for="sort_view_count">{{ _('View count') }}</label>
</div>
<div class="option">
<input type="radio" id="sort_rating" name="sort" value="1">
<label for="sort_rating">Rating</label>
<label for="sort_rating">{{ _('Rating') }}</label>
</div>
<h3>Upload date</h3>
<h3>{{ _('Upload date') }}</h3>
<div class="option">
<input type="radio" id="time_any" name="time" value="0">
<label for="time_any">Any</label>
<label for="time_any">{{ _('Any') }}</label>
</div>
<div class="option">
<input type="radio" id="time_last_hour" name="time" value="1">
<label for="time_last_hour">Last hour</label>
<label for="time_last_hour">{{ _('Last hour') }}</label>
</div>
<div class="option">
<input type="radio" id="time_today" name="time" value="2">
<label for="time_today">Today</label>
<label for="time_today">{{ _('Today') }}</label>
</div>
<div class="option">
<input type="radio" id="time_this_week" name="time" value="3">
<label for="time_this_week">This week</label>
<label for="time_this_week">{{ _('This week') }}</label>
</div>
<div class="option">
<input type="radio" id="time_this_month" name="time" value="4">
<label for="time_this_month">This month</label>
<label for="time_this_month">{{ _('This month') }}</label>
</div>
<div class="option">
<input type="radio" id="time_this_year" name="time" value="5">
<label for="time_this_year">This year</label>
<label for="time_this_year">{{ _('This year') }}</label>
</div>
<h3>Type</h3>

View File

@@ -31,11 +31,19 @@
<input type="number" id="{{ 'setting_' + setting_name }}" name="{{ setting_name }}" value="{{ value }}" step="1">
{% endif %}
{% elif setting_info['type'].__name__ == 'float' %}
<input type="number" id="{{ 'setting_' + setting_name }}" name="{{ setting_name }}" value="{{ value }}" step="0.01">
{% elif setting_info['type'].__name__ == 'str' %}
<input type="text" id="{{ 'setting_' + setting_name }}" name="{{ setting_name }}" value="{{ value }}">
{% if 'options' is in(setting_info) %}
<select id="{{ 'setting_' + setting_name }}" name="{{ setting_name }}">
{% for option in setting_info['options'] %}
<option value="{{ option[0] }}" {{ 'selected' if option[0] == value else '' }}>{{ option[1] }}</option>
{% endfor %}
</select>
{% else %}
<input type="text" id="{{ 'setting_' + setting_name }}" name="{{ setting_name }}" value="{{ value }}">
{% endif %}
{% else %}
<span>Error: Unknown setting type: setting_info['type'].__name__</span>
<span>Error: Unknown setting type: {{ setting_info['type'].__name__ }}</span>
{% endif %}
</li>
{% endif %}

View File

@@ -6,6 +6,9 @@ import settings
from flask import request
import flask
import logging
logger = logging.getLogger(__name__)
import json
import gevent
@@ -685,6 +688,18 @@ def get_watch_page(video_id=None):
pair_sources = source_info['pair_sources']
uni_idx, pair_idx = source_info['uni_idx'], source_info['pair_idx']
# Extract audio tracks using yt-dlp for multi-language support
audio_tracks = []
try:
from youtube import ytdlp_integration
ytdlp_info = ytdlp_integration.extract_video_info_ytdlp(video_id)
audio_tracks = ytdlp_info.get('audio_tracks', [])
if audio_tracks:
logger.info(f'Found {len(audio_tracks)} audio tracks for video {video_id}')
except Exception as e:
logger.warning(f'Failed to extract audio tracks: {e}')
audio_tracks = []
pair_quality = yt_data_extract.deep_get(pair_sources, pair_idx, 'quality')
uni_quality = yt_data_extract.deep_get(uni_sources, uni_idx, 'quality')
@@ -808,7 +823,9 @@ def get_watch_page(video_id=None):
'playlist': info['playlist'],
'related': info['related_videos'],
'playability_error': info['playability_error'],
'audio_tracks': audio_tracks,
},
audio_tracks = audio_tracks,
font_family = youtube.font_choices[settings.font], # for embed page
**source_info,
using_pair_sources = using_pair_sources,
@@ -884,3 +901,18 @@ def get_transcript(caption_path):
return flask.Response(result.encode('utf-8'),
mimetype='text/plain;charset=UTF-8')
# ============================================================================
# yt-dlp Integration Routes
# ============================================================================
@yt_app.route('/ytl-api/video-with-audio/<video_id>')
def proxy_video_with_audio(video_id):
"""
Proxy para servir video con audio específico usando yt-dlp
"""
from youtube import ytdlp_proxy
audio_lang = request.args.get('lang', 'en')
max_quality = int(request.args.get('quality', 720))
return ytdlp_proxy.stream_video_with_audio(video_id, audio_lang, max_quality)

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
yt-dlp integration wrapper for backward compatibility.
This module now uses the centralized ytdlp_service for all operations.
"""
import logging
from youtube.ytdlp_service import (
extract_video_info,
get_language_name,
clear_cache,
get_cache_info,
)
logger = logging.getLogger(__name__)
def extract_video_info_ytdlp(video_id):
"""
Extract video information using yt-dlp (with caching).
This is a wrapper around ytdlp_service.extract_video_info()
for backward compatibility.
Args:
video_id: YouTube video ID
Returns:
Dictionary with audio_tracks, formats, title, duration
"""
logger.debug(f'Extracting video info (legacy API): {video_id}')
info = extract_video_info(video_id)
# Convert to legacy format for backward compatibility
return {
'audio_tracks': info.get('audio_tracks', []),
'all_audio_formats': info.get('formats', []),
'formats': info.get('formats', []),
'title': info.get('title', ''),
'duration': info.get('duration', 0),
'error': info.get('error'),
}
def get_audio_formats_for_language(video_id, language='en'):
"""
Get available audio formats for a specific language.
Args:
video_id: YouTube video ID
language: Language code (default: 'en')
Returns:
List of audio format dicts
"""
info = extract_video_info_ytdlp(video_id)
if 'error' in info:
logger.warning(f'Cannot get audio formats: {info["error"]}')
return []
audio_formats = []
for track in info.get('audio_tracks', []):
if track['language'] == language:
audio_formats.append(track)
logger.debug(f'Found {len(audio_formats)} {language} audio formats')
return audio_formats
__all__ = [
'extract_video_info_ytdlp',
'get_audio_formats_for_language',
'get_language_name',
'clear_cache',
'get_cache_info',
]

99
youtube/ytdlp_proxy.py Normal file
View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""
Proxy for serving videos with specific audio using yt-dlp.
This module provides streaming functionality for unified formats
with specific audio languages.
"""
import logging
from flask import Response, request, stream_with_context
import urllib.request
import urllib.error
from youtube.ytdlp_service import find_best_unified_format
logger = logging.getLogger(__name__)
def stream_video_with_audio(video_id: str, audio_language: str = 'en', max_quality: int = 720):
"""
Stream video with specific audio language.
Args:
video_id: YouTube video ID
audio_language: Preferred audio language (default: 'en')
max_quality: Maximum video height (default: 720)
Returns:
Flask Response with video stream, or 404 if not available
"""
logger.info(f'Stream request: {video_id} | audio={audio_language} | quality={max_quality}p')
# Find best unified format
best_format = find_best_unified_format(video_id, audio_language, max_quality)
if not best_format:
logger.info(f'No suitable unified format found, returning 404 to trigger fallback')
return Response('No suitable unified format available', status=404)
url = best_format.get('url')
if not url:
logger.error('Format found but no URL available')
return Response('Format URL not available', status=500)
logger.debug(f'Streaming from: {url[:80]}...')
# Stream the video
try:
req = urllib.request.Request(url)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
req.add_header('Accept', '*/*')
# Add Range header if client requests it
if 'Range' in request.headers:
req.add_header('Range', request.headers['Range'])
logger.debug(f'Range request: {request.headers["Range"]}')
resp = urllib.request.urlopen(req, timeout=60)
def generate():
"""Generator for streaming video chunks."""
try:
while True:
chunk = resp.read(65536) # 64KB chunks
if not chunk:
break
yield chunk
except Exception as e:
logger.error(f'Stream error: {e}')
raise
# Build response headers
response_headers = {
'Content-Type': resp.headers.get('Content-Type', 'video/mp4'),
'Access-Control-Allow-Origin': '*',
}
# Copy important headers
for header in ['Content-Length', 'Content-Range', 'Accept-Ranges']:
if header in resp.headers:
response_headers[header] = resp.headers[header]
status_code = resp.getcode()
logger.info(f'Streaming started: {status_code}')
return Response(
stream_with_context(generate()),
status=status_code,
headers=response_headers,
direct_passthrough=True
)
except urllib.error.HTTPError as e:
logger.error(f'HTTP error streaming: {e.code} {e.reason}')
return Response(f'Error: {e.code} {e.reason}', status=e.code)
except urllib.error.URLError as e:
logger.error(f'URL error streaming: {e.reason}')
return Response(f'Network error: {e.reason}', status=502)
except Exception as e:
logger.error(f'Streaming error: {e}', exc_info=True)
return Response(f'Error: {e}', status=500)

390
youtube/ytdlp_service.py Normal file
View File

@@ -0,0 +1,390 @@
#!/usr/bin/env python3
"""
Centralized yt-dlp integration with caching, logging, and error handling.
This module provides a clean interface for yt-dlp functionality:
- Multi-language audio track extraction
- Subtitle extraction
- Age-restricted video support
All yt-dlp usage should go through this module for consistency.
"""
import logging
from functools import lru_cache
from typing import Dict, List, Optional, Any
import yt_dlp
import settings
logger = logging.getLogger(__name__)
# Language name mapping
LANGUAGE_NAMES = {
'en': 'English',
'es': 'Español',
'fr': 'Français',
'de': 'Deutsch',
'it': 'Italiano',
'pt': 'Português',
'ru': 'Русский',
'ja': '日本語',
'ko': '한국어',
'zh': '中文',
'ar': 'العربية',
'hi': 'हिन्दी',
'und': 'Unknown',
'zxx': 'No linguistic content',
}
def get_language_name(lang_code: str) -> str:
"""Convert ISO 639-1/2 language code to readable name."""
if not lang_code:
return 'Unknown'
return LANGUAGE_NAMES.get(lang_code.lower(), lang_code.upper())
def _get_ytdlp_config() -> Dict[str, Any]:
"""Get yt-dlp configuration from settings."""
config = {
'quiet': True,
'no_warnings': True,
'extract_flat': False,
'format': 'best',
'skip_download': True,
'socket_timeout': 30,
'extractor_retries': 3,
'http_chunk_size': 10485760, # 10MB
}
# Configure Tor proxy if enabled
if settings.route_tor:
config['proxy'] = 'socks5://127.0.0.1:9150'
logger.debug('Tor proxy enabled for yt-dlp')
# Use cookies if available
import os
cookies_file = 'youtube_cookies.txt'
if os.path.exists(cookies_file):
config['cookiefile'] = cookies_file
logger.debug('Using cookies file for yt-dlp')
return config
@lru_cache(maxsize=128)
def extract_video_info(video_id: str) -> Dict[str, Any]:
"""
Extract video information using yt-dlp with caching.
Args:
video_id: YouTube video ID
Returns:
Dictionary with video information including audio tracks
Caching:
Results are cached to avoid repeated requests to YouTube.
Cache size is limited to prevent memory issues.
"""
# Check if yt-dlp is enabled
if not getattr(settings, 'ytdlp_enabled', True):
logger.debug('yt-dlp integration is disabled')
return {'error': 'yt-dlp disabled', 'audio_tracks': []}
url = f'https://www.youtube.com/watch?v={video_id}'
ydl_opts = _get_ytdlp_config()
try:
logger.debug(f'Extracting video info: {video_id}')
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
if not info:
logger.warning(f'No info returned for video: {video_id}')
return {'error': 'No info returned', 'audio_tracks': []}
logger.debug(f'Extracted {len(info.get("formats", []))} formats')
# Extract audio tracks grouped by language
audio_tracks = _extract_audio_tracks(info)
return {
'video_id': video_id,
'title': info.get('title', ''),
'duration': info.get('duration', 0),
'audio_tracks': audio_tracks,
'formats': info.get('formats', []),
'subtitles': info.get('subtitles', {}),
'automatic_captions': info.get('automatic_captions', {}),
}
except yt_dlp.utils.DownloadError as e:
logger.error(f'yt-dlp download error for {video_id}: {e}')
return {'error': str(e), 'audio_tracks': []}
except Exception as e:
logger.error(f'yt-dlp extraction error for {video_id}: {e}', exc_info=True)
return {'error': str(e), 'audio_tracks': []}
def _extract_audio_tracks(info: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Extract audio tracks from video info, grouped by language.
Returns a list of unique audio tracks (one per language),
keeping the highest quality for each language.
"""
audio_by_language = {}
all_formats = info.get('formats', [])
for fmt in all_formats:
# Only audio-only formats
has_audio = fmt.get('acodec') and fmt.get('acodec') != 'none'
has_video = fmt.get('vcodec') and fmt.get('vcodec') != 'none'
if not has_audio or has_video:
continue
# Extract language information
lang = (
fmt.get('language') or
fmt.get('audio_language') or
fmt.get('lang') or
'und'
)
# Get language name
lang_name = (
fmt.get('language_name') or
fmt.get('lang_name') or
get_language_name(lang)
)
# Get bitrate
bitrate = fmt.get('abr') or fmt.get('tbr') or 0
# Create track info
track_info = {
'language': lang,
'language_name': lang_name,
'format_id': str(fmt.get('format_id', '')),
'itag': str(fmt.get('format_id', '')),
'ext': fmt.get('ext'),
'acodec': fmt.get('acodec'),
'audio_bitrate': int(bitrate) if bitrate else 0,
'audio_sample_rate': fmt.get('asr'),
'url': fmt.get('url'),
'filesize': fmt.get('filesize'),
}
# Keep best quality per language
lang_key = lang.lower()
if lang_key not in audio_by_language:
audio_by_language[lang_key] = track_info
else:
current_bitrate = audio_by_language[lang_key].get('audio_bitrate', 0)
if bitrate > current_bitrate:
audio_by_language[lang_key] = track_info
logger.debug(f'Updated {lang} to higher bitrate: {bitrate}')
# Convert to list and sort
audio_tracks = list(audio_by_language.values())
# Sort: English first, then by bitrate (descending)
audio_tracks.sort(
key=lambda x: (
0 if x['language'] == 'en' else 1,
-x.get('audio_bitrate', 0)
)
)
logger.debug(f'Found {len(audio_tracks)} unique audio tracks')
for track in audio_tracks[:3]: # Log first 3
logger.debug(f' - {track["language_name"]}: {track["audio_bitrate"]}k')
return audio_tracks
def get_subtitle_url(video_id: str, lang: str = 'en') -> Optional[str]:
"""
Get subtitle URL for a specific language.
Args:
video_id: YouTube video ID
lang: Language code (default: 'en')
Returns:
URL to subtitle file, or None if not available
"""
info = extract_video_info(video_id)
if 'error' in info:
logger.warning(f'Cannot get subtitles: {info["error"]}')
return None
# Try manual subtitles first
subtitles = info.get('subtitles', {})
if lang in subtitles:
for sub in subtitles[lang]:
if sub.get('ext') == 'vtt':
logger.debug(f'Found manual {lang} subtitle')
return sub.get('url')
# Try automatic captions
auto_captions = info.get('automatic_captions', {})
if lang in auto_captions:
for sub in auto_captions[lang]:
if sub.get('ext') == 'vtt':
logger.debug(f'Found automatic {lang} subtitle')
return sub.get('url')
logger.debug(f'No {lang} subtitle found')
return None
def find_best_unified_format(
video_id: str,
audio_language: str = 'en',
max_quality: int = 720
) -> Optional[Dict[str, Any]]:
"""
Find best unified (video+audio) format for specific language and quality.
Args:
video_id: YouTube video ID
audio_language: Preferred audio language
max_quality: Maximum video height (e.g., 720, 1080)
Returns:
Format dict if found, None otherwise
"""
info = extract_video_info(video_id)
if 'error' in info or not info.get('formats'):
return None
# Quality thresholds (minimum acceptable height as % of requested)
thresholds = {
2160: 0.85,
1440: 0.80,
1080: 0.70,
720: 0.70,
480: 0.60,
360: 0.50,
}
# Get threshold for requested quality
threshold = 0.70
for q, t in thresholds.items():
if max_quality >= q:
threshold = t
break
min_height = int(max_quality * threshold)
logger.debug(f'Quality threshold: {threshold:.0%} = min {min_height}p for {max_quality}p')
candidates = []
audio_lang_lower = audio_language.lower()
for fmt in info['formats']:
# Must have both video and audio
has_video = fmt.get('vcodec') and fmt.get('vcodec') != 'none'
has_audio = fmt.get('acodec') and fmt.get('acodec') != 'none'
if not (has_video and has_audio):
continue
# Skip HLS/DASH formats
protocol = fmt.get('protocol', '')
format_id = str(fmt.get('format_id', ''))
if any(x in protocol.lower() for x in ['m3u8', 'hls', 'dash']):
continue
if format_id.startswith('9'): # HLS formats
continue
height = fmt.get('height', 0)
if height < min_height:
continue
# Language matching
lang = (
fmt.get('language') or
fmt.get('audio_language') or
'en'
).lower()
lang_match = (
lang == audio_lang_lower or
lang.startswith(audio_lang_lower[:2]) or
audio_lang_lower.startswith(lang[:2])
)
if not lang_match:
continue
# Calculate score
score = 0
# Language match bonus
if lang == audio_lang_lower:
score += 10000
elif lang.startswith(audio_lang_lower[:2]):
score += 8000
else:
score += 5000
# Quality score
quality_diff = abs(height - max_quality)
if height >= max_quality:
score += 3000 - quality_diff
else:
score += 2000 - quality_diff
# Protocol preference
if protocol in ('https', 'http'):
score += 500
# Format preference
if fmt.get('ext') == 'mp4':
score += 100
candidates.append({
'format': fmt,
'score': score,
'height': height,
'lang': lang,
})
if not candidates:
logger.debug(f'No unified format found for {max_quality}p + {audio_language}')
return None
# Sort by score and return best
candidates.sort(key=lambda x: x['score'], reverse=True)
best = candidates[0]
logger.info(
f'Selected unified format: {best["format"].get("format_id")} | '
f'{best["lang"]} | {best["height"]}p | score={best["score"]}'
)
return best['format']
def clear_cache():
"""Clear the video info cache."""
extract_video_info.cache_clear()
logger.info('yt-dlp cache cleared')
def get_cache_info() -> Dict[str, Any]:
"""Get cache statistics."""
cache_info = extract_video_info.cache_info()
return {
'hits': cache_info.hits,
'misses': cache_info.misses,
'size': cache_info.currsize,
'maxsize': cache_info.maxsize,
'hit_rate': cache_info.hits / (cache_info.hits + cache_info.misses) if (cache_info.hits + cache_info.misses) > 0 else 0,
}