feat: add Spanish README and improve channel/playlist handling
All checks were successful
CI / test (push) Successful in 52s

* Add complete Spanish translation (README.es.md)
* Restructure English README for clarity and conciseness
* Filter out YouTube Shorts from channel video listings (sort=4)
* Add fallback for video count using playlist metadata when API returns zero
* Add get_playlist_metadata() to fetch metadata without full video list
* Add is_short() utility to detect YouTube Shorts by duration, badges, and type
* Export is_short from yt_data_extract for use across modules
This commit is contained in:
Jesus
2026-04-12 20:20:32 -05:00
parent 550457936a
commit c6c8030907
6 changed files with 378 additions and 130 deletions

View File

@@ -475,6 +475,27 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
pl_info = yt_data_extract.extract_playlist_info(pl_json)
number_of_videos = tasks[2].value
# Filter out shorts locally if sort=4 (YouTube may not honor API filter)
if sort == '4' and pl_info.get('items'):
pl_info['items'] = [
item for item in pl_info['items']
if not yt_data_extract.is_short(item)
]
# If channel API count is missing/zero, get from playlist metadata
if not number_of_videos or number_of_videos == 0:
try:
metadata_json = playlist.get_playlist_metadata(
'UU' + channel_id[2:],
report_text='Retrieved playlist metadata'
)
metadata_info = yt_data_extract.extract_playlist_info(metadata_json)
metadata_video_count = metadata_info['metadata'].get('video_count')
if metadata_video_count:
number_of_videos = metadata_video_count
except Exception:
pass
info = pl_info
info['channel_id'] = channel_id
info['current_tab'] = 'videos'

View File

@@ -28,6 +28,32 @@ def playlist_ctoken(playlist_id, offset, include_shorts=True):
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
def get_playlist_metadata(playlist_id, report_text="Retrieved playlist metadata"):
    """Request a playlist's browse page from the innertube API and parse it.

    A single POST is made to the ``youtubei/v1/browse`` endpoint with the
    browse id ``'VL' + playlist_id`` using a WEB client context. The response
    body is decoded as UTF-8 and returned as parsed JSON.

    playlist_id -- playlist id without the ``VL`` prefix
    report_text -- message forwarded to ``util.fetch_url`` for reporting

    Intended for callers that only need metadata such as the video count
    (NOTE(review): the exact response contents depend on the remote API).
    """
    api_key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    endpoint = f'https://www.youtube.com/youtubei/v1/browse?key={api_key}'

    client_context = {
        'hl': 'en',
        'gl': 'US',
        'clientName': 'WEB',
        'clientVersion': '2.20240327.00.00',
    }
    # Key order (context, then browseId) is kept so the serialized request
    # body is the same as before.
    payload = json.dumps({
        'context': {'client': client_context},
        'browseId': 'VL' + playlist_id,
    })

    headers = util.desktop_xhr_headers + (('Content-Type', 'application/json'),)
    raw_response = util.fetch_url(
        endpoint,
        headers,
        data=payload,
        report_text=report_text,
        debug_name='playlist_metadata',
    )
    return json.loads(raw_response.decode('utf-8'))
def playlist_first_page(playlist_id, report_text="Retrieved playlist",
use_mobile=False):
# Use innertube API (pbj=1 no longer works for many playlists)

View File

@@ -1,7 +1,7 @@
from .common import (get, multi_get, deep_get, multi_deep_get,
liberal_update, conservative_update, remove_redirect, normalize_url,
extract_str, extract_formatted_text, extract_int, extract_approx_int,
extract_date, extract_item_info, extract_items, extract_response)
extract_date, extract_item_info, extract_items, extract_response, is_short)
from .everything_else import (extract_channel_info, extract_search_info,
extract_playlist_metadata, extract_playlist_info, extract_comments_info)

View File

@@ -410,6 +410,51 @@ def extract_shorts_lockup_view_model_info(item, additional_info={}):
return info
def _duration_to_seconds(duration):
    """Return *duration* in seconds, or None when it cannot be interpreted.

    Numbers are returned unchanged. Strings may be plain seconds ("58"),
    "MM:SS" ("0:58") or "HH:MM:SS" ("1:23:45"); anything else yields None.
    """
    if isinstance(duration, (int, float)):
        return duration
    if not isinstance(duration, str):
        return None

    parts = duration.split(':')
    if len(parts) > 3:
        # More fields than HH:MM:SS is not a recognised duration format.
        return None

    try:
        seconds = 0
        # Fold left-to-right so each field is worth 60x the next one.
        for part in parts:
            seconds = seconds * 60 + int(part)
    except ValueError:
        return None
    return seconds


def is_short(item_info):
    """Return True if an extracted video item looks like a YouTube Short.

    An item is treated as a Short when either:
    1. any entry of its 'badges' list contains the text "short"
       (case-insensitive), or
    2. its 'duration' (a number of seconds, or a string such as "0:58" or
       "1:23:45") is below 60 seconds.

    Falsy items, items carrying an 'error', and items whose 'type' is
    'unsupported' are never reported as Shorts. A missing or None
    'badges' / 'duration' value is simply ignored.
    """
    if not item_info or item_info.get('error'):
        return False

    # Unsupported renderers carry no reliable info; never classify them.
    if item_info.get('type', '') == 'unsupported':
        return False

    # `or ()` also covers the key being present with a None value, which
    # would otherwise make the iteration below raise TypeError.
    badges = item_info.get('badges') or ()
    if any('short' in str(badge).lower() for badge in badges):
        return True

    seconds = _duration_to_seconds(item_info.get('duration'))
    return seconds is not None and seconds < 60
def extract_item_info(item, additional_info={}):
if not item:
return {'error': 'No item given'}