feat: add Spanish README and improve channel/playlist handling
All checks were successful
CI / test (push) Successful in 52s
All checks were successful
CI / test (push) Successful in 52s
* Add complete Spanish translation (README.es.md)
* Restructure English README for clarity and conciseness
* Filter out YouTube Shorts from channel video listings (sort=4)
* Add fallback for video count using playlist metadata when API returns zero
* Add get_playlist_metadata() to fetch metadata without full video list
* Add is_short() utility to detect YouTube Shorts by duration and badges
* Export is_short from yt_data_extract for use across modules
This commit is contained in:
@@ -475,6 +475,27 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
|
||||
pl_info = yt_data_extract.extract_playlist_info(pl_json)
|
||||
number_of_videos = tasks[2].value
|
||||
|
||||
# Filter out shorts locally if sort=4 (YouTube may not honor API filter)
|
||||
if sort == '4' and pl_info.get('items'):
|
||||
pl_info['items'] = [
|
||||
item for item in pl_info['items']
|
||||
if not yt_data_extract.is_short(item)
|
||||
]
|
||||
|
||||
# If channel API count is missing/zero, get from playlist metadata
|
||||
if not number_of_videos or number_of_videos == 0:
|
||||
try:
|
||||
metadata_json = playlist.get_playlist_metadata(
|
||||
'UU' + channel_id[2:],
|
||||
report_text='Retrieved playlist metadata'
|
||||
)
|
||||
metadata_info = yt_data_extract.extract_playlist_info(metadata_json)
|
||||
metadata_video_count = metadata_info['metadata'].get('video_count')
|
||||
if metadata_video_count:
|
||||
number_of_videos = metadata_video_count
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
info = pl_info
|
||||
info['channel_id'] = channel_id
|
||||
info['current_tab'] = 'videos'
|
||||
|
||||
@@ -28,6 +28,32 @@ def playlist_ctoken(playlist_id, offset, include_shorts=True):
|
||||
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
|
||||
|
||||
|
||||
def get_playlist_metadata(playlist_id, report_text="Retrieved playlist metadata"):
    """Request a playlist's browse page from the innertube API.

    Sends a single POST to the ``youtubei/v1/browse`` endpoint with
    ``browseId`` set to ``'VL' + playlist_id`` and returns the decoded JSON
    response. Callers pull the fields they need (e.g. the video count) out
    of the returned structure.

    Args:
        playlist_id: Playlist id without the ``VL`` prefix.
        report_text: Passed through to ``util.fetch_url`` as ``report_text``.

    Returns:
        The parsed JSON response body.
    """
    api_key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    endpoint = f'https://www.youtube.com/youtubei/v1/browse?key={api_key}'

    # Client description sent as the request context (desktop web client).
    client_context = {
        'hl': 'en',
        'gl': 'US',
        'clientName': 'WEB',
        'clientVersion': '2.20240327.00.00',
    }
    payload = json.dumps({
        'context': {'client': client_context},
        'browseId': 'VL' + playlist_id,
    })

    # The body is JSON, so declare it on top of the usual desktop XHR headers.
    headers = util.desktop_xhr_headers + (('Content-Type', 'application/json'),)

    response = util.fetch_url(
        endpoint, headers,
        data=payload,
        report_text=report_text, debug_name='playlist_metadata'
    )
    return json.loads(response.decode('utf-8'))
|
||||
|
||||
|
||||
def playlist_first_page(playlist_id, report_text="Retrieved playlist",
|
||||
use_mobile=False):
|
||||
# Use innertube API (pbj=1 no longer works for many playlists)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||
extract_date, extract_item_info, extract_items, extract_response)
|
||||
extract_date, extract_item_info, extract_items, extract_response, is_short)
|
||||
|
||||
from .everything_else import (extract_channel_info, extract_search_info,
|
||||
extract_playlist_metadata, extract_playlist_info, extract_comments_info)
|
||||
|
||||
@@ -410,6 +410,51 @@ def extract_shorts_lockup_view_model_info(item, additional_info={}):
|
||||
return info
|
||||
|
||||
|
||||
def _duration_to_seconds(duration):
    """Convert a duration value to seconds, or return None if unparseable.

    Accepts a number (returned unchanged) or a string in ``SS``, ``MM:SS``
    or ``HH:MM:SS`` form. Strings with more than three colon-separated
    fields, or with non-integer fields, yield None.
    """
    if isinstance(duration, str):
        fields = duration.split(':')
        if len(fields) > 3:
            return None
        try:
            numbers = [int(field) for field in fields]
        except ValueError:
            return None
        seconds = 0
        for number in numbers:
            # Each field to the left is worth 60x the one to its right.
            seconds = seconds * 60 + number
        return seconds
    if isinstance(duration, (int, float)):
        return duration
    return None


def is_short(item_info):
    """Return True if an extracted video item looks like a YouTube Short.

    This is a heuristic. An item is treated as a Short when either:

    1. any entry of its ``'badges'`` contains the text "short"
       (case-insensitive), or
    2. its ``'duration'`` is under 60 seconds. Any regular video shorter
       than 60 seconds is therefore also reported as a Short.

    Items that are empty/None, carry a truthy ``'error'`` value, or have
    ``'type' == 'unsupported'`` are never considered Shorts.

    Args:
        item_info: Item dict (or None). Reads the ``'error'``, ``'type'``,
            ``'badges'`` and ``'duration'`` keys. ``'duration'`` may be a
            number of seconds or a string such as ``"0:58"`` or
            ``"1:23:45"``.

    Returns:
        bool: True when the item matches one of the rules above.
    """
    if not item_info or item_info.get('error'):
        return False

    if item_info.get('type', '') == 'unsupported':
        return False

    # `or ()` also covers a 'badges' key that is present but set to None,
    # which would otherwise make the iteration below raise TypeError.
    badges = item_info.get('badges') or ()
    if any('short' in str(badge).lower() for badge in badges):
        return True

    seconds = _duration_to_seconds(item_info.get('duration'))
    return seconds is not None and seconds < 60
|
||||
|
||||
|
||||
def extract_item_info(item, additional_info={}):
|
||||
if not item:
|
||||
return {'error': 'No item given'}
|
||||
|
||||
Reference in New Issue
Block a user