feat: add Spanish README and improve channel/playlist handling

* Add complete Spanish translation (README.es.md)
* Restructure English README for clarity and conciseness
* Filter out YouTube Shorts from channel video listings (sort=4)
* Add fallback for video count using playlist metadata when API returns zero
* Add get_playlist_metadata() to fetch metadata without full video list
* Add is_short() utility to detect YouTube Shorts by duration, badges, and type
* Export is_short from yt_data_extract for use across modules
2026-04-12 20:20:32 -05:00
parent 550457936a
commit c6c8030907
6 changed files with 378 additions and 130 deletions
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -475,6 +475,27 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
            pl_info = yt_data_extract.extract_playlist_info(pl_json)
            number_of_videos = tasks[2].value

+            # Filter out shorts locally if sort=4 (YouTube may not honor API filter)
+            if sort == '4' and pl_info.get('items'):
+                pl_info['items'] = [
+                    item for item in pl_info['items']
+                    if not yt_data_extract.is_short(item)
+                ]
+
+            # If channel API count is missing/zero, get from playlist metadata
+            if not number_of_videos or number_of_videos == 0:
+                try:
+                    metadata_json = playlist.get_playlist_metadata(
+                        'UU' + channel_id[2:],
+                        report_text='Retrieved playlist metadata'
+                    )
+                    metadata_info = yt_data_extract.extract_playlist_info(metadata_json)
+                    metadata_video_count = metadata_info['metadata'].get('video_count')
+                    if metadata_video_count:
+                        number_of_videos = metadata_video_count
+                except Exception:
+                    pass
+
        info = pl_info
        info['channel_id'] = channel_id
        info['current_tab'] = 'videos'
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -28,6 +28,32 @@ def playlist_ctoken(playlist_id, offset, include_shorts=True):
    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')


+def get_playlist_metadata(playlist_id, report_text="Retrieved playlist metadata"):
+    """Request the innertube ``browse`` response for a playlist.
+
+    Issues a single POST for the browse id ``'VL' + playlist_id`` and
+    returns the decoded JSON body as-is; no paging or continuation
+    requests are made here, and nothing is extracted from the response.
+
+    playlist_id -- playlist id without the ``VL`` prefix
+    report_text -- forwarded to ``util.fetch_url`` as its ``report_text``
+    """
+    api_key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+    endpoint = f'https://www.youtube.com/youtubei/v1/browse?key={api_key}'
+
+    # Fixed desktop web client context sent with the request.
+    client_context = {
+        'hl': 'en',
+        'gl': 'US',
+        'clientName': 'WEB',
+        'clientVersion': '2.20240327.00.00',
+    }
+    payload = {
+        'context': {'client': client_context},
+        'browseId': 'VL' + playlist_id,
+    }
+
+    # The body is JSON, so declare it on top of the usual XHR headers.
+    headers = util.desktop_xhr_headers + (('Content-Type', 'application/json'),)
+    raw_response = util.fetch_url(
+        endpoint, headers,
+        data=json.dumps(payload),
+        report_text=report_text, debug_name='playlist_metadata'
+    )
+    return json.loads(raw_response.decode('utf-8'))
+
+
 def playlist_first_page(playlist_id, report_text="Retrieved playlist",
                        use_mobile=False):
    # Use innertube API (pbj=1 no longer works for many playlists)
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -1,7 +1,7 @@
 from .common import (get, multi_get, deep_get, multi_deep_get,
    liberal_update, conservative_update, remove_redirect, normalize_url,
    extract_str, extract_formatted_text, extract_int, extract_approx_int,
-    extract_date, extract_item_info, extract_items, extract_response)
+    extract_date, extract_item_info, extract_items, extract_response, is_short)

 from .everything_else import (extract_channel_info, extract_search_info,
    extract_playlist_metadata, extract_playlist_info, extract_comments_info)
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -410,6 +410,51 @@ def extract_shorts_lockup_view_model_info(item, additional_info={}):
    return info


+def _duration_to_seconds(duration):
+    """Convert a duration value to seconds; return None if it can't be parsed.
+
+    Numbers are returned unchanged. Strings are accepted in the forms
+    "SS", "MM:SS" and "HH:MM:SS"; anything else yields None.
+    """
+    if isinstance(duration, (int, float)):
+        return duration
+    if not isinstance(duration, str):
+        return None
+
+    parts = duration.split(':')
+    if len(parts) > 3:
+        # More than HH:MM:SS is not a format we understand.
+        return None
+    try:
+        seconds = 0
+        for part in parts:
+            # Each field to the right is worth 1/60th of the previous one.
+            seconds = seconds * 60 + int(part)
+    except ValueError:
+        return None
+    return seconds
+
+
+def is_short(item_info, *, max_duration_seconds=60):
+    """Heuristically decide whether an extracted video item is a Short.
+
+    An item is reported as a Short when either:
+    1. any of its 'badges' contains the text "short" (case-insensitive), or
+    2. its 'duration' parses to fewer than max_duration_seconds seconds.
+
+    Note that rule 2 is purely length-based, so an ordinary video shorter
+    than the limit is also reported as a Short.
+
+    Returns False for empty items, items carrying an 'error', items whose
+    'type' is 'unsupported', and items whose duration is missing or
+    unparseable (unless a badge matched).
+
+    item_info -- dict of extracted item fields (may be None)
+    max_duration_seconds -- exclusive upper bound used by rule 2
+    """
+    if not item_info or item_info.get('error'):
+        return False
+
+    if item_info.get('type', '') == 'unsupported':
+        return False
+
+    # 'badges' may be present but set to None; treat that like no badges
+    # instead of failing when iterating.
+    badges = item_info.get('badges') or []
+    if any('short' in str(badge).lower() for badge in badges):
+        return True
+
+    seconds = _duration_to_seconds(item_info.get('duration'))
+    return seconds is not None and seconds < max_duration_seconds
+
+
 def extract_item_info(item, additional_info={}):
    if not item:
        return {'error': 'No item given'}