fix: add support for YouTube Shorts tab on channel pages
All checks were successful
git-sync-with-mirror / git-sync (push) Successful in 13s
CI / test (push) Successful in 56s

- Rewrite channel_ctoken_v5 with correct protobuf field numbers per tab
  (videos=15, shorts=10, streams=14) based on Invidious source
- Replace broken pbj=1 endpoint with youtubei browse API for shorts/streams
- Add shortsLockupViewModel parser to extract video data from new YT format
- Fix channel metadata not loading (get_metadata now uses browse API)
- Fix metadata caching: skip caching when channel_name is absent
- Show actual item count instead of UU playlist count for shorts/streams
- Format view counts with spaced suffixes (7.1 K, 1.2 M, 3 B)
This commit is contained in:
2026-04-01 11:43:46 -05:00
parent bed14713ad
commit a374f90f6e
2 changed files with 152 additions and 81 deletions

View File

@@ -332,6 +332,84 @@ def extract_lockup_view_model_info(item, additional_info={}):
return info
# Magnitude words that appear in the accessibility text, mapped to the short
# suffix used for display and to the multiplier used to estimate the count.
_shorts_magnitudes = {
    'thousand': ('K', 10**3),
    'million': ('M', 10**6),
    'billion': ('B', 10**9),
}

# "Title, 7.1 thousand views"
#   group 1: title, group 2: whole view text,
#   group 3: number, group 4: optional magnitude word.
# The lazy title group plus the strictly numeric tail means the split happens
# at the comma that directly precedes the view count, even when the title
# itself contains commas.
_shorts_views_re = re.compile(
    r'^(.*?),\s*((\d[\d,.]*)\s*(thousand|million|billion)?\s*views?)$',
    re.IGNORECASE)

# Fallback for view texts the pattern above does not understand
# (e.g. "No views"). Greedy title so that the LAST comma is the separator;
# a comma directly followed by a digit is treated as a thousands separator
# and never used as the split point.
_shorts_fallback_re = re.compile(r'^(.*),(?!\d)\s*(.+?views?)$', re.IGNORECASE)


def _parse_shorts_accessibility_text(acc_text):
    """Split "Title, N views - play Short" into its parts.

    Returns a tuple (title, view_count, approx_view_count):
      title             -- str, '' when acc_text is empty
      view_count        -- int or None
      approx_view_count -- display string such as '7.1 K' or '1,234', or None
    """
    if not acc_text:
        return '', None, None

    # Remove trailing " - play Short"
    cleaned = re.sub(r'\s*-\s*play Short$', '', acc_text)

    match = _shorts_views_re.match(cleaned)
    if match:
        title = match.group(1).strip()
        view_text, num, word = match.group(2, 3, 4)
        if word:
            suffix, multiplier = _shorts_magnitudes[word.lower()]
            # The text only carries an abbreviated figure, so scale it to get
            # an estimate of the real count.
            # NOTE(review): this branch previously called extract_int on the
            # abbreviated text; that helper is defined elsewhere and appears
            # to keep only the leading integer ("7.1 thousand" -> 7) --
            # confirm against its definition.
            try:
                view_count = int(round(
                    float(num.replace(',', '')) * multiplier))
            except (ValueError, OverflowError):
                # Malformed number such as "1.2.3"
                view_count = None
            # "7.1 thousand" -> "7.1 K" for display
            return title, view_count, num + ' ' + suffix

        # Plain number: exact count, shown with thousands separators
        digits = num.replace(',', '')
        approx = '{:,}'.format(int(digits)) if digits.isdigit() else num
        return title, extract_int(view_text), approx

    match = _shorts_fallback_re.match(cleaned)
    if match:
        return (match.group(1).strip(), None,
                extract_approx_int(match.group(2)))

    # No recognisable view count: the whole text is the title
    return cleaned, None, None


def extract_shorts_lockup_view_model_info(item, additional_info={}):
    """Extract info from shortsLockupViewModel format (YouTube Shorts)

    item is the content of a shortsLockupViewModel. The video id comes from
    the reelWatchEndpoint (or, failing that, from entityId); title and view
    count are parsed from accessibilityText, with overlayMetadata used to
    fill in whichever of the two is still missing.

    Returns a dict with the keys error, type, id, thumbnail, title,
    view_count, approx_view_count, duration, time_published, description,
    badges, author, author_id, author_url and index. Entries from
    additional_info are applied last and override the extracted values.
    additional_info is only read, never modified.
    """
    info = {'error': None, 'type': 'video'}

    # Video ID from reelWatchEndpoint or entityId
    info['id'] = deep_get(item,
        'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId')
    if not info['id']:
        # "or ''" also covers the key being present with a null value
        entity_id = item.get('entityId') or ''
        prefix = 'shorts-shelf-item-'
        if entity_id.startswith(prefix):
            info['id'] = entity_id[len(prefix):]

    # Thumbnail
    info['thumbnail'] = normalize_url(deep_get(item,
        'onTap', 'innertubeCommand', 'reelWatchEndpoint',
        'thumbnail', 'thumbnails', 0, 'url'))

    # Parse title and views from accessibilityText
    # Format: "Title, N views - play Short"
    title, view_count, approx_view_count = _parse_shorts_accessibility_text(
        item.get('accessibilityText') or '')
    info['title'] = title
    info['view_count'] = view_count
    info['approx_view_count'] = approx_view_count

    # Overlay text: only used for what accessibilityText did not provide
    overlay_metadata = deep_get(item, 'overlayMetadata',
        'secondaryText', 'content')
    if overlay_metadata and not info['approx_view_count']:
        info['approx_view_count'] = extract_approx_int(overlay_metadata)
    primary_text = deep_get(item, 'overlayMetadata',
        'primaryText', 'content')
    if primary_text and not info['title']:
        info['title'] = primary_text

    # Fields this format does not provide; set so that the result has the
    # same keys regardless of what was found above
    info['duration'] = ''
    info['time_published'] = None
    info['description'] = None
    info['badges'] = []
    info['author'] = None
    info['author_id'] = None
    info['author_url'] = None
    info['index'] = None

    info.update(additional_info)
    return info
def extract_item_info(item, additional_info={}):
if not item:
return {'error': 'No item given'}
@@ -353,6 +431,10 @@ def extract_item_info(item, additional_info={}):
if type == 'lockupViewModel':
return extract_lockup_view_model_info(item, additional_info)
# Handle shortsLockupViewModel format (YouTube Shorts)
if type == 'shortsLockupViewModel':
return extract_shorts_lockup_view_model_info(item, additional_info)
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
# camelCase split, https://stackoverflow.com/a/37697078
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
@@ -561,6 +643,7 @@ _item_types = {
# New viewModel format (YouTube 2024+)
'lockupViewModel',
'shortsLockupViewModel',
}
def _traverse_browse_renderer(renderer):