fix: add support for YouTube Shorts tab on channel pages
- Rewrite channel_ctoken_v5 with correct protobuf field numbers per tab (videos=15, shorts=10, streams=14), based on the Invidious source
- Replace broken pbj=1 endpoint with youtubei browse API for shorts/streams
- Add shortsLockupViewModel parser to extract video data from new YT format
- Fix channel metadata not loading (get_metadata now uses browse API)
- Fix metadata caching: skip caching when channel_name is absent
- Show actual item count instead of UU playlist count for shorts/streams
- Format view counts with spaced suffixes (7.1 K, 1.2 M, 3 B)
This commit is contained in:
@@ -332,6 +332,84 @@ def extract_lockup_view_model_info(item, additional_info={}):
|
||||
return info
|
||||
|
||||
|
||||
def extract_shorts_lockup_view_model_info(item, additional_info={}):
    """Extract info from shortsLockupViewModel format (YouTube Shorts).

    item is the shortsLockupViewModel dict. The video id is read from the
    reelWatchEndpoint (falling back to a 'shorts-shelf-item-' entityId),
    the thumbnail from the same endpoint, and the title and view count are
    parsed out of accessibilityText, which looks like
    "Title, N views - play Short". overlayMetadata fills in a title or an
    approximate view count that could not be parsed from that text.

    Returns a dict with the keys error, type, id, thumbnail, title,
    view_count, approx_view_count, duration, time_published, description,
    badges, author, author_id, author_url and index. The keys from duration
    through index are fixed placeholders ('' / None / []).
    additional_info is merged in last and overrides any of these keys; it
    is only read, never mutated.
    """
    info = {'error': None, 'type': 'video'}

    # Video ID from reelWatchEndpoint or entityId
    info['id'] = deep_get(
        item, 'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId')
    if not info['id']:
        # `or ''` also covers an entityId that is present but null
        entity_id = item.get('entityId') or ''
        prefix = 'shorts-shelf-item-'
        if entity_id.startswith(prefix):
            info['id'] = entity_id[len(prefix):]

    # Thumbnail
    info['thumbnail'] = normalize_url(deep_get(
        item, 'onTap', 'innertubeCommand', 'reelWatchEndpoint',
        'thumbnail', 'thumbnails', 0, 'url'))

    # Parse title and views from accessibilityText
    # Format: "Title, N views - play Short"
    acc_text = item.get('accessibilityText', '')
    info['title'] = ''
    info['view_count'] = None
    info['approx_view_count'] = None
    if acc_text:
        # Remove trailing " - play Short"
        cleaned = re.sub(r'\s*-\s*play Short$', '', acc_text)

        # Groups: 1 = title, 2 = whole view text, 3 = number,
        # 4 = magnitude word ('' when the count is written out in full).
        # The title is matched lazily, but the view text must start with a
        # number, so commas inside the title do not end it early.
        match = re.match(
            r'^(.*?),\s*(([\d,.]+)\s*(thousand|million|billion|)\s*views?)$',
            cleaned, re.IGNORECASE)
        if match:
            info['title'] = match.group(1).strip()
            # NOTE(review): for abbreviated text such as
            # "7.1 thousand views" the value stored here is whatever
            # extract_int makes of it - confirm it is not an exact count.
            info['view_count'] = extract_int(match.group(2))

            num = match.group(3)
            word = match.group(4)
            if word:
                # Convert "7.1 thousand" -> "7.1 K" for display
                suffix_map = {'thousand': 'K', 'million': 'M', 'billion': 'B'}
                info['approx_view_count'] = num + ' ' + suffix_map[word.lower()]
            else:
                # Plain integers are re-grouped ("1234" -> "1,234");
                # anything else (e.g. "1.5") is shown as written.
                digits = num.replace(',', '')
                if digits.isdigit():
                    info['approx_view_count'] = '{:,}'.format(int(digits))
                else:
                    info['approx_view_count'] = num
        else:
            # Fallback for view text the pattern above does not recognise.
            # Prefer the last ", " separator so that commas inside the
            # title are kept; otherwise split on the first comma.
            fallback = (
                re.match(r'^(.*),\s+(.+views?)$', cleaned, re.IGNORECASE)
                or re.match(r'^(.*?),\s*(.+views?)$', cleaned, re.IGNORECASE))
            if fallback:
                info['title'] = fallback.group(1).strip()
                info['approx_view_count'] = extract_approx_int(
                    fallback.group(2))
            else:
                info['title'] = cleaned

    # Overlay text: only used for values accessibilityText did not provide
    overlay_metadata = deep_get(
        item, 'overlayMetadata', 'secondaryText', 'content')
    if overlay_metadata and not info['approx_view_count']:
        info['approx_view_count'] = extract_approx_int(overlay_metadata)

    primary_text = deep_get(
        item, 'overlayMetadata', 'primaryText', 'content')
    if primary_text and not info['title']:
        info['title'] = primary_text

    info['duration'] = ''
    info['time_published'] = None
    info['description'] = None
    info['badges'] = []
    info['author'] = None
    info['author_id'] = None
    info['author_url'] = None
    info['index'] = None

    info.update(additional_info)
    return info
|
||||
|
||||
|
||||
def extract_item_info(item, additional_info={}):
|
||||
if not item:
|
||||
return {'error': 'No item given'}
|
||||
@@ -353,6 +431,10 @@ def extract_item_info(item, additional_info={}):
|
||||
if type == 'lockupViewModel':
|
||||
return extract_lockup_view_model_info(item, additional_info)
|
||||
|
||||
# Handle shortsLockupViewModel format (YouTube Shorts)
|
||||
if type == 'shortsLockupViewModel':
|
||||
return extract_shorts_lockup_view_model_info(item, additional_info)
|
||||
|
||||
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
|
||||
# camelCase split, https://stackoverflow.com/a/37697078
|
||||
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
|
||||
@@ -561,6 +643,7 @@ _item_types = {
|
||||
|
||||
# New viewModel format (YouTube 2024+)
|
||||
'lockupViewModel',
|
||||
'shortsLockupViewModel',
|
||||
}
|
||||
|
||||
def _traverse_browse_renderer(renderer):
|
||||
|
||||
Reference in New Issue
Block a user