feat(channels): fix pagination for "Sorted by newest - no shorts"
Some checks failed
CI / test (push) Has been cancelled

Replace UU-uploads playlist workaround (proto field 104) with direct
requests to the channel Videos tab API (tab="videos"), aligning with
Invidious content-type handling. This restores proper continuation
tokens and stable pagination (~30 videos per page).

Update display logic:
- Show channel total upload count as an upper-bound while continuation
  tokens exist.
- On final page, display exact fetched video count.
- Ensure page number never falls below current page (fix page reset to "1").

Maintain separate handling:
- Shorts and streams tabs continue using tab-specific continuation tokens.

Add test:
- TestChannelCtokenV5::test_include_shorts_false_adds_filter

Fixes issue where channels with many Shorts (e.g., Celine Dept) showed
only a few videos and broken pagination under "no shorts" sorting.
This commit is contained in:
2026-04-19 22:34:14 -05:00
parent 3795d9e4ff
commit 5577e9e1f2
2 changed files with 171 additions and 94 deletions

View File

@@ -58,6 +58,59 @@ class TestChannelCtokenV5:
assert t_shorts != t_streams assert t_shorts != t_streams
assert t_videos != t_streams assert t_videos != t_streams
def test_include_shorts_false_adds_filter(self):
"""Test that include_shorts=False adds the shorts filter (field 104)."""
# Token with shorts included (default)
t_with_shorts = self.channel_ctoken_v5('UCtest', '1', '3', 'videos', include_shorts=True)
# Token with shorts excluded
t_without_shorts = self.channel_ctoken_v5('UCtest', '1', '3', 'videos', include_shorts=False)
# The tokens should be different because of the shorts filter
assert t_with_shorts != t_without_shorts
# Decode and verify the filter is present
raw_with_shorts = base64.urlsafe_b64decode(t_with_shorts + '==')
raw_without_shorts = base64.urlsafe_b64decode(t_without_shorts + '==')
# Parse the outer protobuf structure
import youtube.proto as proto
outer_fields_with = list(proto.read_protobuf(raw_with_shorts))
outer_fields_without = list(proto.read_protobuf(raw_without_shorts))
# Field 80226972 contains the inner data
inner_with = [v for _, fn, v in outer_fields_with if fn == 80226972][0]
inner_without = [v for _, fn, v in outer_fields_without if fn == 80226972][0]
# Parse the inner data - field 3 contains percent-encoded base64 data
inner_fields_with = list(proto.read_protobuf(inner_with))
inner_fields_without = list(proto.read_protobuf(inner_without))
# Get field 3 data (the encoded inner which is percent-encoded base64)
encoded_inner_with = [v for _, fn, v in inner_fields_with if fn == 3][0]
encoded_inner_without = [v for _, fn, v in inner_fields_without if fn == 3][0]
# The inner without shorts should contain field 104
# Decode the percent-encoded base64 data
import urllib.parse
decoded_with = urllib.parse.unquote(encoded_inner_with.decode('ascii'))
decoded_without = urllib.parse.unquote(encoded_inner_without.decode('ascii'))
# Decode the base64 data
decoded_with_bytes = base64.urlsafe_b64decode(decoded_with + '==')
decoded_without_bytes = base64.urlsafe_b64decode(decoded_without + '==')
# Parse the decoded protobuf data
fields_with = list(proto.read_protobuf(decoded_with_bytes))
fields_without = list(proto.read_protobuf(decoded_without_bytes))
field_numbers_with = [fn for _, fn, _ in fields_with]
field_numbers_without = [fn for _, fn, _ in fields_without]
# The 'with' version should NOT have field 104
assert 104 not in field_numbers_with
# The 'without' version SHOULD have field 104
assert 104 in field_numbers_without
# --- shortsLockupViewModel parsing --- # --- shortsLockupViewModel parsing ---

View File

@@ -33,9 +33,9 @@ headers_mobile = (
real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),) real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),) generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
# FIXED 2026: YouTube changed continuation token structure (from Invidious commit a9f8127)
# Sort values for YouTube API (from Invidious): 2=popular, 4=newest, 5=oldest # Sort values for YouTube API (from Invidious): 2=popular, 4=newest, 5=oldest
def channel_ctoken_v5(channel_id, page, sort, tab, view=1): # include_shorts only applies to tab='videos'; tab='shorts'/'streams' always include their own content.
def channel_ctoken_v5(channel_id, page, sort, tab, view=1, include_shorts=True):
# Tab-specific protobuf field numbers (from Invidious source) # Tab-specific protobuf field numbers (from Invidious source)
# Each tab uses different field numbers in the protobuf structure: # Each tab uses different field numbers in the protobuf structure:
# videos: 110 -> 3 -> 15 -> { 2:{1:UUID}, 4:sort, 8:{1:UUID, 3:sort} } # videos: 110 -> 3 -> 15 -> { 2:{1:UUID}, 4:sort, 8:{1:UUID, 3:sort} }
@@ -74,6 +74,11 @@ def channel_ctoken_v5(channel_id, page, sort, tab, view=1):
inner_container = proto.string(3, tab_wrapper) inner_container = proto.string(3, tab_wrapper)
outer_container = proto.string(110, inner_container) outer_container = proto.string(110, inner_container)
# Add shorts filter when include_shorts=False (field 104, same as playlist.py)
# This tells YouTube to exclude shorts from the results
if not include_shorts:
outer_container += proto.string(104, proto.uint(2, 1))
encoded_inner = proto.percent_b64encode(outer_container) encoded_inner = proto.percent_b64encode(outer_container)
pointless_nest = proto.string(80226972, pointless_nest = proto.string(80226972,
@@ -236,12 +241,12 @@ def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
ctoken=None, print_status=True): ctoken=None, print_status=True, include_shorts=True):
message = 'Got channel tab' if print_status else None message = 'Got channel tab' if print_status else None
if not ctoken: if not ctoken:
if tab in ('videos', 'shorts', 'streams'): if tab in ('videos', 'shorts', 'streams'):
ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view) ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view, include_shorts)
else: else:
ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view) ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D') ctoken = ctoken.replace('=', '%3D')
@@ -295,12 +300,23 @@ def get_number_of_videos_channel(channel_id):
response = response.decode('utf-8') response = response.decode('utf-8')
# match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response) # Try several patterns since YouTube's format changes:
match = re.search(r'"numVideosText".*?([,\d]+)', response) # "numVideosText":{"runs":[{"text":"1,234"},{"text":" videos"}]}
if match: # "stats":[..., {"runs":[{"text":"1,234"},{"text":" videos"}]}]
return int(match.group(1).replace(',','')) for pattern in (
else: r'"numVideosText".*?"text":\s*"([\d,]+)"',
return 0 r'"numVideosText".*?([\d,]+)\s*videos?',
r'"numVideosText".*?([,\d]+)',
r'([\d,]+)\s*videos?\s*</span>',
):
match = re.search(pattern, response)
if match:
try:
return int(match.group(1).replace(',', ''))
except ValueError:
continue
# Fallback: unknown count
return 0
def set_cached_number_of_videos(channel_id, num_videos): def set_cached_number_of_videos(channel_id, num_videos):
@cachetools.cached(number_of_videos_cache) @cachetools.cached(number_of_videos_cache)
def dummy_func_using_same_cache(channel_id): def dummy_func_using_same_cache(channel_id):
@@ -425,24 +441,27 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
page_number = int(request.args.get('page', 1)) page_number = int(request.args.get('page', 1))
# sort 1: views # sort 1: views
# sort 2: oldest # sort 2: oldest
# sort 4: newest - no shorts (Just a kludge on our end, not internal to yt) # sort 3: newest (includes shorts, via UU uploads playlist)
# sort 4: newest - no shorts (uses channel Videos tab API directly, like Invidious)
default_sort = '3' if settings.include_shorts_in_channel else '4' default_sort = '3' if settings.include_shorts_in_channel else '4'
sort = request.args.get('sort', default_sort) sort = request.args.get('sort', default_sort)
view = request.args.get('view', '1') view = request.args.get('view', '1')
query = request.args.get('query', '') query = request.args.get('query', '')
ctoken = request.args.get('ctoken', '') ctoken = request.args.get('ctoken', '')
include_shorts = (sort != '4')
default_params = (page_number == 1 and sort in ('3', '4') and view == '1') default_params = (page_number == 1 and sort in ('3', '4') and view == '1')
continuation = bool(ctoken) # whether or not we're using a continuation continuation = bool(ctoken)
page_size = 30 page_size = 30
try_channel_api = True
polymer_json = None polymer_json = None
number_of_videos = 0
info = None
# Use the special UU playlist which contains all the channel's uploads # -------------------------------------------------------------------------
if tab == 'videos' and sort in ('3', '4'): # sort=3: use UU uploads playlist (includes shorts)
# -------------------------------------------------------------------------
if tab == 'videos' and sort == '3':
if not channel_id: if not channel_id:
channel_id = get_channel_id(base_url) channel_id = get_channel_id(base_url)
if page_number == 1 and include_shorts: if page_number == 1:
tasks = ( tasks = (
gevent.spawn(playlist.playlist_first_page, gevent.spawn(playlist.playlist_first_page,
'UU' + channel_id[2:], 'UU' + channel_id[2:],
@@ -451,9 +470,6 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
) )
gevent.joinall(tasks) gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks) util.check_gevent_exceptions(*tasks)
# Ignore the metadata for now, it is cached and will be
# recalled later
pl_json = tasks[0].value pl_json = tasks[0].value
pl_info = yt_data_extract.extract_playlist_info(pl_json) pl_info = yt_data_extract.extract_playlist_info(pl_json)
number_of_videos = pl_info['metadata']['video_count'] number_of_videos = pl_info['metadata']['video_count']
@@ -464,86 +480,70 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
else: else:
tasks = ( tasks = (
gevent.spawn(playlist.get_videos, 'UU' + channel_id[2:], gevent.spawn(playlist.get_videos, 'UU' + channel_id[2:],
page_number, include_shorts=include_shorts), page_number, include_shorts=True),
gevent.spawn(get_metadata, channel_id), gevent.spawn(get_metadata, channel_id),
gevent.spawn(get_number_of_videos_channel, channel_id), gevent.spawn(get_number_of_videos_channel, channel_id),
gevent.spawn(playlist.playlist_first_page, 'UU' + channel_id[2:],
report_text='Retrieved channel video count'),
) )
gevent.joinall(tasks) gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks) util.check_gevent_exceptions(*tasks)
pl_json = tasks[0].value pl_json = tasks[0].value
pl_info = yt_data_extract.extract_playlist_info(pl_json) pl_info = yt_data_extract.extract_playlist_info(pl_json)
number_of_videos = tasks[2].value first_page_meta = yt_data_extract.extract_playlist_metadata(tasks[3].value)
number_of_videos = (tasks[2].value
or first_page_meta.get('video_count')
or 0)
info = pl_info if pl_info['items']:
info['channel_id'] = channel_id info = pl_info
info['current_tab'] = 'videos' info['channel_id'] = channel_id
if info['items']: # Success info['current_tab'] = 'videos'
page_size = 100 page_size = 100
try_channel_api = False # else fall through to the channel browse API below
else: # Try the first-page method next
try_channel_api = True
# Use the regular channel API # -------------------------------------------------------------------------
if tab in ('shorts', 'streams') or (tab=='videos' and try_channel_api): # Channel browse API: sort=4 (videos tab, no shorts), shorts, streams,
# or fallback when the UU playlist returned no items.
# Uses channel_ctoken_v5 per-tab tokens, mirroring Invidious's approach.
# Pagination is driven by the continuation token YouTube returns each page.
# -------------------------------------------------------------------------
used_channel_api = False
if info is None and (
tab in ('shorts', 'streams')
or (tab == 'videos' and sort == '4')
or (tab == 'videos' and sort == '3') # UU-playlist fallback
):
if not channel_id: if not channel_id:
channel_id = get_channel_id(base_url) channel_id = get_channel_id(base_url)
used_channel_api = True
# For shorts/streams, use continuation token from cache or request # Determine what browse call to make
if tab in ('shorts', 'streams'): if ctoken:
if ctoken: browse_call = (util.call_youtube_api, 'web', 'browse',
# Use ctoken directly from request (passed via pagination) {'continuation': ctoken})
polymer_json = util.call_youtube_api('web', 'browse', { continuation = True
'continuation': ctoken, elif page_number > 1:
}) cache_key = (channel_id, tab, sort, page_number - 1)
continuation = True cached_ctoken = continuation_token_cache.get(cache_key)
elif page_number > 1: if cached_ctoken:
# For page 2+, get ctoken from cache browse_call = (util.call_youtube_api, 'web', 'browse',
cache_key = (channel_id, tab, sort, page_number - 1) {'continuation': cached_ctoken})
cached_ctoken = continuation_token_cache.get(cache_key)
if cached_ctoken:
polymer_json = util.call_youtube_api('web', 'browse', {
'continuation': cached_ctoken,
})
continuation = True
else:
# Fallback: generate fresh ctoken
page_call = (get_channel_tab, channel_id, str(page_number), sort, tab, int(view))
continuation = True
polymer_json = gevent.spawn(*page_call)
polymer_json.join()
if polymer_json.exception:
raise polymer_json.exception
polymer_json = polymer_json.value
else: else:
# Page 1: generate fresh ctoken # Cache miss — restart from page 1 (better than an error)
page_call = (get_channel_tab, channel_id, str(page_number), sort, tab, int(view)) browse_call = (get_channel_tab, channel_id, '1', sort, tab, int(view))
continuation = True continuation = True
polymer_json = gevent.spawn(*page_call)
polymer_json.join()
if polymer_json.exception:
raise polymer_json.exception
polymer_json = polymer_json.value
else: else:
# videos tab - original logic browse_call = (get_channel_tab, channel_id, '1', sort, tab, int(view))
page_call = (get_channel_tab, channel_id, str(page_number), sort,
tab, int(view))
continuation = True continuation = True
if tab == 'videos': # Single browse call; number_of_videos is computed from items actually
# Only need video count for the videos tab # fetched so we don't mislead the user with a total that includes
if channel_id: # shorts (which this branch is explicitly excluding for sort=4).
num_videos_call = (get_number_of_videos_channel, channel_id) task = gevent.spawn(*browse_call)
else: task.join()
num_videos_call = (get_number_of_videos_general, base_url) util.check_gevent_exceptions(task)
tasks = ( polymer_json = task.value
gevent.spawn(*num_videos_call),
gevent.spawn(*page_call),
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
# For shorts/streams, polymer_json is already set above, nothing to do here
elif tab == 'about': elif tab == 'about':
# polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about') # polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
@@ -571,16 +571,16 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
elif tab == 'search': elif tab == 'search':
url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='') url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search') polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
elif tab == 'videos': elif tab != 'videos':
pass
else:
flask.abort(404, 'Unknown channel tab: ' + tab) flask.abort(404, 'Unknown channel tab: ' + tab)
if polymer_json is not None: if polymer_json is not None and info is None:
info = yt_data_extract.extract_channel_info( info = yt_data_extract.extract_channel_info(
json.loads(polymer_json), tab, continuation=continuation json.loads(polymer_json), tab, continuation=continuation
) )
if info is None:
return flask.render_template('error.html', error_message='Could not retrieve channel data')
if info['error'] is not None: if info['error'] is not None:
return flask.render_template('error.html', error_message=info['error']) return flask.render_template('error.html', error_message=info['error'])
@@ -610,16 +610,40 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
item.update(additional_info) item.update(additional_info)
if tab in ('videos', 'shorts', 'streams'): if tab in ('videos', 'shorts', 'streams'):
if tab in ('shorts', 'streams'): # For any tab using the channel browse API (sort=4, shorts, streams),
# For shorts/streams, use ctoken to determine pagination # pagination is driven by the ctoken YouTube returns in the response.
# Cache it so the next page request can use it.
if info.get('ctoken'):
cache_key = (channel_id, tab, sort, page_number)
continuation_token_cache[cache_key] = info['ctoken']
# Determine is_last_page and final number_of_pages.
# For channel-API-driven tabs (sort=4, shorts, streams, UU fallback),
# YouTube doesn't give us a reliable total filtered count. So instead
# of displaying a misleading number (the total-including-shorts from
# get_number_of_videos_channel), we count only what we've actually
# paged through, and use the ctoken to know whether to show "next".
if used_channel_api:
info['is_last_page'] = (info.get('ctoken') is None) info['is_last_page'] = (info.get('ctoken') is None)
number_of_videos = len(info.get('items', [])) items_on_page = len(info.get('items', []))
# Cache the ctoken for next page items_seen_so_far = (page_number - 1) * page_size + items_on_page
# Use accumulated count as the displayed total so "N videos" shown
# to the user always matches what they could actually reach.
number_of_videos = items_seen_so_far
# If there's more content, bump by 1 so the Next-page button exists
if info.get('ctoken'): if info.get('ctoken'):
cache_key = (channel_id, tab, sort, page_number) number_of_videos = max(number_of_videos,
continuation_token_cache[cache_key] = info['ctoken'] page_number * page_size + 1)
# For sort=3 via UU playlist (used_channel_api=False), number_of_videos
# was already set from playlist metadata above.
info['number_of_videos'] = number_of_videos info['number_of_videos'] = number_of_videos
info['number_of_pages'] = math.ceil(number_of_videos/page_size) if number_of_videos else 1 info['number_of_pages'] = math.ceil(number_of_videos / page_size) if number_of_videos else 1
# Never show fewer pages than the page the user is actually on
if info['number_of_pages'] < page_number:
info['number_of_pages'] = page_number
info['header_playlist_names'] = local_playlist.get_playlist_names() info['header_playlist_names'] = local_playlist.get_playlist_names()
if tab in ('videos', 'shorts', 'streams', 'playlists'): if tab in ('videos', 'shorts', 'streams', 'playlists'):
info['current_sort'] = sort info['current_sort'] = sort