Add functional but preliminary channel tab support

Add channel tabs to the channel template and script
Update continuation token to request different tabs

Add support for 'reelItemRenderer' format required to extract shorts
This commit is contained in:
Jesus E 2023-06-17 16:05:40 -04:00
parent 74907a8183
commit f322035d4a
No known key found for this signature in database
GPG Key ID: 159C8F8BC9AED8B6
4 changed files with 75 additions and 21 deletions

View File

@ -32,16 +32,23 @@ real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
# added an extra nesting under the 2nd base64 compared to v4
# added tab support
def channel_ctoken_v5(channel_id, page, sort, tab, view=1):
new_sort = (2 if int(sort) == 1 else 1)
offset = str(30*(int(page) - 1))
if tab == 'videos':
tab = 15
elif tab == 'shorts':
tab = 10
elif tab == 'streams':
tab = 14
pointless_nest = proto.string(80226972,
proto.string(2, channel_id)
+ proto.string(3,
proto.percent_b64encode(
proto.string(110,
proto.string(3,
proto.string(15,
proto.string(tab,
proto.string(1,
proto.string(1,
proto.unpadded_b64encode(
@ -167,7 +174,7 @@ def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
tab = proto.string(2, tab)
sort = proto.uint(3, int(sort))
# page = proto.string(15, str(page) )
#page = proto.string(15, str(page))
shelf_view = proto.uint(4, 0)
view = proto.uint(6, int(view))
@ -202,7 +209,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
message = 'Got channel tab' if print_status else None
if not ctoken:
if tab == 'videos':
if tab in ('videos', 'shorts', 'streams'):
ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view)
else:
ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
@ -349,11 +356,11 @@ def post_process_channel_info(info):
info['links'][i] = (text, util.prefix_url(url))
def get_channel_first_page(base_url=None, channel_id=None):
def get_channel_first_page(base_url=None, channel_id=None, tab='videos'):
if channel_id:
base_url = 'https://www.youtube.com/channel/' + channel_id
return util.fetch_url(base_url + '/videos?pbj=1&view=0', headers_desktop,
debug_name='gen_channel_videos')
return util.fetch_url(base_url + '/' + tab + '?pbj=1&view=0',
headers_desktop, debug_name='gen_channel_' + tab)
playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
@ -374,24 +381,25 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
default_params = (page_number == 1 and sort == '3' and view == '1')
continuation = bool(ctoken) # whether or not we're using a continuation
if tab == 'videos' and channel_id and not default_params:
if (tab in ('videos', 'shorts', 'streams') and channel_id and
not default_params):
tasks = (
gevent.spawn(get_number_of_videos_channel, channel_id),
gevent.spawn(get_channel_tab, channel_id, page_number, sort,
'videos', view, ctoken)
tab, view, ctoken)
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
continuation = True
elif tab == 'videos':
elif tab in ('videos', 'shorts', 'streams'):
if channel_id:
num_videos_call = (get_number_of_videos_channel, channel_id)
else:
num_videos_call = (get_number_of_videos_general, base_url)
tasks = (
gevent.spawn(*num_videos_call),
gevent.spawn(get_channel_first_page, base_url=base_url),
gevent.spawn(get_channel_first_page, base_url=base_url, tab=tab),
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
@ -440,13 +448,13 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
item.update(additional_info)
if info['error'] is not None:
return flask.render_template('error.html', error_message=info['error'])
return flask.render_template('error.html', error_message = info['error'])
if tab == 'videos':
if tab in ('videos', 'shorts', 'streams'):
info['number_of_videos'] = number_of_videos
info['number_of_pages'] = math.ceil(number_of_videos/30)
info['header_playlist_names'] = local_playlist.get_playlist_names()
if tab in ('videos', 'playlists'):
if tab in ('videos', 'shorts', 'streams', 'playlists'):
info['current_sort'] = sort
elif tab == 'search':
info['search_box_value'] = query
@ -457,9 +465,8 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
post_process_channel_info(info)
return flask.render_template(
'channel.html',
parameters_dictionary=request.args,
return flask.render_template('channel.html',
parameters_dictionary = request.args,
**info
)

View File

@ -33,7 +33,7 @@
<hr/>
<nav class="channel-tabs">
{% for tab_name in ('Videos', 'Playlists', 'About') %}
{% for tab_name in ('Videos', 'Shorts', 'Streams', 'Playlists', 'About') %}
{% if tab_name.lower() == current_tab %}
<a class="tab page-button">{{ tab_name }}</a>
{% else %}
@ -73,7 +73,7 @@
<!-- new-->
<div id="links-metadata">
{% if current_tab == 'videos' %}
{% if current_tab in ('videos', 'shorts', 'streams') %}
{% set sorts = [('1', 'views'), ('2', 'oldest'), ('3', 'newest')] %}
<div id="number-of-results">{{ number_of_videos }} videos</div>
{% elif current_tab == 'playlists' %}
@ -110,11 +110,11 @@
<hr/>
<footer class="pagination-container">
{% if current_tab == 'videos' and current_sort.__str__() == '2' %}
{% if (current_tab in ('videos', 'shorts', 'streams')) and current_sort.__str__() == '2' %}
<nav class="next-previous-button-row">
{{ common_elements.next_previous_ctoken_buttons(None, ctoken, channel_url + '/' + current_tab, parameters_dictionary) }}
</nav>
{% elif current_tab == 'videos' %}
{% elif current_tab in ('videos', 'shorts', 'streams') %}
<nav class="pagination-list">
{{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary, include_ends=(current_sort.__str__() == '3')) }}
</nav>

View File

@ -249,6 +249,9 @@ def extract_item_info(item, additional_info={}):
primary_type = type_parts[-2]
if primary_type == 'video':
info['type'] = 'video'
elif type_parts[0] == 'reel': # shorts
info['type'] = 'video'
primary_type = 'short'
elif primary_type in ('playlist', 'radio', 'show'):
info['type'] = 'playlist'
info['playlist_type'] = primary_type
@ -343,6 +346,48 @@ def extract_item_info(item, additional_info={}):
else:
info['index'] = None
elif primary_type == 'short':
info['id'] = item.get('videoId')
if not info['id']:
info['id'] = deep_get(item,'navigationEndpoint',
'reelWatchEndpoint', 'videoId')
info['approx_view_count'] = extract_approx_int(item.get('viewCountText'))
# handle case where it is "No views"
if not info['approx_view_count']:
if ('No views' in item.get('shortViewCountText', '')
or 'no views' in accessibility_label.lower()):
info['view_count'] = 0
info['approx_view_count'] = '0'
# dig into accessibility data to get duration for shorts
accessibility_label = multi_deep_get(item,
['accessibility', 'accessibilityData', 'label'],
default='')
duration = re.search(r'(\d+) (second|seconds|minute) - play video',
accessibility_label)
if duration.group(2) == 'minute':
info['duration'] = "1:00"
else:
info['duration'] = "0:" + duration.group(1).zfill(2)
# if it's an item in a playlist, get its index
if 'index' in item: # url has wrong index on playlist page
info['index'] = extract_int(item.get('index'))
elif 'indexText' in item:
# Current item in playlist has ▶ instead of the actual index, must
# dig into url
match = re.search(r'index=(\d+)', deep_get(item,
'navigationEndpoint', 'commandMetadata', 'webCommandMetadata',
'url', default=''))
if match is None: # worth a try then
info['index'] = extract_int(item.get('indexText'))
else:
info['index'] = int(match.group(1))
else:
info['index'] = None
elif primary_type in ('playlist', 'radio'):
info['id'] = item.get('playlistId')
info['video_count'] = extract_int(item.get('videoCount'))
@ -398,6 +443,8 @@ _item_types = {
'gridVideoRenderer',
'playlistVideoRenderer',
'reelItemRenderer',
'playlistRenderer',
'compactPlaylistRenderer',
'gridPlaylistRenderer',

View File

@ -73,7 +73,7 @@ def extract_channel_info(polymer_json, tab, continuation=False):
#if 'contents' not in response and 'continuationContents' not in response:
# return info
if tab in ('videos', 'playlists', 'search'):
if tab in ('videos', 'shorts', 'streams', 'playlists', 'search'):
items, ctoken = extract_items(response)
additional_info = {
'author': info['channel_name'],