Fix channel about tab

2024-01-22 06:29:42 +08:00
parent 2463af7685
commit 5f3b90ad45
7 changed files with 229 additions and 88 deletions
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -84,6 +84,40 @@ def channel_ctoken_v5(channel_id, page, sort, tab, view=1):

    return base64.urlsafe_b64encode(pointless_nest).decode('ascii')

+
+def channel_about_ctoken(channel_id):
+    '''Builds the continuation token sent to the browse API for the about tab.
+
+    Entries are [wire_type, field_number, data]; a ('base64p', data) tuple
+    is a base64 encoding (see the docstring of proto._make_protobuf).
+    '''
+    # Hard-coded id copied as-is; its meaning is not documented here
+    inner = [
+        [2, 19,
+         [[2, 1, b'66b0e9e9-0000-2820-9589-582429a83980']]],
+    ]
+    params = ('base64p',
+              [
+               [2, 110,
+                [
+                 [2, 3, inner],
+                ]
+               ],
+              ])
+    return proto.make_protobuf(
+        ('base64p',
+         [
+          [2, 80226972,
+           [
+            [2, 2, channel_id],
+            [2, 3, params],
+           ]
+          ],
+         ]
+        )
+    )
+
+
 # https://github.com/user234683/youtube-local/issues/151
 def channel_ctoken_v4(channel_id, page, sort, tab, view=1):
    new_sort = (2 if int(sort) == 1 else 1)
@@ -359,7 +393,7 @@ def post_process_channel_info(info):
        util.add_extra_html_info(item)
    if info['current_tab'] == 'about':
        for i, (text, url) in enumerate(info['links']):
-            if util.YOUTUBE_URL_RE.fullmatch(url):
+            if isinstance(url, str) and util.YOUTUBE_URL_RE.fullmatch(url):
                info['links'][i] = (text, util.prefix_url(url))


@@ -469,7 +503,13 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

    elif tab == 'about':
-        polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
+        # polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
+        channel_id = get_channel_id(base_url)
+        ctoken = channel_about_ctoken(channel_id)
+        polymer_json = util.call_youtube_api('web', 'browse', {
+            'continuation': ctoken,
+        })
+        continuation=True
    elif tab == 'playlists' and page_number == 1:
        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
    elif tab == 'playlists':
@@ -491,6 +531,9 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
            json.loads(polymer_json), tab, continuation=continuation
        )

+    if info['error'] is not None:
+        return flask.render_template('error.html', error_message=info['error'])
+
    if channel_id:
        info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id
        info['channel_id'] = channel_id
@@ -498,7 +541,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
        channel_id = info['channel_id']

    # Will have microformat present, cache metadata while we have it
-    if channel_id and default_params and tab != 'videos':
+    if channel_id and default_params and tab not in ('videos', 'about'):
        metadata = extract_metadata_for_caching(info)
        set_cached_metadata(channel_id, metadata)
    # Otherwise, populate with our (hopefully cached) metadata
@@ -515,9 +558,6 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
        for item in info['items']:
            item.update(additional_info)

-    if info['error'] is not None:
-        return flask.render_template('error.html', error_message = info['error'])
-
    if tab in ('videos', 'shorts', 'streams'):
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/page_size)
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -141,6 +141,17 @@ base64_enc_funcs = {


 def _make_protobuf(data):
+    '''
+    Input: Recursive list of protobuf objects or base-64 encodings
+    Output: Protobuf bytestring
+    Each protobuf object takes the form [wire_type, field_number, field_data]
+    If a string protobuf has a list/tuple of length 2, this has the form
+    (base64 type, data)
+    The base64 types are
+    - base64 means a base64 encode with equals sign paddings
+    - base64s means a base64 encode without padding
+    - base64p means a url base64 encode with equals signs replaced with %3D
+    '''
    # must be dict mapping field_number to [wire_type, value]
    if isinstance(data, dict):
        new_data = []
--- a/youtube/templates/channel.html
+++ b/youtube/templates/channel.html
@@ -51,8 +51,11 @@
            <ul>
                {% for (before_text, stat, after_text) in [
                    ('Joined ', date_joined, ''),
-                    ('', view_count|commatize, ' views'),
+                    ('', approx_view_count, ' views'),
                    ('', approx_subscriber_count, ' subscribers'),
+                    ('', approx_video_count, ' videos'),
+                    ('Country: ', country, ''),
+                    ('Canonical Url: ', canonical_url, ''),
                ] %}
                    {% if stat %}
                        <li>{{ before_text + stat|string + after_text }}</li>
@@ -65,7 +68,11 @@
            <hr>
            <ul>
                {% for text, url in links %}
+                    {% if url %}
                        <li><a href="{{ url }}">{{ text }}</a></li>
+                    {% else %}
+                        <li>{{ text }}</li>
+                    {% endif %}
                {% endfor %}
            </ul>
        </div>
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -665,6 +665,85 @@ def to_valid_filename(name):
    return name


+# Innertube client configs keyed by client name; see https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
+INNERTUBE_CLIENTS = {
+    'android': {
+        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'hl': 'en',
+                'gl': 'US',
+                'clientName': 'ANDROID',
+                'clientVersion': '17.31.35',
+                'osName': 'Android',
+                'osVersion': '12',
+                'androidSdkVersion': 31,
+                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 12) gzip'
+            },
+            # thirdParty is left commented out for this client; see https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+            #'thirdParty': {
+            #    'embedUrl': 'https://google.com',  # Can be any valid URL
+            #}
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+        'REQUIRE_JS_PLAYER': False,
+    },
+
+    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
+    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
+    'tv_embedded': {
+        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'hl': 'en',
+                'gl': 'US',
+                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
+                'clientVersion': '2.0',
+            },
+            # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+            'thirdParty': {
+                'embedUrl': 'https://google.com',  # Can be any valid URL
+            }
+
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
+        'REQUIRE_JS_PLAYER': True,
+    },
+
+    'web': {  # desktop web client; sends desktop_user_agent as its userAgent
+        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+        'INNERTUBE_CONTEXT': {
+            'client': {
+                'clientName': 'WEB',
+                'clientVersion': '2.20220801.00.00',
+                'userAgent': desktop_user_agent,
+            }
+        },
+        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+    },
+}
+
+
+def call_youtube_api(client, api, data):
+    '''POST data (plus the client's context) as JSON to the innertube
+    endpoint /youtubei/v1/<api>; returns the response decoded as UTF-8.'''
+    client_params = INNERTUBE_CLIENTS[client]
+    context = client_params['INNERTUBE_CONTEXT']
+    key = client_params['INNERTUBE_API_KEY']
+    host = client_params.get('INNERTUBE_HOST') or 'www.youtube.com'
+    user_agent = context['client'].get('userAgent') or mobile_user_agent
+
+    url = 'https://' + host + '/youtubei/v1/' + api + '?key=' + key
+    # Build a new dict so the caller's data is not mutated
+    data = json.dumps(dict(data, context=context))
+    headers = (('Content-Type', 'application/json'),('User-Agent', user_agent))
+    return fetch_url(
+        url, data=data, headers=headers,
+        debug_name='youtubei_' + api + '_' + client,
+        report_text='Fetched ' + client + ' youtubei ' + api
+    ).decode('utf-8')
+
+
 def strip_non_ascii(string):
    ''' Returns the string without non ASCII characters'''
    stripped = (c for c in string if 0 < ord(c) < 127)
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -19,51 +19,6 @@ from urllib.parse import parse_qs, urlencode
 from types import SimpleNamespace
 from math import ceil

-# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
-INNERTUBE_CLIENTS = {
-    'android': {
-        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
-        'INNERTUBE_CONTEXT': {
-            'client': {
-                'hl': 'en',
-                'gl': 'US',
-                'clientName': 'ANDROID',
-                'clientVersion': '17.31.35',
-                'osName': 'Android',
-                'osVersion': '12',
-                'androidSdkVersion': 31,
-                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 12) gzip'
-            },
-            # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
-            #'thirdParty': {
-            #    'embedUrl': 'https://google.com',  # Can be any valid URL
-            #}
-        },
-        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
-        'REQUIRE_JS_PLAYER': False,
-    },
-
-    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
-    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
-    'tv_embedded': {
-        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
-        'INNERTUBE_CONTEXT': {
-            'client': {
-                'hl': 'en',
-                'gl': 'US',
-                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
-                'clientVersion': '2.0',
-            },
-            # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
-            'thirdParty': {
-                'embedUrl': 'https://google.com',  # Can be any valid URL
-            }
-
-        },
-        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
-        'REQUIRE_JS_PLAYER': True,
-    },
-}

 try:
    with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@@ -386,26 +341,11 @@ def _add_to_error(info, key, additional_message):


 def fetch_player_response(client, video_id):
-    client_params = INNERTUBE_CLIENTS[client]
-    context = client_params['INNERTUBE_CONTEXT']
-    key = client_params['INNERTUBE_API_KEY']
-    host = client_params.get('INNERTUBE_HOST') or 'www.youtube.com'
-    user_agent = context['client'].get('userAgent') or util.mobile_user_agent
-
-    url = 'https://' + host + '/youtubei/v1/player?key=' + key
-    data = {
+    return util.call_youtube_api(client, 'player', {
        'videoId': video_id,
-        'context': context,
        'params': 'CgIQBg',
-    }
-    data = json.dumps(data)
-    headers = (('Content-Type', 'application/json'),('User-Agent', user_agent))
-    player_response = util.fetch_url(
-        url, data=data, headers=headers,
-        debug_name='youtubei_player_' + client,
-        report_text='Fetched ' + client + ' youtubei player'
-    ).decode('utf-8')
-    return player_response
+    })
+

 def fetch_watch_page_info(video_id, playlist_id, index):
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -185,7 +185,7 @@ def extract_int(string, default=None, whole_word=True):
        return default

 def extract_approx_int(string):
-    '''e.g. "15.1M" from "15.1M subscribers"'''
+    '''e.g. "15.1M" from "15.1M subscribers" or '4,353' from 4353'''
    if not isinstance(string, str):
        string = extract_str(string)
    if not string:
@@ -193,7 +193,10 @@ def extract_approx_int(string):
    match = re.search(r'\b(\d+(?:\.\d+)?[KMBTkmbt]?)\b', string.replace(',', ''))
    if match is None:
        return None
-    return match.group(1)
+    result = match.group(1)
+    if re.fullmatch(r'\d+', result):
+        result = '{:,}'.format(int(result))
+    return result

 MONTH_ABBREVIATIONS = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'}
 def extract_date(date_text):
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -85,23 +85,84 @@ def extract_channel_info(polymer_json, tab, continuation=False):
        if tab in ('search', 'playlists'):
            info['is_last_page'] = (ctoken is None)
    elif tab == 'about':
-        items, _ = extract_items(response, item_types={'channelAboutFullMetadataRenderer'})
-        if not items:
-            info['error'] = 'Could not find channelAboutFullMetadataRenderer'
+        # Latest type
+        items, _ = extract_items(response, item_types={'aboutChannelRenderer'})
+        if items:
+            a_metadata = deep_get(items, 0, 'aboutChannelRenderer',
+                'metadata', 'aboutChannelViewModel')
+            if not a_metadata:
+                info['error'] = 'Could not find aboutChannelViewModel'
                return info
-        channel_metadata = items[0]['channelAboutFullMetadataRenderer']

            info['links'] = []
-        for link_json in channel_metadata.get('primaryLinks', ()):
-            url = remove_redirect(deep_get(link_json, 'navigationEndpoint', 'urlEndpoint', 'url'))
-            if not (url.startswith('http://') or url.startswith('https://')):
-                url = 'http://' + url
+            for link_outer in a_metadata.get('links', ()):
+                link = link_outer.get('channelExternalLinkViewModel') or {}
+                link_content = extract_str(deep_get(link, 'link', 'content'))
+                for run in deep_get(link, 'link', 'commandRuns') or ():
+                    url = remove_redirect(deep_get(run, 'onTap',
+                        'innertubeCommand', 'urlEndpoint', 'url'))
+                    if url and not (url.startswith('http://')
+                            or url.startswith('https://')):
+                        url = 'https://' + url
+                    if link_content is None or (link_content in url):
+                        break
+                else: # didn't break
+                    url = link_content
+                    if url and not (url.startswith('http://')
+                            or url.startswith('https://')):
+                        url = 'https://' + url
+                text = extract_str(deep_get(link, 'title', 'content'))
+                info['links'].append( (text, url) )
+
+            info['date_joined'] = extract_date(
+                a_metadata.get('joinedDateText')
+            )
+            info['view_count'] = extract_int(a_metadata.get('viewCountText'))
+            info['approx_view_count'] = extract_approx_int(
+                a_metadata.get('viewCountText')
+            )
+            info['description'] = extract_str(
+                a_metadata.get('description'), default=''
+            )
+            info['approx_video_count'] = extract_approx_int(
+                a_metadata.get('videoCountText')
+            )
+            info['approx_subscriber_count'] = extract_approx_int(
+                a_metadata.get('subscriberCountText')
+            )
+            info['country'] = extract_str(a_metadata.get('country'))
+            info['canonical_url'] = extract_str(
+                a_metadata.get('canonicalChannelUrl')
+            )
+
+        # Old type
+        else:
+            items, _ = extract_items(response,
+                item_types={'channelAboutFullMetadataRenderer'})
+            if not items:
+                info['error'] = 'Could not find aboutChannelRenderer or channelAboutFullMetadataRenderer'
+                return info
+            a_metadata = items[0]['channelAboutFullMetadataRenderer']
+
+            info['links'] = []
+            for link_json in a_metadata.get('primaryLinks', ()):
+                url = remove_redirect(deep_get(link_json, 'navigationEndpoint',
+                    'urlEndpoint', 'url'))
+                if url and not (url.startswith('http://')
+                                or url.startswith('https://')):
+                    url = 'https://' + url
                text = extract_str(link_json.get('title'))
                info['links'].append( (text, url) )

-        info['date_joined'] = extract_date(channel_metadata.get('joinedDateText'))
-        info['view_count'] = extract_int(channel_metadata.get('viewCountText'))
-        info['description'] = extract_str(channel_metadata.get('description'), default='')
+            info['date_joined'] = extract_date(a_metadata.get('joinedDateText'))
+            info['view_count'] = extract_int(a_metadata.get('viewCountText'))
+            info['description'] = extract_str(a_metadata.get(
+                'description'), default='')
+
+            info['approx_video_count'] = None
+            info['approx_subscriber_count'] = None
+            info['country'] = None
+            info['canonical_url'] = None
    else:
        raise NotImplementedError('Unknown or unsupported channel tab: ' + tab)