Files
yt-local/youtube/playlist.py
Astounds 06051dd127
All checks were successful
git-sync-with-mirror / git-sync (push) Successful in 13s
CI / test (push) Successful in 51s
fix: support YouTube 2024+ data formats for playlists, podcasts and channels
- Add PODCAST content type support in lockupViewModel extraction
- Extract thumbnails and episode count from thumbnail overlay badges
- Migrate playlist page fetching from pbj=1 to innertube API (youtubei/v1/browse)
- Support new pageHeaderRenderer format in playlist metadata extraction
- Fix subscriber count extraction when YouTube returns handle instead of count
- Hide "None subscribers" in template when data is unavailable
2026-03-31 21:38:51 -05:00

154 lines
4.7 KiB
Python

from youtube import util, yt_data_extract, proto, local_playlist
from youtube import yt_app
import settings
import base64
import urllib
import json
import string
import gevent
import math
from flask import request, abort
import flask
def playlist_ctoken(playlist_id, offset, include_shorts=True):
offset = proto.uint(1, offset)
offset = b'PT:' + proto.unpadded_b64encode(offset)
offset = proto.string(15, offset)
if not include_shorts:
offset += proto.string(104, proto.uint(2, 1))
continuation_info = proto.string(3, proto.percent_b64encode(offset))
playlist_id = proto.string(2, 'VL' + playlist_id)
pointless_nest = proto.string(80226972, playlist_id + continuation_info)
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
def playlist_first_page(playlist_id, report_text="Retrieved playlist",
use_mobile=False):
# Use innertube API (pbj=1 no longer works for many playlists)
key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
data = {
'context': {
'client': {
'hl': 'en',
'gl': 'US',
'clientName': 'WEB',
'clientVersion': '2.20240327.00.00',
},
},
'browseId': 'VL' + playlist_id,
}
content_type_header = (('Content-Type', 'application/json'),)
content = util.fetch_url(
url, util.desktop_xhr_headers + content_type_header,
data=json.dumps(data),
report_text=report_text, debug_name='playlist_first_page'
)
return json.loads(content.decode('utf-8'))
def get_videos(playlist_id, page, include_shorts=True, use_mobile=False,
report_text='Retrieved playlist'):
page_size = 100
key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
ctoken = playlist_ctoken(playlist_id, (int(page)-1)*page_size,
include_shorts=include_shorts)
data = {
'context': {
'client': {
'hl': 'en',
'gl': 'US',
'clientName': 'WEB',
'clientVersion': '2.20240327.00.00',
},
},
'continuation': ctoken,
}
content_type_header = (('Content-Type', 'application/json'),)
content = util.fetch_url(
url, util.desktop_xhr_headers + content_type_header,
data=json.dumps(data),
report_text=report_text, debug_name='playlist_videos'
)
info = json.loads(content.decode('utf-8'))
return info
@yt_app.route('/playlist')
def get_playlist_page():
if 'list' not in request.args:
abort(400)
playlist_id = request.args.get('list')
# Radio/Mix playlists (RD...) only work as watch page, not playlist page
if playlist_id.startswith('RD'):
first_video_id = playlist_id[2:] # video ID after 'RD' prefix
return flask.redirect(
util.URL_ORIGIN + '/watch?v=' + first_video_id + '&list=' + playlist_id,
302
)
page = request.args.get('page', '1')
if page == '1':
first_page_json = playlist_first_page(playlist_id)
this_page_json = first_page_json
else:
tasks = (
gevent.spawn(
playlist_first_page, playlist_id,
report_text="Retrieved playlist info"
),
gevent.spawn(get_videos, playlist_id, page)
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
first_page_json, this_page_json = tasks[0].value, tasks[1].value
info = yt_data_extract.extract_playlist_info(this_page_json)
if info['error']:
return flask.render_template('error.html', error_message=info['error'])
if page != '1':
info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)
util.prefix_urls(info['metadata'])
for item in info.get('items', ()):
util.prefix_urls(item)
util.add_extra_html_info(item)
if 'id' in item and not item.get('thumbnail'):
item['thumbnail'] = f"{settings.img_prefix}https://i.ytimg.com/vi/{item['id']}/hqdefault.jpg"
item['url'] += '&list=' + playlist_id
if item['index']:
item['url'] += '&index=' + str(item['index'])
video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count')
if video_count is None:
video_count = 1000
return flask.render_template(
'playlist.html',
header_playlist_names=local_playlist.get_playlist_names(),
video_list=info.get('items', []),
num_pages=math.ceil(video_count/100),
parameters_dictionary=request.args,
**info['metadata']
).encode('utf-8')