Switch to the new comments API now that the old one is being disabled
The watch_comment API periodically returns the error "Top level comments mweb servlet is turned down." The new API arranges the continuation items differently in the JSON, so the extract_items function had to be changed. Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
parent
bee14ea9ea
commit
3dee7ea0d1
@ -47,25 +47,23 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
|
|||||||
return base64.urlsafe_b64encode(result).decode('ascii')
|
return base64.urlsafe_b64encode(result).decode('ascii')
|
||||||
|
|
||||||
|
|
||||||
mobile_headers = {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
|
|
||||||
'Accept': '*/*',
|
|
||||||
'Accept-Language': 'en-US,en;q=0.5',
|
|
||||||
'X-YouTube-Client-Name': '2',
|
|
||||||
'X-YouTube-Client-Version': '2.20180823',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def request_comments(ctoken, replies=False):
|
def request_comments(ctoken, replies=False):
|
||||||
base_url = 'https://m.youtube.com/watch_comment?'
|
url = 'https://m.youtube.com/youtubei/v1/next'
|
||||||
if replies:
|
url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
|
||||||
base_url += 'action_get_comment_replies=1&ctoken='
|
data = json.dumps({
|
||||||
else:
|
'context': {
|
||||||
base_url += 'action_get_comments=1&ctoken='
|
'client': {
|
||||||
url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
|
'hl': 'en',
|
||||||
|
'gl': 'US',
|
||||||
|
'clientName': 'MWEB',
|
||||||
|
'clientVersion': '2.20210804.02.00',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'continuation': ctoken.replace('=', '%3D'),
|
||||||
|
})
|
||||||
|
|
||||||
content = util.fetch_url(
|
content = util.fetch_url(
|
||||||
url, headers=mobile_headers,
|
url, headers=util.mobile_xhr_headers + util.json_header, data=data,
|
||||||
report_text='Retrieved comments', debug_name='request_comments')
|
report_text='Retrieved comments', debug_name='request_comments')
|
||||||
content = content.decode('utf-8')
|
content = content.decode('utf-8')
|
||||||
|
|
||||||
@ -178,10 +176,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
|
|||||||
('Direct link', this_sort_url)
|
('Direct link', this_sort_url)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
ctoken = make_comment_ctoken(video_id, sort, offset, lc)
|
||||||
comments_info.update(yt_data_extract.extract_comments_info(
|
comments_info.update(yt_data_extract.extract_comments_info(
|
||||||
request_comments(
|
request_comments(ctoken), ctoken=ctoken
|
||||||
make_comment_ctoken(video_id, sort, offset, lc, secret_key)
|
|
||||||
)
|
|
||||||
))
|
))
|
||||||
post_process_comments_info(comments_info)
|
post_process_comments_info(comments_info)
|
||||||
|
|
||||||
@ -212,7 +209,9 @@ def get_comments_page():
|
|||||||
ctoken = request.args.get('ctoken', '')
|
ctoken = request.args.get('ctoken', '')
|
||||||
replies = request.args.get('replies', '0') == '1'
|
replies = request.args.get('replies', '0') == '1'
|
||||||
|
|
||||||
comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
|
comments_info = yt_data_extract.extract_comments_info(
|
||||||
|
request_comments(ctoken, replies), ctoken=ctoken
|
||||||
|
)
|
||||||
post_process_comments_info(comments_info)
|
post_process_comments_info(comments_info)
|
||||||
|
|
||||||
if not replies:
|
if not replies:
|
||||||
|
@ -387,6 +387,19 @@ mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M)
|
|||||||
mobile_ua = (('User-Agent', mobile_user_agent),)
|
mobile_ua = (('User-Agent', mobile_user_agent),)
|
||||||
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
|
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
|
||||||
desktop_ua = (('User-Agent', desktop_user_agent),)
|
desktop_ua = (('User-Agent', desktop_user_agent),)
|
||||||
|
# Content-Type header for requests that send a JSON body
json_header = (('Content-Type', 'application/json'),)

# Accept headers shared by both XHR header sets below
_xhr_accept_headers = (
    ('Accept', '*/*'),
    ('Accept-Language', 'en-US,en;q=0.5'),
)

# XHR header sets: the shared Accept pairs, then the X-YouTube-Client-*
# pairs, then the matching User-Agent tuple
desktop_xhr_headers = _xhr_accept_headers + (
    ('X-YouTube-Client-Name', '1'),
    ('X-YouTube-Client-Version', '2.20180830'),
) + desktop_ua
mobile_xhr_headers = _xhr_accept_headers + (
    ('X-YouTube-Client-Name', '2'),
    ('X-YouTube-Client-Version', '2.20180830'),
) + mobile_ua
|
||||||
|
|
||||||
|
|
||||||
class RateLimitedQueue(gevent.queue.Queue):
|
class RateLimitedQueue(gevent.queue.Queue):
|
||||||
|
@ -478,6 +478,22 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
|
|||||||
|
|
||||||
renderer = None
|
renderer = None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_items_from_renderer_list(renderers, item_types=_item_types):
    '''Run extract_items_from_renderer on each renderer and merge the results.

    Returns (items, ctoken). The items found in every renderer are
    concatenated in iteration order. For the ctoken, any value is accepted
    while none has been stored yet; after that, it is only replaced by a
    ctoken coming from a renderer that also produced items.
    '''
    collected = []
    continuation = None
    for entry in renderers:
        found, found_ctoken = extract_items_from_renderer(
            entry, item_types=item_types)
        collected.extend(found)
        # a ctoken that accompanies actual items takes priority
        if not continuation or (found_ctoken and found):
            continuation = found_ctoken
    return collected, continuation
|
||||||
|
|
||||||
|
|
||||||
def extract_items(response, item_types=_item_types,
|
def extract_items(response, item_types=_item_types,
|
||||||
search_engagement_panels=False):
|
search_engagement_panels=False):
|
||||||
'''return items, ctoken'''
|
'''return items, ctoken'''
|
||||||
@ -495,6 +511,15 @@ def extract_items(response, item_types=_item_types,
|
|||||||
item_types=item_types)
|
item_types=item_types)
|
||||||
if items:
|
if items:
|
||||||
break
|
break
|
||||||
|
elif 'onResponseReceivedEndpoints' in response:
|
||||||
|
for endpoint in response.get('onResponseReceivedEndpoints', []):
|
||||||
|
items, ctoken = extract_items_from_renderer_list(
|
||||||
|
deep_get(endpoint, 'appendContinuationItemsAction',
|
||||||
|
'continuationItems', default=[]),
|
||||||
|
item_types=item_types,
|
||||||
|
)
|
||||||
|
if items:
|
||||||
|
break
|
||||||
elif 'contents' in response:
|
elif 'contents' in response:
|
||||||
renderer = get(response, 'contents', {})
|
renderer = get(response, 'contents', {})
|
||||||
items, ctoken = extract_items_from_renderer(
|
items, ctoken = extract_items_from_renderer(
|
||||||
@ -502,11 +527,11 @@ def extract_items(response, item_types=_item_types,
|
|||||||
item_types=item_types)
|
item_types=item_types)
|
||||||
|
|
||||||
if search_engagement_panels and 'engagementPanels' in response:
|
if search_engagement_panels and 'engagementPanels' in response:
|
||||||
for engagement_renderer in response['engagementPanels']:
|
new_items, new_ctoken = extract_items_from_renderer_list(
|
||||||
additional_items, cont = extract_items_from_renderer(
|
response['engagementPanels'], item_types=item_types
|
||||||
engagement_renderer,
|
)
|
||||||
item_types=item_types)
|
items += new_items
|
||||||
items += additional_items
|
if (not ctoken) or (new_ctoken and new_items):
|
||||||
if cont and not ctoken:
|
ctoken = new_ctoken
|
||||||
ctoken = cont
|
|
||||||
return items, ctoken
|
return items, ctoken
|
||||||
|
@ -222,15 +222,13 @@ def _ctoken_metadata(ctoken):
|
|||||||
result['sort'] = 0
|
result['sort'] = 0
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def extract_comments_info(polymer_json):
|
def extract_comments_info(polymer_json, ctoken=None):
|
||||||
response, err = extract_response(polymer_json)
|
response, err = extract_response(polymer_json)
|
||||||
if err:
|
if err:
|
||||||
return {'error': err}
|
return {'error': err}
|
||||||
info = {'error': None}
|
info = {'error': None}
|
||||||
|
|
||||||
url = multi_deep_get(polymer_json, [1, 'url'], ['url'])
|
if ctoken:
|
||||||
if url:
|
|
||||||
ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
|
|
||||||
metadata = _ctoken_metadata(ctoken)
|
metadata = _ctoken_metadata(ctoken)
|
||||||
else:
|
else:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user