Extraction: Detect limited-state videos and fix their false detection as unlisted

This commit is contained in:
James Taylor 2019-12-17 20:58:15 -08:00
parent 81c7ecf161
commit 45a4ab5ace
3 changed files with 14 additions and 2 deletions

View File

@ -252,6 +252,9 @@
{%- if age_restricted -%}
<li class="age-restricted">Age-restricted</li>
{%- endif -%}
{%- if limited_state -%}
<li>Limited state</li>
{%- endif -%}
</ul>
<address>Uploaded by <a href="{{ uploader_channel_url }}">{{ uploader }}</a></address>
<span class="views">{{ views }} views</span>

View File

@ -429,6 +429,7 @@ def get_watch_page():
uploader = info['author'],
description = info['description'],
unlisted = info['unlisted'],
limited_state = info['limited_state'],
age_restricted = info['age_restricted'],
playability_error = info['playability_error'],
)

View File

@ -1003,6 +1003,13 @@ def extract_watch_info_mobile(top_level):
info['comment_count'] = 0
info['comments_disabled'] = True
# check for limited state
items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
if items:
info['limited_state'] = True
else:
info['limited_state'] = False
# related videos
related, _ = extract_items(response)
info['related_videos'] = [renderer_info(renderer) for renderer in related]
@ -1015,6 +1022,7 @@ def extract_watch_info_desktop(top_level):
'comment_count': None,
'comments_disabled': None,
'allowed_countries': None,
'limited_state': None,
}
video_info = {}
@ -1201,7 +1209,7 @@ def extract_watch_info(polymer_json):
liberal_update(info, 'author', vd.get('author'))
liberal_update(info, 'author_id', vd.get('channelId'))
liberal_update(info, 'live', vd.get('isLiveContent'))
liberal_update(info, 'unlisted', not vd.get('isCrawlable', True))
conservative_update(info, 'unlisted', not vd.get('isCrawlable', True)) #isCrawlable is false on limited state videos even if they aren't unlisted
liberal_update(info, 'tags', vd.get('keywords', []))
# fallback stuff from microformat
@ -1213,7 +1221,7 @@ def extract_watch_info(polymer_json):
conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
conservative_update(info, 'author', mf.get('ownerChannelName'))
conservative_update(info, 'author_id', mf.get('externalChannelId'))
conservative_update(info, 'unlisted', mf.get('isUnlisted'))
liberal_update(info, 'unlisted', mf.get('isUnlisted'))
liberal_update(info, 'category', mf.get('category'))
liberal_update(info, 'published_date', mf.get('publishDate'))
liberal_update(info, 'uploaded_date', mf.get('uploadDate'))