Fix music list extraction

Closes #160
This commit is contained in:
Jesus E 2023-05-28 21:42:13 -04:00
parent 512798366c
commit aa57ace742
No known key found for this signature in database
GPG Key ID: 159C8F8BC9AED8B6
4 changed files with 39 additions and 1 deletions

View File

@ -135,7 +135,11 @@
{% for track in music_list %}
<tr>
{% for attribute in music_attributes %}
<td>{{ track.get(attribute.lower(), '') }}</td>
{% if attribute.lower() == 'title' and track['url'] is not none %}
<td><a href="{{ track['url'] }}">{{ track.get(attribute.lower(), '') }}</a></td>
{% else %}
<td>{{ track.get(attribute.lower(), '') }}</td>
{% endif %}
{% endfor %}
</tr>
{% endfor %}

View File

@ -688,6 +688,8 @@ def get_watch_page(video_id=None):
for item in info['related_videos']:
util.prefix_urls(item)
util.add_extra_html_info(item)
for song in info['music_list']:
song['url'] = util.prefix_url(song['url'])
if info['playlist']:
playlist_id = info['playlist']['id']
for item in info['playlist']['items']:

View File

@ -296,6 +296,9 @@ def extract_item_info(item, additional_info={}):
if primary_type == 'video':
info['id'] = item.get('videoId')
if not info['id']:
info['id'] = deep_get(item,'navigationEndpoint', 'watchEndpoint',
'videoId')
info['view_count'] = extract_int(item.get('viewCountText'))
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published

View File

@ -231,6 +231,34 @@ def _extract_metadata_row_info(renderer_content):
return info
def _extract_from_music_renderer(renderer_content):
    """Extract the music list from the latest music-section format.

    Iterates over ``renderer_content['carouselLockups']`` and builds one
    song dict per entry. Every song gets a ``'url'`` and a ``'title'`` key,
    both derived from ``extract_item_info`` applied to the entry's
    ``videoLockup``. The ``'artist'``, ``'album'`` and ``'writers'`` keys
    are set only when an info row with the matching title is present.

    Returns a dict of the form ``{'music_list': [song, ...]}``; the list is
    empty when there are no carousel lockups.
    """
    # Info-row title (compared verbatim) -> key stored on the song dict.
    row_fields = {
        'ARTIST': 'artist',
        'ALBUM': 'album',
        'WRITERS': 'writers',
    }

    songs = []
    for lockup in renderer_content.get('carouselLockups', []):
        lockup_renderer = lockup.get('carouselLockupRenderer', {})

        # The linked video supplies both the watch URL and the song title.
        video_info = extract_item_info(lockup_renderer.get('videoLockup', {}))
        song = {
            'url': concat_or_none('https://www.youtube.com/watch?v=',
                                  video_info.get('id')),
            'title': video_info.get('title'),
        }

        for info_row in lockup_renderer.get('infoRows', []):
            row_renderer = info_row.get('infoRowRenderer', {})
            label = extract_str(row_renderer.get('title'))
            value = extract_str(row_renderer.get('defaultMetadata'))
            # Rows with any other title are ignored.
            field = row_fields.get(label)
            if field is not None:
                song[field] = value

        songs.append(song)

    return {'music_list': songs}
def _extract_from_video_metadata(renderer_content):
info = _extract_from_video_information_renderer(renderer_content)
liberal_dict_update(info, _extract_likes_dislikes(renderer_content))
@ -254,6 +282,7 @@ visible_extraction_dispatch = {
'slimVideoActionBarRenderer': _extract_likes_dislikes,
'slimOwnerRenderer': _extract_from_owner_renderer,
'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer,
'videoDescriptionMusicSectionRenderer': _extract_from_music_renderer,
'expandableVideoDescriptionRenderer': _extract_from_description_renderer,
'metadataRowContainerRenderer': _extract_metadata_row_info,
# OR just this one, which contains SOME of the above inside it