list music used in video if available

This commit is contained in:
James Taylor 2018-08-06 19:25:14 -07:00
parent 09837e9fa6
commit b02b5b87b3
4 changed files with 97 additions and 3 deletions

View File

@ -227,11 +227,15 @@ address{
grid-column: 1 / span 2; grid-column: 1 / span 2;
grid-row: 6; grid-row: 6;
} }
/* Music list: placed on grid row 7, starting at column line 1 and spanning two columns. */
.full-item .music-list{
grid-row:7;
grid-column: 1 / span 2;
}
.full-item .comments{ .full-item .comments{
grid-row: 7; grid-row: 8;
} }
.full-item .more-comments{ .full-item .more-comments{
grid-row: 8; grid-row: 9;
} }
.medium-item-box{ .medium-item-box{

View File

@ -296,6 +296,10 @@ def extract_info(downloader, *args, **kwargs):
except YoutubeError as e: except YoutubeError as e:
return str(e) return str(e)
# Template for one table row holding an attribute cell and a value cell.
# The row is explicitly closed with </tr> so the generated fragment is
# well-formed on its own, matching the explicitly closed rows that the
# music table builder emits.
music_list_table_row = Template('''<tr>
<td>$attribute</td>
<td>$value</td>
</tr>
''')
def get_watch_page(query_string): def get_watch_page(query_string):
id = urllib.parse.parse_qs(query_string)['v'][0] id = urllib.parse.parse_qs(query_string)['v'][0]
downloader = YoutubeDL(params={'youtube_include_dash_manifest':False}) downloader = YoutubeDL(params={'youtube_include_dash_manifest':False})
@ -343,6 +347,31 @@ def get_watch_page(query_string):
else: else:
related_videos_html = '' related_videos_html = ''
# Build music_list_html: an HTML table of the tracks in info['music_list'],
# or an empty string when there are no tracks.
music_list = info['music_list']
if len(music_list) == 0:
music_list_html = ''
else:
# Table header; the column order here must match the attribute order
# used for the cells below.
music_list_html = '''<hr>
<table>
<caption>Music</caption>
<tr>
<th>Artist</th>
<th>Title</th>
<th>Album</th>
</tr>
'''
# One row per track.
for track in music_list:
music_list_html += '''<tr>\n'''
for attribute in ('artist', 'title', 'album'):
try:
value = track[attribute]
except KeyError:
# Track has no such attribute: emit an empty cell so the columns stay aligned.
music_list_html += '''<td></td>'''
else:
# Escape the value before inserting it into the markup.
music_list_html += '''<td>''' + html.escape(value) + '''</td>'''
music_list_html += '''</tr>\n'''
music_list_html += '''</table>\n'''
download_options = '' download_options = ''
@ -371,5 +400,6 @@ def get_watch_page(query_string):
related = related_videos_html, related = related_videos_html,
comments = comments_html, comments = comments_html,
more_comments_button = more_comments_button, more_comments_button = more_comments_button,
music_list = music_list_html,
) )
return page return page

View File

@ -10,6 +10,7 @@ import random
import re import re
import time import time
import traceback import traceback
import html
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
@ -1479,6 +1480,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}) })
return chapters return chapters
# Matches the start of an opening or closing <ul> tag; group 1 is either
# "<ul" or "</ul".
ul_tag_pattern = re.compile(r'(</?ul)')
# Matches one metadata entry: an <h4 class="title"> heading followed by a
# <ul class="content watch-info-tag-list"> whose first <li> holds the value.
# Group 1 is the heading text (Song, Music, Artist or Album); group 2 is the
# text of that first <li>, with an optional surrounding <a> tag left out.
music_info_pattern = re.compile(r'<h4 class="title">\s*(Song|Music|Artist|Album)\s*</h4>\s*<ul class="content watch-info-tag-list">\s*<li>(?:<a[^>]*>)?([^<]*)(?:</a>)?</li>')
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -1528,6 +1531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info): def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
# Related videos
related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='') related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='')
if related_vid_info == '': if related_vid_info == '':
@ -1540,6 +1544,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
related_vids = (compat_parse_qs(related_item) for related_item in related_vid_info.split(",")) related_vids = (compat_parse_qs(related_item) for related_item in related_vid_info.split(","))
related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vids] related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vids]
# Music list
# Test case: https://www.youtube.com/watch?v=jbkZdRglnKY
#
# Collects the tracks listed in the page's "watch-extras-section" list into
# music_list: a list of dicts that may hold 'title', 'artist' and 'album'.
# music_list stays empty when the section is missing or its <ul> tags
# cannot be balanced.
music_list = []
extras_marker = '<ul class="watch-extras-section">'
metadata_start = video_webpage.find(extras_marker)
if metadata_start != -1:
    # Continue right after the opening tag (replaces the hard-coded 33,
    # which is the length of the marker).
    metadata_start += len(extras_marker)
    tag_index = metadata_start
    open_tags = 1
    # Walk over nested <ul ...> / </ul> tags until the list opened by the
    # marker has been closed again.
    while open_tags > 0:
        match = self.ul_tag_pattern.search(video_webpage, tag_index)
        if match is None:
            print("Couldn't match ul tag")
            break
        tag_index = match.end()
        if match.group(1) == "<ul":
            open_tags += 1
        else:
            open_tags -= 1
    else:
        # Runs only when the loop finished without break, i.e. the end of
        # the section was found.
        metadata = video_webpage[metadata_start:tag_index]
        current_song = None
        for match in self.music_info_pattern.finditer(metadata):
            title, value = match.group(1), html.unescape(match.group(2))
            if title in ("Song", "Music"):
                # A Song/Music heading starts a new track; flush the
                # previous one first.
                if current_song is not None:
                    music_list.append(current_song)
                current_song = {"title": value}
            else:
                # An Artist/Album heading can show up before any Song/Music
                # heading; start an untitled track instead of failing on
                # item assignment to None.
                if current_song is None:
                    current_song = {}
                current_song[title.lower()] = value
        # Flush the last track, if any.
        if current_song is not None:
            music_list.append(current_song)
# Get video info # Get video info
embed_webpage = None embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None: if re.search(r'player-age-gate-content">', video_webpage) is not None:
@ -2120,6 +2162,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'track': track, 'track': track,
'artist': artist, 'artist': artist,
'related_vids': related_vids, 'related_vids': related_vids,
'music_list': music_list,
} }

View File

@ -23,6 +23,21 @@
.full-item{ .full-item{
grid-column: 2; grid-column: 2;
} }
/* Styles for the music list table. */
.music-list{
    background-color: #d0d0d0;
}
/* Every selector in a list must carry the .music-list prefix: a bare
   "th,td" would match all table cells on the page, not only the ones
   inside the music list. */
.music-list table,
.music-list th,
.music-list td{
    border: 1px solid;
}
.music-list th,
.music-list td{
    padding-left:4px;
    padding-right:5px;
}
.music-list caption{
    text-align:left;
    font-weight:bold;
    margin-bottom:5px;
}
.comments{ .comments{
grid-column: 1 / span 2; grid-column: 1 / span 2;
grid-row: 6; grid-row: 6;
@ -106,7 +121,9 @@ $download_options
<input class="checkbox" name="video_info_list" value="$video_info" form="playlist-edit" type="checkbox"> <input class="checkbox" name="video_info_list" value="$video_info" form="playlist-edit" type="checkbox">
<span class="description">$description</span> <span class="description">$description</span>
<div class="music-list">
$music_list
</div>
<section class="comments"> <section class="comments">
$comments $comments
</section> </section>