Use BeautifulSoup to extract information from YT searches

Google is blocking the IPs of the invidio.us instances. For this reason,
livie will use BeautifulSoup to extract information from YT searches.
This commit is contained in:
Jesús 2019-11-27 16:23:59 -05:00
parent abbf953e64
commit 8dbd068524
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
3 changed files with 32 additions and 28 deletions

View File

@ -14,9 +14,10 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
- `python >= 3.5` - `python >= 3.5`
- `python-requests` - `python-requests`
- `python-beautifulsoup4`
- `mpv` - `mpv`
`sudo pacman -S python mpv python-requests` `sudo pacman -S python mpv python-requests python-beautifulsoup4`
## Installation ## Installation

View File

@ -47,8 +47,7 @@
:group 'livie :group 'livie
:type 'string) :type 'string)
(defvar livie-youtube-regexp (defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
"https://invidio.us/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true")
(define-derived-mode livie-mode (define-derived-mode livie-mode
special-mode "livie" special-mode "livie"

View File

@ -1,34 +1,38 @@
"""This module does render video""" """This module does render video"""
import sys import sys
import datetime
import json
import requests import requests
from bs4 import BeautifulSoup
URL = 'https://invidio.us' URL = 'https://www.youtube.com'
FILTER = '&sp=EgIQAQ%253D%253D'
INPUT = sys.argv[1] INPUT = sys.argv[1]
SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT) SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
REQUEST = requests.get(SEARCH) REQUEST = requests.get(SEARCH)
SD = '&itag=18&local=true' SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
HD = '&itag=22&local=true' FIRST = True
FIRST = True # skip line loop
VIDEOS = json.loads(REQUEST.content.decode('utf-8')) def replace(string):
"""Remove unnecessary characters"""
string = string.replace(' - Duration: ', '')
string = string.replace('.', '')
string = string.replace(' views', '')
return string
for video in VIDEOS: for vid in SOUP.find_all(class_='yt-lockup-content'):
try: try:
title = video.get('title', '') link = URL + vid.h3.a['href']
videoid = video.get('videoId', '') title = vid.h3.a.text
author = video.get('author', '') description = vid.h3.span.text
author = vid.find(class_='yt-lockup-byline').a.text
meta = vid.find(class_='yt-lockup-meta').ul.contents
time_srt = vid.find(class_='yt-lockup-title').span.text
time = replace(time_srt)
uploaded = meta[0].text
views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
views = replace(views_str.text)
# Make URL
sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
timer = video.get('lengthSeconds', '')
time = str(datetime.timedelta(seconds=timer))
publish = video.get('publishedText', '')
except TypeError: except TypeError:
continue continue
@ -38,9 +42,9 @@ for video in VIDEOS:
print() # print skip line print() # print skip line
# prints # prints
print(' title: %s' % (title)) print(' title: %s' % title)
print(' SD: %s' % (sd)) print(' url: %s' % link)
print(' HD: %s' % (hd)) print(' channel: %s' % author)
print(' HD ^ Only some videos available caused by DRM') print(' uploaded: %s' % uploaded)
print(' channel: %s' % (author)) print(' time: %s' % time)
print(' time: %s' % (time)) print(' views: %s' % views)