Use BeautifulSoup to extract information from YT searches
Google is blocking the IP addresses of the invidio instances; for this reason, livie will use BeautifulSoup to extract information from YT searches instead.
This commit is contained in:
parent
abbf953e64
commit
8dbd068524
@ -14,9 +14,10 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
|
|||||||
|
|
||||||
- `python >= 3.5`
|
- `python >= 3.5`
|
||||||
- `python-requests`
|
- `python-requests`
|
||||||
|
- `python-beautifulsoup4`
|
||||||
- `mpv`
|
- `mpv`
|
||||||
|
|
||||||
`sudo pacman -S python mpv python-requests`
|
`sudo pacman -S python mpv python-requests python-beautifulsoup4`
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
3
livie.el
3
livie.el
@ -47,8 +47,7 @@
|
|||||||
:group 'livie
|
:group 'livie
|
||||||
:type 'string)
|
:type 'string)
|
||||||
|
|
||||||
(defvar livie-youtube-regexp
|
(defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
|
||||||
"https://invidio.us/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true")
|
|
||||||
|
|
||||||
(define-derived-mode livie-mode
|
(define-derived-mode livie-mode
|
||||||
special-mode "livie"
|
special-mode "livie"
|
||||||
|
54
livie.py
54
livie.py
@ -1,34 +1,38 @@
|
|||||||
"""This module does render video"""
|
"""This module does render video"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import datetime
|
|
||||||
import json
|
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
URL = 'https://invidio.us'
|
URL = 'https://www.youtube.com'
|
||||||
|
FILTER = '&sp=EgIQAQ%253D%253D'
|
||||||
INPUT = sys.argv[1]
|
INPUT = sys.argv[1]
|
||||||
SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
|
SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
|
||||||
REQUEST = requests.get(SEARCH)
|
REQUEST = requests.get(SEARCH)
|
||||||
SD = '&itag=18&local=true'
|
SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
|
||||||
HD = '&itag=22&local=true'
|
FIRST = True
|
||||||
|
|
||||||
FIRST = True # skip line loop
|
|
||||||
|
|
||||||
VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
|
def replace(string):
|
||||||
|
"""Remove unnecessary characters"""
|
||||||
|
string = string.replace(' - Duration: ', '')
|
||||||
|
string = string.replace('.', '')
|
||||||
|
string = string.replace(' views', '')
|
||||||
|
return string
|
||||||
|
|
||||||
for video in VIDEOS:
|
for vid in SOUP.find_all(class_='yt-lockup-content'):
|
||||||
try:
|
try:
|
||||||
title = video.get('title', '')
|
link = URL + vid.h3.a['href']
|
||||||
videoid = video.get('videoId', '')
|
title = vid.h3.a.text
|
||||||
author = video.get('author', '')
|
description = vid.h3.span.text
|
||||||
|
author = vid.find(class_='yt-lockup-byline').a.text
|
||||||
|
meta = vid.find(class_='yt-lockup-meta').ul.contents
|
||||||
|
time_srt = vid.find(class_='yt-lockup-title').span.text
|
||||||
|
time = replace(time_srt)
|
||||||
|
uploaded = meta[0].text
|
||||||
|
views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
|
||||||
|
views = replace(views_str.text)
|
||||||
|
|
||||||
# Make URL
|
|
||||||
sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
|
|
||||||
hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
|
|
||||||
|
|
||||||
timer = video.get('lengthSeconds', '')
|
|
||||||
time = str(datetime.timedelta(seconds=timer))
|
|
||||||
publish = video.get('publishedText', '')
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -38,9 +42,9 @@ for video in VIDEOS:
|
|||||||
print() # print skip line
|
print() # print skip line
|
||||||
|
|
||||||
# prints
|
# prints
|
||||||
print(' title: %s' % (title))
|
print(' title: %s' % title)
|
||||||
print(' SD: %s' % (sd))
|
print(' url: %s' % link)
|
||||||
print(' HD: %s' % (hd))
|
print(' channel: %s' % author)
|
||||||
print(' HD ^ Only some videos available caused by DRM')
|
print(' uploaded: %s' % uploaded)
|
||||||
print(' channel: %s' % (author))
|
print(' time: %s' % time)
|
||||||
print(' time: %s' % (time))
|
print(' views: %s' % views)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user