Use yt-local for scraping

This commit is contained in:
Jesús 2021-01-15 19:25:40 -05:00
parent 938265700a
commit 1dbd4a11e3
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
4 changed files with 27 additions and 31 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*.xml

View File

@@ -10,9 +10,12 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
- `python >= 3.5` - `python >= 3.5`
- `python-requests` - `python-requests`
- `python-beautifulsoup4`
- `python-lxml`
- `hypervideo`
- `mpv` - `mpv`
`sudo pacman -S python mpv python-requests` `sudo pacman -S python mpv python-requests python-beautifulsoup4 python-lxml hypervideo`
## Installation ## Installation

View File

@@ -47,10 +47,7 @@
:group 'livie :group 'livie
:type 'string) :type 'string)
(defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
(defvar livie-youtube-regexp
"https://\\<\\(invidious.snopyta.org\\|invidio.us\\)[df]?\\>/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true"
)
(define-derived-mode livie-mode (define-derived-mode livie-mode
special-mode "livie" special-mode "livie"

View File

@@ -1,34 +1,29 @@
"""This module does render video""" """This module does render video"""
import sys import sys
import datetime
import json
import requests import requests
from bs4 import BeautifulSoup
URL = 'https://invidio.us' URL = 'https://yt.conocimientoslibres.ga/youtube.com/'
INPUT = sys.argv[1] INPUT = sys.argv[1]
SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT) FILTER = '&type=1'
SEARCH = '%ssearch?query=%s%s' % (URL, INPUT, FILTER)
REQUEST = requests.get(SEARCH) REQUEST = requests.get(SEARCH)
SD = '&itag=18&local=true' SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
HD = '&itag=22&local=true' # skip line loop
FIRST = True
FIRST = True # skip line loop articles = SOUP.find_all('article', class_="item-box")
VIDEOS = json.loads(REQUEST.content.decode('utf-8')) for article in articles:
for video in VIDEOS:
try: try:
title = video.get('title', '') title = article.h4.text
videoid = video.get('videoId', '') link = article.a['href'].replace('/', '', 1)
author = video.get('author', '') author = article.address.text
time = article.p.text
uploaded = article.span.text
views = article.find('div', class_="views").text
# Make URL
sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
timer = video.get('lengthSeconds', '')
time = str(datetime.timedelta(seconds=timer))
publish = video.get('publishedText', '')
except TypeError: except TypeError:
continue continue
@@ -38,9 +33,9 @@ for video in VIDEOS:
print() # print skip line print() # print skip line
# prints # prints
print(' title: %s' % (title)) print(' title: %s' % title)
print(' SD: %s' % (sd)) print(' url: %s' % link)
print(' HD: %s' % (hd)) print(' channel: %s' % author)
print(' HD ^ Only some videos available caused by DRM') print(' uploaded: %s' % uploaded)
print(' channel: %s' % (author)) print(' time: %s' % time)
print(' time: %s' % (time)) print(' views: %s' % views)