Untitled
unknown
python
3 years ago
1.3 kB
45
Indexable
from pathlib import Path
from tempfile import TemporaryDirectory
from time import sleep

from bs4 import BeautifulSoup
from gtts import gTTS
from nltk.tokenize import sent_tokenize
from playsound import playsound
from requests import get
def get_DOM_by_url(url, timeout=30):
    """Yield the readable text pieces of the article found at *url*.

    The page is downloaded and parsed with the ``lxml`` parser. The text of
    the first ``<h1>`` is yielded first (when the page has one), followed by
    the text of every non-empty ``<p>`` inside the first
    ``<div class="post_content">`` that carries no ``class`` attribute of
    its own.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot block the script forever.

    Yields:
        str: The headline text, then one string per accepted paragraph.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    response = get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to read an error page aloud.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    title = soup.find('h1')
    if title is not None:
        yield title.text

    body = soup.find('div', {'class': 'post_content'})
    if body is None:
        # No article container on this page: there is nothing more to yield.
        return

    for paragraph in body.find_all('p'):
        # Only paragraphs without a class attribute are read; classed ones
        # are skipped, exactly as the original KeyError probe did.
        if 'class' in paragraph.attrs:
            continue
        text = paragraph.text
        if text:
            yield text
def extract_sentencies(parsed):
    """Lazily split every text chunk of *parsed* into sentences.

    Args:
        parsed: Iterable of strings; each one is passed to
            ``sent_tokenize`` on its own.

    Yields:
        str: Each sentence, in the order the chunks and their sentences
        appear.
    """
    for paragraph in parsed:
        yield from sent_tokenize(paragraph)
def to_speech(text_generator, lang='ru'):
    """Speak every phrase of *text_generator* aloud, one after another.

    Each phrase is synthesized with gTTS, saved as an MP3 inside a private
    temporary directory, and played with ``playsound``. The number of
    phrases spoken so far and the phrase itself are printed before the audio
    is saved.

    Args:
        text_generator: Iterable of strings to speak.
        lang: Language code handed to gTTS. Defaults to ``'ru'``, the value
            that used to be hard-coded.

    Returns:
        None.
    """
    spoken = 0
    # A private temporary directory replaces the fixed '/tmp/out.mp3' path,
    # which does not exist on every platform and would be shared (and
    # overwritten) by concurrent runs. It is removed when the loop ends.
    with TemporaryDirectory() as tmp_dir:
        audio_path = str(Path(tmp_dir) / 'out.mp3')
        for phrase in text_generator:
            if not phrase.strip():
                # Nothing to pronounce; do not bother the TTS service.
                continue
            try:
                speech = gTTS(phrase, lang=lang, slow=False)
                print(spoken, phrase)
                speech.save(audio_path)
            except AssertionError:
                # NOTE(review): gTTS appears to reject unspeakable input
                # with AssertionError; such phrases are skipped on purpose,
                # as before, and do not advance the counter.
                continue
            playsound(audio_path)
            # Longer pause after the very first phrase, short one afterwards.
            sleep(1 if spoken == 0 else 0.1)
            spoken += 1
import sys


def main(argv):
    """Run the command-line tool: read the page at ``argv[1]`` aloud.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name first,
            then the URL).

    Returns:
        int: ``0`` after the page has been played, ``2`` when no URL was
        given and only the usage hint was printed to stderr.
    """
    # Validate the arguments up front. Wrapping the whole pipeline in
    # ``except IndexError`` would report any IndexError raised while
    # scraping or speaking as a usage mistake.
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        print(f'use {prog} http://google.com/', file=sys.stderr)
        return 2

    url = argv[1]
    print(f'Playing {url}...')
    to_speech(
        extract_sentencies(
            get_DOM_by_url(url)
        )
    )
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Editor is loading...