Untitled
unknown
python
2 years ago
1.3 kB
32
Indexable
Never
"""Read a web article aloud.

The script downloads a page, extracts its heading and body paragraphs,
splits them into sentences and speaks each sentence through gTTS.
"""
import os
import sys
import tempfile
from time import sleep

from bs4 import BeautifulSoup
from gtts import gTTS
from nltk.tokenize import sent_tokenize
from playsound import playsound
from requests import get

# Pause (seconds) after the very first spoken phrase and after every later one.
_FIRST_PAUSE = 1
_PAUSE = 0.1


def get_DOM_by_url(url, timeout=30):
    """Yield the text fragments of the article found at *url*.

    The first fragment is the text of the page's first ``<h1>`` (when there
    is one); the rest are the non-empty ``<p>`` elements inside
    ``<div class="post_content">`` that carry no ``class`` attribute.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; without
            a timeout a stalled connection would block forever.

    Yields:
        str: Heading text, then paragraph texts, in document order.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of reading an error page aloud.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    heading = soup.find('h1')
    if heading is not None:
        yield heading.text

    container = soup.find('div', {'class': 'post_content'})
    if container is None:
        # Page does not have the expected article container: nothing to read.
        return

    for paragraph in container.find_all('p'):
        # Paragraphs that carry a class attribute are skipped; only plain
        # <p> elements with some text are yielded.
        if not paragraph.has_attr('class') and paragraph.text:
            yield paragraph.text


def extract_sentencies(parsed):
    """Split every text fragment of *parsed* into sentences.

    Args:
        parsed: Iterable of strings (e.g. the output of ``get_DOM_by_url``).

    Yields:
        str: Each sentence produced by ``sent_tokenize``, in order.
    """
    for fragment in parsed:
        yield from sent_tokenize(fragment)


def to_speech(text_generator, lang='ru'):
    """Synthesize and play every phrase of *text_generator* in turn.

    Each phrase is printed with its running index, rendered to an MP3 file
    with gTTS and played back synchronously.  Phrases that gTTS rejects
    with ``AssertionError`` are reported on stderr and skipped without
    advancing the index.

    Args:
        text_generator: Iterable of phrases to speak.
        lang: Language code passed to gTTS.
    """
    spoken = 0
    # A private temporary directory replaces the fixed '/tmp/out.mp3': it
    # exists on every platform, cannot collide with a concurrent run, and is
    # removed when playback ends.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = os.path.join(workdir, 'out.mp3')
        for phrase in text_generator:
            try:
                speech = gTTS(phrase, lang=lang, slow=False)
                print(spoken, phrase)
                speech.save(audio_path)
            except AssertionError as exc:
                # Best effort: skip this phrase, but say so instead of
                # swallowing the error silently.
                print(f'skipped {phrase!r}: {exc}', file=sys.stderr)
                continue
            playsound(audio_path)
            # Longer pause after the first phrase, short one afterwards.
            sleep(_PAUSE if spoken else _FIRST_PAUSE)
            spoken += 1


def main():
    """Command-line entry point; returns the process exit status."""
    # Check the argument count explicitly: wrapping the whole pipeline in
    # `except IndexError` would misreport any IndexError raised while
    # fetching, tokenizing or speaking as a usage error.
    if len(sys.argv) < 2:
        print(f'use {sys.argv[0]} http://google.com/', file=sys.stderr)
        return 2
    url = sys.argv[1]
    print(f'Playing {url}...')
    to_speech(extract_sentencies(get_DOM_by_url(url)))
    return 0


if __name__ == '__main__':
    sys.exit(main())