# Page-header residue from the paste site (not part of the script):
# Untitled / unknown / plain_text / a year ago / 1.7 kB / 7 / Indexable
def _title_matches_any(titles, patterns):
    """Return a boolean Series marking the titles that match any of *patterns*.

    Args:
        titles: pandas Series of video titles; missing values never match.
        patterns: iterable of strings. They are joined with ``|`` and handed
            to ``Series.str.contains``, so each entry is interpreted as a
            case-insensitive regular expression.

    Returns:
        A boolean Series aligned with ``titles.index``.
    """
    patterns = list(patterns)
    if not patterns:
        # '|'.join([]) is '', and an empty regex matches every title; an
        # empty list must match nothing instead.
        return pd.Series(False, index=titles.index, dtype=bool)
    return titles.str.contains('|'.join(patterns), case=False, na=False)


# Videos whose title contains at least one keyword
has_keyword = _title_matches_any(video_df['title'], keywords)
videos_with_keywords = video_df[has_keyword]
# Videos whose title contains no keyword (rows with a missing title land here)
videos_without_keywords = video_df[~has_keyword]
# Keyword videos whose title also contains an excluded word
has_excluded_word = _title_matches_any(videos_with_keywords['title'], exclude_words)
videos_with_excluded_words = videos_with_keywords[has_excluded_word]
# Titles that passed both filters
filtered_titles = videos_with_keywords[~has_excluded_word]['title']
# Titles that were rejected by either filter
excluded_titles = pd.concat([videos_without_keywords, videos_with_excluded_words], axis=0)['title']
# Clean the titles by removing the stopwords
def clean_titles(titles):
    """Return *titles* with every stopword removed from each title.

    Each title is split on whitespace and a word is dropped when its
    lower-cased form is in the module-level ``final_stopwords``; the
    remaining words are re-joined with single spaces.

    Args:
        titles: pandas Series of video titles. Entries that are not strings
            (e.g. NaN for a missing title) are mapped to ``''`` instead of
            raising ``AttributeError`` on ``.split()``.

    Returns:
        A Series of cleaned title strings with the same index as *titles*.
    """
    def strip_stopwords(title):
        if not isinstance(title, str):
            # Missing titles reach this function through the "without
            # keywords" set, whose mask keeps NaN rows.
            return ''
        kept = [word for word in title.split() if word.lower() not in final_stopwords]
        return ' '.join(kept)

    return titles.apply(strip_stopwords)
# Strip stopwords from both title sets (excluded first, then filtered)
cleaned_excluded_titles, cleaned_filtered_titles = (
    clean_titles(title_set) for title_set in (excluded_titles, filtered_titles)
)
# Plot the word clouds
def plot_word_cloud(titles, file_name):
    """Render a word cloud of *titles*, save it to *file_name*, and show it.

    Args:
        titles: iterable of title strings; they are joined with single
            spaces to form the text handed to ``WordCloud.generate``.
        file_name: destination passed to ``plt.savefig``. When it is a
            filesystem path, its parent directory is created if missing.
    """
    import os

    wordcloud = WordCloud(width=1920, height=1080, background_color='white').generate(' '.join(titles))

    # savefig does not create directories, so make sure the target one exists.
    if isinstance(file_name, (str, os.PathLike)):
        out_dir = os.path.dirname(os.fspath(file_name))
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.savefig(file_name, bbox_inches='tight')
        plt.show()
    finally:
        # Release the figure once it has been shown. In interactive mode
        # show() returns immediately, so closing here would dismiss the
        # window the user is still looking at.
        if not plt.isinteractive():
            plt.close(fig)
# Render and save one word cloud per title set
plot_word_cloud(cleaned_excluded_titles, 'figs/cleaned_excluded_titles.png')
plot_word_cloud(cleaned_filtered_titles, 'figs/cleaned_filtered_titles.png')