Untitled
unknown
plain_text
a year ago
1.7 kB
4
Indexable
# Split the videos into two groups by title -- "filtered" (contains a keyword
# and no excluded word) and "excluded" (everything else) -- strip stopwords
# from the titles, and render one word cloud per group.


def _title_contains_any(frame, words):
    """Return a boolean mask of the rows whose 'title' contains any of *words*.

    The words are joined with '|' and handed to ``Series.str.contains``, so
    each one is interpreted as a case-insensitive regular-expression
    alternative. Rows with a missing title never match (``na=False``).

    When *words* is empty the joined pattern is the empty string, which
    ``str.contains`` would match against every title; in that case an
    all-False mask is returned instead, so "no words" means "no matches".
    """
    pattern = '|'.join(words)
    if not pattern:
        return pd.Series(False, index=frame.index, dtype=bool)
    return frame['title'].str.contains(pattern, case=False, na=False)


# Videos whose title contains at least one keyword, and the remainder.
# The mask is computed once and reused for both halves of the split.
has_keyword = _title_contains_any(video_df, keywords)
videos_with_keywords = video_df[has_keyword]
videos_without_keywords = video_df[~has_keyword]

# Among the keyword matches, find the titles that also contain an excluded word.
has_excluded_word = _title_contains_any(videos_with_keywords, exclude_words)
videos_with_excluded_words = videos_with_keywords[has_excluded_word]

# Titles that passed the filter: keyword present, no excluded word.
filtered_titles = videos_with_keywords[~has_excluded_word]['title']

# Titles that were rejected: no keyword at all, or keyword plus excluded word.
excluded_titles = pd.concat([videos_without_keywords, videos_with_excluded_words], axis=0)['title']


def clean_titles(titles):
    """Return *titles* with every stopword removed from each title.

    Each title is split on whitespace; tokens whose lowercase form is in
    ``final_stopwords`` are dropped and the remaining tokens are re-joined
    with single spaces.

    Missing titles are dropped and the remaining values are coerced to
    ``str`` first: rows without a title are part of the "excluded" group
    (they cannot match a keyword), and calling ``.split()`` on a NaN would
    raise ``AttributeError``.
    """
    def strip_stopwords(title):
        return ' '.join(word for word in title.split() if word.lower() not in final_stopwords)

    return titles.dropna().astype(str).apply(strip_stopwords)


cleaned_excluded_titles = clean_titles(excluded_titles)
cleaned_filtered_titles = clean_titles(filtered_titles)


def plot_word_cloud(titles, file_name):
    """Render a word cloud of *titles*, save it to *file_name*, and show it.

    Args:
        titles: iterable of title strings; they are joined with spaces to
            form the text the cloud is generated from.
        file_name: destination passed to ``plt.savefig``. When it is a
            filesystem path, its parent directory is created if missing.
    """
    import os

    wordcloud = WordCloud(width=1920, height=1080, background_color='white').generate(' '.join(titles))
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')

    # savefig does not create missing directories; make sure the target
    # folder (e.g. 'figs/') exists so the save cannot fail on a fresh checkout.
    if isinstance(file_name, (str, os.PathLike)):
        target_dir = os.path.dirname(os.fspath(file_name))
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


plot_word_cloud(cleaned_excluded_titles, 'figs/cleaned_excluded_titles.png')
plot_word_cloud(cleaned_filtered_titles, 'figs/cleaned_filtered_titles.png')
Editor is loading...
Leave a Comment