Untitled
unknown
plain_text
a year ago
1.2 kB
8
Indexable
def clean_titles(titles):
    """Strip stop words from every title.

    Each title is split on whitespace; a word is dropped when its lowercase
    form is found in the module-level ``final_stopwords`` collection, and the
    remaining words are re-joined with single spaces.

    Args:
        titles: Object exposing ``.apply`` (a pandas Series of titles in the
            callers visible in this file).

    Returns:
        The result of ``titles.apply(...)``: one cleaned string per input
        value. Values that are not strings (e.g. NaN for a missing title)
        become ``''`` instead of raising ``AttributeError`` on ``.split()``.
    """
    # Build the lookup once; membership tests then cost O(1) per word even
    # when final_stopwords is a list.
    stopwords = frozenset(final_stopwords)

    def _strip_stopwords(title):
        # Missing titles reach this function as float NaN, which has no
        # .split(); treat anything that is not text as an empty title.
        if not isinstance(title, str):
            return ''
        return ' '.join(
            word for word in title.split() if word.lower() not in stopwords
        )

    return titles.apply(_strip_stopwords)
# Flag every video whose title mentions at least one keyword. The keywords are
# OR-ed into a single case-insensitive regex and missing titles count as
# "no match" (na=False). The mask is built once so that both groups below are
# derived from exactly the same test.
# NOTE(review): keywords are used as raw regex fragments - presumably they
# contain no regex metacharacters; confirm, or escape them before joining.
title_has_keyword = video_df['title'].str.contains('|'.join(keywords), case=False, na=False)

# Titles of videos without any of the keywords. Missing titles fall on this
# side of the mask, so drop them: they are not text and cannot be split into
# words by clean_titles().
titles_without_keywords = video_df.loc[~title_has_keyword, 'title'].dropna()
cleaned_titles_without_keywords = clean_titles(titles_without_keywords)

# Titles of videos with at least one of the keywords
titles_with_keywords = video_df.loc[title_has_keyword, 'title']
cleaned_titles_with_keywords = clean_titles(titles_with_keywords)
def plot_word_cloud(titles, file_name):
    """Render a word cloud for a collection of titles, save it and show it.

    All titles are concatenated with single spaces and handed to
    ``WordCloud.generate``; the resulting image is drawn on a 10x5 inch
    figure without axes, written to ``file_name`` and then displayed.

    Args:
        titles: Iterable of strings (``' '.join`` is applied to it).
        file_name: Destination passed to ``plt.savefig``. When it is a path,
            its parent directory is created if it does not exist yet.
    """
    # Local import: the file's import header is not visible from this block.
    import os

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(titles))

    # savefig does not create missing directories, so make sure the target
    # folder exists. Non-path destinations (e.g. file objects) are left alone.
    if isinstance(file_name, (str, os.PathLike)):
        output_dir = os.path.dirname(os.fspath(file_name))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    figure = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)
# Render and save one word cloud per title group: first the titles without
# any keyword, then the titles with at least one keyword.
for group_titles, output_path in (
    (cleaned_titles_without_keywords, 'figs/wordcloud_without_keywords.png'),
    (cleaned_titles_with_keywords, 'figs/wordcloud_with_keywords.png'),
):
    plot_word_cloud(group_titles, output_path)
# TODO: carefully review which words should be removed
Leave a Comment