Untitled

def clean_titles(titles):
    """Remove stopwords from every title in a pandas Series.

    Each title is split on whitespace, words whose lowercase form is found in
    the module-level ``final_stopwords`` collection are dropped, and the
    remaining words are re-joined with single spaces.

    Args:
        titles: pandas Series of titles. Entries that are not strings (for
            example NaN/None for a missing title) are mapped to an empty
            string instead of raising ``AttributeError`` on ``.split()``.

    Returns:
        The Series produced by ``titles.apply``, holding the cleaned titles.
    """

    def _strip_stopwords(title):
        # Missing values have no ``.split()``; treat any non-string entry
        # as an empty title so downstream ``' '.join(...)`` keeps working.
        if not isinstance(title, str):
            return ''
        return ' '.join(
            word for word in title.split() if word.lower() not in final_stopwords
        )

    return titles.apply(_strip_stopwords)

# Single alternation pattern built from all keywords; str.contains treats it
# as a regular expression and, with case=False, matches case-insensitively.
title_keyword_pattern = '|'.join(keywords)
# Boolean mask over video_df rows. na=False makes a missing title count as
# "no match", so such rows end up in the "without keywords" group below.
title_has_keyword = video_df['title'].str.contains(title_keyword_pattern, case=False, na=False)

# Titles of videos without any of the keywords
titles_without_keywords = video_df.loc[~title_has_keyword, 'title']
cleaned_titles_without_keywords = clean_titles(titles_without_keywords)

# Titles of videos with at least one of the keywords
titles_with_keywords = video_df.loc[title_has_keyword, 'title']
cleaned_titles_with_keywords = clean_titles(titles_with_keywords)


def plot_word_cloud(titles, file_name):
    """Render a word cloud from ``titles``, save it to ``file_name`` and show it.

    Args:
        titles: iterable of strings; they are joined with single spaces into
            the text passed to ``WordCloud.generate``.
        file_name: destination handed to ``plt.savefig``. When it is a
            filesystem path, missing parent directories are created first.
    """
    import os  # local import: the file's import section is not visible here

    # savefig does not create directories, so a missing output folder would
    # raise FileNotFoundError. File-like targets are passed through untouched.
    if isinstance(file_name, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_name))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(titles))
    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # the image is not a chart, so hide ticks and frame
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Plot one word cloud per title group, each saved to its own PNG file.
for cloud_titles, cloud_path in (
    (cleaned_titles_without_keywords, 'figs/wordcloud_without_keywords.png'),
    (cleaned_titles_with_keywords, 'figs/wordcloud_with_keywords.png'),
):
    plot_word_cloud(cloud_titles, cloud_path)

# TODO: carefully review which words should be removed
Editor is loading...