Untitled

 avatar
unknown
plain_text
a year ago
1.7 kB
4
Indexable
def _title_mask(frame, terms):
    """Return a boolean mask of rows in *frame* whose 'title' contains any of *terms*.

    The terms are joined with '|' and handed to ``Series.str.contains``, so they
    are interpreted as a case-insensitive regular expression (pandas' default);
    missing titles never match (``na=False``).

    Empty terms are dropped, and an empty term list yields an all-False mask:
    an empty pattern would otherwise match every title.
    """
    pattern = '|'.join(term for term in terms if term)
    if not pattern:
        return pd.Series(False, index=frame.index, dtype=bool)
    return frame['title'].str.contains(pattern, case=False, na=False)


# Each mask is computed once over the full frame and reused below.
has_keyword = _title_mask(video_df, keywords)
has_excluded_word = _title_mask(video_df, exclude_words)

# Videos whose title contains at least one keyword
videos_with_keywords = video_df[has_keyword]
# Videos whose title contains none of the keywords (includes missing titles)
videos_without_keywords = video_df[~has_keyword]

# Keyword videos whose title also contains an excluded word
videos_with_excluded_words = video_df[has_keyword & has_excluded_word]

# Titles that pass both filters: keyword present, no excluded word
filtered_titles = video_df.loc[has_keyword & ~has_excluded_word, 'title']
# Titles rejected by either filter (no-keyword rows first, then excluded-word rows)
excluded_titles = pd.concat([videos_without_keywords, videos_with_excluded_words], axis=0)['title']

# Clean the titles by removing stopwords
def clean_titles(titles, stopwords=None):
    """Return *titles* with stopwords removed from every entry.

    Each title is split on whitespace; words whose lowercase form is in the
    stopword collection are dropped and the remaining words are re-joined with
    single spaces. The original casing of kept words is preserved.

    Args:
        titles: pandas Series of titles.
        stopwords: iterable of words to drop, compared against the lowercased
            word. Defaults to the module-level ``final_stopwords``.

    Returns:
        A Series with the same index as *titles*. Entries that are not strings
        (e.g. NaN for a missing title) become ``''`` instead of raising.
    """
    if stopwords is None:
        stopwords = final_stopwords
    # Build the lookup once so membership tests are O(1) per word.
    stopword_set = frozenset(stopwords)

    def strip_stopwords(title):
        # Missing titles arrive as NaN (a float) and have no .split().
        if not isinstance(title, str):
            return ''
        return ' '.join(word for word in title.split() if word.lower() not in stopword_set)

    return titles.apply(strip_stopwords)

# Strip stopwords from both title sets (rejected titles first, then kept titles).
cleaned_excluded_titles, cleaned_filtered_titles = (
    clean_titles(excluded_titles),
    clean_titles(filtered_titles),
)

# Plot the word clouds
def plot_word_cloud(titles, file_name):
    """Build a word cloud from *titles*, save it to *file_name* and display it.

    Args:
        titles: iterable of strings; they are joined with spaces into one text
            that is passed to ``WordCloud.generate``.
        file_name: destination passed to ``plt.savefig``. When it is a path,
            its parent directory is created if it does not exist yet.
    """
    import os

    wordcloud = WordCloud(width=1920, height=1080, background_color='white').generate(' '.join(titles))

    # savefig does not create missing directories; make sure the target folder
    # exists. Non-path targets (e.g. file-like objects) are passed through as-is.
    if isinstance(file_name, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_name))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Render one word cloud per title set: rejected titles first, then kept titles.
for _cloud_titles, _output_path in (
    (cleaned_excluded_titles, 'figs/cleaned_excluded_titles.png'),
    (cleaned_filtered_titles, 'figs/cleaned_filtered_titles.png'),
):
    plot_word_cloud(_cloud_titles, _output_path)
Editor is loading...
Leave a Comment