Untitled
unknown
plain_text
a year ago
1.7 kB
6
Indexable
# Select the videos whose title mentions at least one keyword, then drop the
# ones whose title also contains an unrelated/excluded term, and report counts.
#
# NOTE(review): `video_df` (indexed by a 'title' column below) and `keywords`
# (an iterable of strings) are not defined in this section -- presumably they
# are created earlier in the file; confirm before running this on its own.
#
# Two options:
# Option 1. Filtering by substring: if we include "cig" in the set of
#   keywords, titles with words like "e-cig" or "cigarettes" will be matched.
# Option 2. Filtering by exact word: only titles containing the exact word
#   "cig" will be matched, regardless of case. This requires creating sets
#   with plurals, for example, "cigarette" and "cigarettes".
#   Caveat: `\b` treats "-" as a word boundary, so "cig" still matches the
#   "cig" in "e-cig"; only longer words such as "cigarettes" are excluded.

# # Option 1
# # Count videos that contain at least one keyword in the title
# videos_with_keyword = video_df[video_df['title'].str.contains('|'.join(keywords), case=False, na=False)]

# Option 2
# Create a regular expression pattern for exact words. Keywords are escaped so
# that characters such as "." or "+" are matched literally.
keyword_alternation = '|'.join(re.escape(keyword) for keyword in keywords)
if keyword_alternation:
    pattern = r'\b(?:' + keyword_alternation + r')\b'
else:
    # With no keywords the pattern would degenerate to r'\b(?:)\b', which
    # matches at every word boundary and would select almost every title.
    # An empty negative lookahead never matches, so nothing is selected.
    pattern = r'(?!)'

# Filter videos based on regular expression pattern. `na=False` treats missing
# titles as non-matching instead of propagating NaN into the boolean mask.
title_has_keyword = video_df['title'].str.contains(pattern, case=False, na=False)
videos_with_keyword = video_df[title_has_keyword]

## Finally
# Custom list of words to exclude
exclude_words = [
    "podcast", "music", "time", "lyric", "Andaman", "Nicobar", "island",
    "Alan Walker", "Music Video", "type beat", "Lucid Dream", "Juice WRLD",
    "WRLD", "rugby", "Mobile Legend", "drink", "asmr", "ft", "feat", "Weed",
    "Food", "vlog", "cbd", "acer", "Joe Rogan",
]

# Create a regex pattern for the words to exclude (whole words/phrases only,
# matched case-insensitively below).
exclude_pattern = r'\b(?:' + '|'.join(re.escape(word) for word in exclude_words) + r')\b'

# Filter out videos with titles containing any of the exclude words
title_is_excluded = videos_with_keyword['title'].str.contains(exclude_pattern, case=False, na=False)
videos_with_keyword = videos_with_keyword[~title_is_excluded]

print("Total number of videos:", len(video_df))
print("Number of videos with at least one keyword in the title:", len(videos_with_keyword))
Editor is loading...
Leave a Comment