Untitled

mail@pastecode.io avatar
unknown
plain_text
a month ago
895 B
4
Indexable
Never
# Extract suffixes
# Add a 'Suffix' column by running extract_suffix (defined outside this
# section) over every value of the 'PN' column.
pn_suffixes = df['PN'].apply(extract_suffix)
df['Suffix'] = pn_suffixes

# Rows are compared on the 'PN' column only.
key_columns = ['PN']

# keep=False flags every member of a repeated group, so the report lists all
# occurrences of a repeated PN rather than only the later ones.
repeated_pn_mask = df.duplicated(subset=key_columns, keep=False)
duplicates = df[repeated_pn_mask]
print("Duplicate entries found:", duplicates, sep="\n")

# For each PN keep only the first row in which it appears.
df_no_duplicates = df.drop_duplicates(subset=key_columns, keep='first')

# Preview of the de-duplicated frame.
print("\nFirst few rows of the result after removing duplicates:")
preview = df_no_duplicates.head()
print(preview)

# Write the de-duplicated rows (without the index column) to a CSV whose name
# is the input file name with a fixed prefix, then hand it to files.download.
# NOTE(review): `files` and `filename` come from outside this section;
# presumably `files` is a notebook download helper -- confirm.
result_filename = 'result_no_duplicates_' + filename
df_no_duplicates.to_csv(result_filename, index=False)
files.download(result_filename)

print(f"\nResults saved to {result_filename}")

# Summary of row counts before and after de-duplication.
rows_before = len(df)
rows_after = len(df_no_duplicates)
rows_removed = rows_before - rows_after
print(f"\nOriginal number of rows: {rows_before}")
print(f"Number of rows after removing duplicates: {rows_after}")
print(f"Number of duplicates removed: {rows_removed}")
Leave a Comment