Untitled
unknown
plain_text
2 years ago
1.4 kB
2
Indexable
"""Screen credit-card transactions for fraud with an Isolation Forest.

Pipeline: load and clean the CSV, derive log-amount and cyclic hour-of-day
features, standardize, run an exploratory PCA + K-means clustering, then fit
an Isolation Forest and report how well its outlier flags agree with the
``fraudulent`` column.
"""

import numpy as np
import pandas as pd

DATA_PATH = 'credit_card_transactions.csv'
# One seed for every stochastic step so repeated runs print the same accuracy.
RANDOM_STATE = 0


def preprocess(data):
    """Clean raw transactions and split them into features and labels.

    Drops duplicate rows and rows with missing values, replaces the
    ``datetime`` column with an integer ``hour`` column, and separates the
    ``fraudulent`` column from the rest.

    Args:
        data: DataFrame with at least ``datetime`` and ``fraudulent`` columns.

    Returns:
        ``(X, y)``: the feature DataFrame (without ``fraudulent``) and the
        ``fraudulent`` Series.
    """
    data = data.drop_duplicates().dropna()
    data = data.assign(hour=pd.to_datetime(data['datetime']).dt.hour)
    data = data.drop(columns=['datetime'])
    return data.drop(columns=['fraudulent']), data['fraudulent']


def engineer_features(X):
    """Return a copy of *X* with log-amount and cyclic hour features added.

    Adds ``amount_log`` (``log(1 + amount)``) plus ``hour_sin`` / ``hour_cos``,
    which place the hour on a 24-hour circle so 23:00 and 00:00 end up
    adjacent, and drops the raw ``hour`` column. *X* itself is not modified.
    """
    hour_angle = 2 * np.pi * X['hour'] / 24
    return X.assign(
        # log1p is the numerically accurate form of log(amount + 1).
        amount_log=np.log1p(X['amount']),
        hour_sin=np.sin(hour_angle),
        hour_cos=np.cos(hour_angle),
    ).drop(columns=['hour'])


def cluster_transactions(X_scaled, n_components=10, n_clusters=4):
    """Project *X_scaled* with PCA and return a K-means cluster id per row.

    ``n_components`` is capped at the smaller dimension of *X_scaled*, because
    PCA rejects a component count larger than that.
    """
    # scikit-learn is imported where it is used so that the pandas/numpy
    # helpers in this file stay importable without it.
    from sklearn.cluster import KMeans
    from sklearn.decomposition import PCA

    n_components = min(n_components, *X_scaled.shape)
    X_pca = PCA(n_components=n_components).fit_transform(X_scaled)
    kmeans = KMeans(n_clusters=n_clusters, random_state=RANDOM_STATE)
    return kmeans.fit_predict(X_pca)


def to_fraud_labels(predictions):
    """Convert Isolation Forest output (+1 inlier, -1 outlier) to 0/1 flags.

    Outliers become 1 and inliers become 0, so the result can be compared
    with a 0/1 fraud label.
    """
    return (np.asarray(predictions) == -1).astype(int)


def fraud_accuracy(predictions, y_true):
    """Return the fraction of rows where the outlier flag equals *y_true*.

    Args:
        predictions: raw ``IsolationForest.predict`` output (+1 / -1).
        y_true: ground-truth labels; assumed to be 1 (or True) for fraud and
            0 (or False) otherwise -- TODO confirm against the dataset.
    """
    return float(np.mean(to_fraud_labels(predictions) == np.asarray(y_true)))


def main():
    """Run the full pipeline on ``DATA_PATH`` and print the accuracy."""
    from sklearn.ensemble import IsolationForest
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # Load data and preprocess
    X, y = preprocess(pd.read_csv(DATA_PATH))

    # Feature engineering
    X = engineer_features(X)

    # Standardize features
    X_scaled = StandardScaler().fit_transform(X)

    # Reduce dimensionality using PCA and cluster data using K-means
    # NOTE(review): the cluster ids are not consumed by the model below;
    # this is an exploratory step only.
    cluster_transactions(X_scaled)

    # Train isolation forest model (labels are not used for fitting)
    X_train, X_test, _, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=RANDOM_STATE
    )
    model = IsolationForest(
        n_estimators=100,
        max_samples='auto',
        contamination='auto',
        random_state=RANDOM_STATE,
    )
    model.fit(X_train)

    # Evaluate model performance
    # predict() yields +1/-1, so it must be mapped to 0/1 before comparing
    # with the labels; a direct comparison never matches a 0 label.
    accuracy = fraud_accuracy(model.predict(X_test), y_test)
    print('Accuracy:', accuracy)


if __name__ == '__main__':
    main()
Editor is loading...