Untitled
unknown
python
a year ago
5.3 kB
4
Indexable
# Train and cross-validate the final LSTM_US_SA model with the best
# hyperparameters found during tuning, then persist per-fold and
# majority-vote sentiment predictions for every row of us_df.
#
# Expects in scope: sequences (2-D feature matrix), labels (int class ids
# 0/1/2), us_df (DataFrame aligned row-for-row with sequences, with
# 'id_str'/'full_text'/'sentiment' columns), best_params (dict from the
# tuning step), path (output directory prefix string).

MODEL_NAME = "LSTM_US_SA_Model_Final"


def build_model():
    """Return a freshly initialized, compiled LSTM sentiment classifier.

    A new model is built for every fold so that weights learned on one
    fold cannot leak into the next — reusing a single model across folds
    (as the original code did) makes the cross-validation estimates
    optimistically biased and invalid.
    """
    model = Sequential(name=MODEL_NAME)
    model.add(LSTM(best_params['lstm_units'],
                   input_shape=(1, sequences.shape[1]),
                   dropout=best_params['dropout_rate'],
                   recurrent_dropout=best_params['recurrent_dropout_rate'],
                   name="LSTM_Layer"))
    # NOTE(review): with return_sequences=False the LSTM output is already
    # 2-D, so Flatten is a no-op; kept for parity with the original graph.
    model.add(Flatten(name="Flatten_Layer"))
    model.add(Dense(3, activation='softmax', name="Output_Layer"))
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=AdamW(learning_rate=best_params['learning_rate']),
                  metrics=['accuracy'])
    return model


model = build_model()
# FIX: summary() prints itself and returns None; wrapping it in print()
# emitted a spurious "None" line.
model.summary()

# Per-row containers for the fold-level predictions.
us_df['predicted_fold_sentiment'] = [[] for _ in range(len(us_df))]
us_df['predicted_sentiment'] = None

# Stratified 10-fold CV with a fixed seed for reproducibility.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
n_folds = skf.get_n_splits()
split_indices = {
    i: (train_index, val_index)
    for i, (train_index, val_index) in enumerate(skf.split(sequences, labels))
}

# Fold-level aggregate (weighted) metrics.
train_accuracy_scores, val_accuracy_scores = [], []
train_precision_scores, val_precision_scores = [], []
train_recall_scores, val_recall_scores = [], []
train_f1_scores, val_f1_scores = [], []
train_roc_auc_scores, val_roc_auc_scores = [], []
fold_histories = []
all_y_true, all_y_pred = [], []

# Per-class metrics (classes are the integer sentiment ids 0, 1, 2).
class_names = [0, 1, 2]
precision_per_class = {cls: [] for cls in class_names}
recall_per_class = {cls: [] for cls in class_names}
f1_per_class = {cls: [] for cls in class_names}

# Train and evaluate one freshly initialized model per fold.
for i, (train_index, val_index) in tqdm(split_indices.items(),
                                        total=n_folds, desc="Folds"):
    X_train_fold, X_val_fold = sequences[train_index], sequences[val_index]
    y_train_fold, y_val_fold = labels[train_index], labels[val_index]
    # Insert the timestep axis the LSTM expects: (samples, 1, features).
    X_train_fold = np.expand_dims(X_train_fold, axis=1)
    X_val_fold = np.expand_dims(X_val_fold, axis=1)

    # FIX: rebuild the model from scratch each fold; the original reused
    # one model, so later folds started from weights trained on earlier
    # folds' data (cross-fold leakage).
    model = build_model()

    early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                                   min_delta=0.0001)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=2, min_lr=0.0001)
    # Counteract class imbalance within this fold's training split.
    class_weight = dict(enumerate(compute_class_weight(
        class_weight="balanced",
        classes=np.unique(y_train_fold),
        y=y_train_fold)))

    history = model.fit(X_train_fold, y_train_fold,
                        validation_data=(X_val_fold, y_val_fold),
                        epochs=100,
                        batch_size=best_params['batch_size'],
                        class_weight=class_weight,
                        callbacks=[early_stopping, reduce_lr])
    fold_histories.append(history)

    # Predict once per split; derive both probabilities and hard labels.
    train_proba = model.predict(X_train_fold)
    val_proba = model.predict(X_val_fold)
    y_train_pred = np.argmax(train_proba, axis=1)
    y_val_pred = np.argmax(val_proba, axis=1)
    all_y_true.extend(y_val_fold)
    all_y_pred.extend(y_val_pred)

    train_accuracy_scores.append(accuracy_score(y_train_fold, y_train_pred))
    val_accuracy_scores.append(accuracy_score(y_val_fold, y_val_pred))
    train_precision_scores.append(
        precision_score(y_train_fold, y_train_pred, average='weighted'))
    val_precision_scores.append(
        precision_score(y_val_fold, y_val_pred, average='weighted'))
    train_recall_scores.append(
        recall_score(y_train_fold, y_train_pred, average='weighted'))
    val_recall_scores.append(
        recall_score(y_val_fold, y_val_pred, average='weighted'))
    train_f1_scores.append(
        f1_score(y_train_fold, y_train_pred, average='weighted'))
    val_f1_scores.append(
        f1_score(y_val_fold, y_val_pred, average='weighted'))
    # FIX: ROC AUC is defined over scores, not hard labels. The original
    # binarized the argmax predictions, which collapses the ROC curve to a
    # single point; pass the softmax probabilities instead.
    train_roc_auc_scores.append(roc_auc_score(
        label_binarize(y_train_fold, classes=class_names), train_proba,
        average='weighted', multi_class='ovr'))
    val_roc_auc_scores.append(roc_auc_score(
        label_binarize(y_val_fold, classes=class_names), val_proba,
        average='weighted', multi_class='ovr'))

    # Per-class precision/recall/F1 on this fold's validation split.
    for cls in class_names:
        precision_per_class[cls].append(precision_score(
            y_val_fold, y_val_pred, labels=[cls], average='macro',
            zero_division=0))
        recall_per_class[cls].append(recall_score(
            y_val_fold, y_val_pred, labels=[cls], average='macro',
            zero_division=0))
        f1_per_class[cls].append(f1_score(
            y_val_fold, y_val_pred, labels=[cls], average='macro',
            zero_division=0))

    # Record this fold's prediction for every validation row.
    # FIX: val_index holds POSITIONAL indices into `sequences`; map them
    # through us_df.index so the .at write stays correct even when us_df
    # does not carry a default RangeIndex.
    for pos, val_pos in enumerate(val_index):
        us_df.at[us_df.index[val_pos], 'predicted_fold_sentiment'].append(
            y_val_pred[pos])


def majority_vote(sentiments):
    """Return the most common sentiment label in *sentiments*."""
    return Counter(sentiments).most_common(1)[0][0]


# With 10-fold CV every row is validated exactly once, so each list holds a
# single prediction and the vote is trivial; the voting is kept so the same
# code keeps working if the scheme is changed to repeated folds.
us_df['predicted_sentiment'] = us_df['predicted_fold_sentiment'].apply(
    majority_vote)

# Persist per-tweet predictions alongside the gold sentiment.
# (MODEL_NAME equals the original model.name, so the filename is unchanged.)
output_df = pd.DataFrame({
    'id_str': us_df['id_str'],
    'full_text': us_df['full_text'],
    'sentiment': us_df['sentiment'],
    'predicted_sentiment': us_df['predicted_sentiment'],
    'predicted_fold_sentiment': us_df['predicted_fold_sentiment'],
})
output_df.to_csv(f'{path}{MODEL_NAME}_prediction.csv', index=False)
Editor is loading...
Leave a Comment