Untitled

mail@pastecode.io avatar
unknown
python
a year ago
1.5 kB
2
Indexable
Never
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN, Flatten


# ---------------------------------------------------------------------------
# Data preparation: read the corpus, tokenize and pad it, build the labels,
# then split everything into train / test portions.
# ---------------------------------------------------------------------------

# Every line of the input file becomes one raw sample (line endings are kept).
with open('ten_tep_tin_nguoc_lai.txt', mode='r', encoding='utf-8') as file:
    lines = list(file)

# NOTE(review): `chuyen_chu_thanh_dau_cau` is neither defined nor imported in
# the visible part of this file -- it has to be provided before this runs.
train_data = np.array(list(map(chuyen_chu_thanh_dau_cau, lines)))

# Learn the vocabulary from the transformed samples and keep the
# token -> integer index mapping for sizing the embedding layers later on.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data)
word_index = tokenizer.word_index

# Turn each sample into a sequence of token ids and pad them into one 2-D
# array (its second dimension is read later as the model input length).
sequences = tokenizer.texts_to_sequences(train_data)
X_train = pad_sequences(sequences)

# Placeholder labelling: every sample receives the same "your_label" string.
# Replace "your_label" with the appropriate label.
labels = np.array(["your_label" for _ in range(len(train_data))])

# Map the string labels onto integer class ids.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Up to this point X_train holds the full padded matrix; the split below
# rebinds it to the training portion (80%) and puts the remaining 20% in
# X_test. The fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)

# Feed-forward network: embedding -> flatten -> dense hidden layer -> one
# sigmoid output unit. The model is only assembled here; no compile()/fit()
# call appears in the visible part of this file.
model_nn = Sequential([
    Embedding(
        # Vocabulary size plus one -- presumably to leave room for index 0,
        # which the padded sequences use as filler (confirm against Tokenizer).
        input_dim=len(word_index) + 1,
        output_dim=32,
        # Sequence length taken from the padded input matrix.
        input_length=X_train.shape[1],
    ),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Recurrent network: embedding -> SimpleRNN (64 units, ReLU activation) -> one
# sigmoid output unit. As with the model above, it is only assembled here;
# no compile()/fit() call appears in the visible part of this file.
model_rnn = Sequential([
    Embedding(
        # Same sizing as the feed-forward model: vocabulary size plus one,
        # with the input length read from the padded input matrix.
        input_dim=len(word_index) + 1,
        output_dim=32,
        input_length=X_train.shape[1],
    ),
    SimpleRNN(units=64, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])