import os
import random
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, TimeDistributed, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Assumed data directory
data_dir = '/content/Example2_filtered'

# Set random seed for reproducibility
np.random.seed(1234)

# Define constants
max_text_sequence_length = 200
max_words = 10000
oov_token = "<OOV>"
latent_dim = 100
text_encoding_dim = 50
epochs = 10
batch_size = 1

# Placeholder functions - replace these with actual functions to split and load the data
def split_data(data_dir):
    # Implement data splitting logic here and return
    # (train_folders, val_folders, test_folders)
    raise NotImplementedError

def load_data(data_dir, folders):
    # Implement data loading logic here and return
    # (text_data, keypoints_data)
    raise NotImplementedError

def convert_dicts_to_lists(keypoints_data):
    # Implement conversion of per-sample keypoint dictionaries to plain lists here
    raise NotImplementedError
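# --- Example sketch (not part of the original script) -----------------------
# One possible implementation of the placeholders above, assuming a
# hypothetical layout in which every sample is a folder under data_dir
# containing a 'text.txt' transcript and a 'keypoints.json' file holding a
# list of {keypoint_name: [x, y]} dictionaries, one per frame. The file names,
# JSON format, and 80/10/10 split are illustrative assumptions only.
def split_data(data_dir, train_frac=0.8, val_frac=0.1):
    folders = sorted(
        f for f in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, f))
    )
    random.shuffle(folders)
    n_train = int(len(folders) * train_frac)
    n_val = int(len(folders) * val_frac)
    return (folders[:n_train],
            folders[n_train:n_train + n_val],
            folders[n_train + n_val:])

def load_data(data_dir, folders):
    text_data, keypoints_data = [], []
    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        with open(os.path.join(folder_path, 'text.txt'), encoding='utf-8') as f:
            text_data.append(f.read().strip())
        with open(os.path.join(folder_path, 'keypoints.json'), encoding='utf-8') as f:
            keypoints_data.append(json.load(f))
    return text_data, keypoints_data

def convert_dicts_to_lists(keypoints_data):
    # Flatten each sample's per-frame keypoint dictionaries into a single flat
    # list of floats, so that pad_sequences returns a 2-D array matching the
    # Dense-layer generator and discriminator defined below.
    converted_data = []
    for sample in keypoints_data:
        flat = []
        for frame in sample:
            for coords in frame.values():
                flat.extend(float(c) for c in coords)
        converted_data.append(flat)
    return converted_data
# -----------------------------------------------------------------------------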
# Define data splitting, loading, and preprocessing
train_folders, val_folders, test_folders = split_data(data_dir)
train_text_data, train_keypoints_data = load_data(data_dir, train_folders)
val_text_data, val_keypoints_data = load_data(data_dir, val_folders)
test_text_data, test_keypoints_data = load_data(data_dir, test_folders)

# Tokenize the text data
tokenizer = Tokenizer(num_words=max_words, oov_token=oov_token)
tokenizer.fit_on_texts(train_text_data)
train_text_data = pad_sequences(tokenizer.texts_to_sequences(train_text_data), maxlen=max_text_sequence_length)
val_text_data = pad_sequences(tokenizer.texts_to_sequences(val_text_data), maxlen=max_text_sequence_length)
test_text_data = pad_sequences(tokenizer.texts_to_sequences(test_text_data), maxlen=max_text_sequence_length)

# Convert and pad keypoints data
train_keypoints_data = pad_sequences(convert_dicts_to_lists(train_keypoints_data), dtype='float32')
val_keypoints_data = pad_sequences(convert_dicts_to_lists(val_keypoints_data), dtype='float32')
test_keypoints_data = pad_sequences(convert_dicts_to_lists(test_keypoints_data), dtype='float32')

# Get the output dimension of the keypoints data
keypoints_output_dim = train_keypoints_data.shape[1]

# Define text encoder
def create_text_encoder(input_shape, encoding_size, tokenizer):
    vocab_size = len(tokenizer.word_index) + 1
    model = tf.keras.models.Sequential([
        Input(shape=input_shape, dtype=tf.int32),
        Embedding(vocab_size, encoding_size),
        # Encode to `encoding_size` units so the output matches the
        # text_encoding_dim expected by the generator and discriminator
        LSTM(encoding_size)
    ])
    return model

# Define GAN models
def build_generator(latent_dim, text_encoding_dim, keypoints_output_dim):
    z = Input(shape=(latent_dim,))
    text_input = Input(shape=(text_encoding_dim,))
    x = Concatenate()([z, text_input])
    x = Dense(256, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    # tanh output: keypoints are expected to be scaled to [-1, 1]
    keypoints = Dense(keypoints_output_dim, activation='tanh')(x)
    return Model(inputs=[z, text_input], outputs=keypoints)

def build_discriminator(keypoints_output_dim, text_encoding_dim):
    keypoints_input = Input(shape=(keypoints_output_dim,))
    text_input = Input(shape=(text_encoding_dim,))
    x = Concatenate()([keypoints_input, text_input])
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    validity = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[keypoints_input, text_input], outputs=validity)

# Define GAN architecture (discriminator frozen inside the combined model)
def build_gan(generator, discriminator):
    discriminator.trainable = False
    z = Input(shape=(latent_dim,))
    text_input = Input(shape=(text_encoding_dim,))
    keypoints = generator([z, text_input])
    validity = discriminator([keypoints, text_input])
    return Model(inputs=[z, text_input], outputs=validity)

# Instantiate models
text_encoder = create_text_encoder(input_shape=(max_text_sequence_length,),
                                   encoding_size=text_encoding_dim,
                                   tokenizer=tokenizer)
generator = build_generator(latent_dim, text_encoding_dim, keypoints_output_dim)
discriminator = build_discriminator(keypoints_output_dim, text_encoding_dim)

# Compile the discriminator before it is frozen inside the combined GAN model,
# so that discriminator.train_on_batch still updates its weights
discriminator.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy', metrics=['accuracy'])

gan = build_gan(generator, discriminator)
gan.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy')

# Train GAN
for epoch in range(epochs):
    # Train discriminator: sample a batch of real keypoints together with the
    # text they belong to, so the conditioning stays paired
    idx = np.random.randint(0, train_keypoints_data.shape[0], batch_size)
    real_keypoints = train_keypoints_data[idx]
    text_data = train_text_data[idx]
    text_encodings = text_encoder.predict(text_data)

    z_noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_keypoints = generator.predict([z_noise, text_encodings])

    real_y = np.ones((batch_size, 1))
    fake_y = np.zeros((batch_size, 1))
    d_loss_real = discriminator.train_on_batch([real_keypoints, text_encodings], real_y)
    d_loss_fake = discriminator.train_on_batch([fake_keypoints, text_encodings], fake_y)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train generator (it is rewarded when the discriminator labels its samples as real)
    valid_y = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch([z_noise, text_encodings], valid_y)

    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

# Note: The code above assumes the existence of data loading and preprocessing functions.
# Please ensure that you have implemented the necessary functions and that the data is correctly formatted.
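# --- Example sketch (not part of the original script) -----------------------
# A minimal illustration of how the trained models could be used at inference
# time: the prompt is tokenized and padded exactly like the training text,
# encoded with text_encoder, and paired with fresh noise for the generator.
# The helper name and prompt string are purely illustrative.
def generate_keypoints(prompt, n_samples=1):
    seq = pad_sequences(tokenizer.texts_to_sequences([prompt]),
                        maxlen=max_text_sequence_length)
    encoding = np.repeat(text_encoder.predict(seq), n_samples, axis=0)
    z = np.random.normal(0, 1, (n_samples, latent_dim))
    # Output shape: (n_samples, keypoints_output_dim), values in (-1, 1)
    return generator.predict([z, encoding])

sample_keypoints = generate_keypoints("wave hello", n_samples=3)
print(sample_keypoints.shape)
# -----------------------------------------------------------------------------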