Untitled

nigganomics 101
mail@pastecode.io avatar
unknown
python
a year ago
5.6 kB
3
Indexable
Never
import os
import random
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, TimeDistributed, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Assumed data directory
data_dir = '/content/Example2_filtered'

# Set random seeds for reproducibility.
# NumPy drives the batch/noise sampling in the training loop, while TensorFlow
# draws the initial layer weights from its own generator, so seeding NumPy
# alone does not make runs repeatable. Python's `random` is seeded as well
# because the module is imported by this file.
random_seed = 1234
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Define constants
max_text_sequence_length = 200  # token ids per text after padding/truncation
max_words = 10000               # `num_words` cap handed to the Tokenizer
oov_token = "<OOV>"             # token substituted for out-of-vocabulary words
latent_dim = 100                # size of the generator's noise vector
text_encoding_dim = 50          # size of the text vector fed to generator/discriminator
epochs = 10                     # iterations of the training loop (one batch each)
batch_size = 1                  # samples drawn per training iteration

# Placeholder functions - Replace these with actual functions to load and split data
def split_data(data_dir):
    """Placeholder: split the dataset under ``data_dir`` into three folder groups.

    The caller unpacks the result as ``train_folders, val_folders,
    test_folders`` and passes each group to ``load_data``.

    Raises:
        NotImplementedError: always, until real splitting logic is supplied.
            (The previous stub returned undefined names and therefore failed
            with a misleading ``NameError``.)
    """
    # TODO: implement data splitting logic here
    raise NotImplementedError(
        "split_data() is a placeholder; implement it to return "
        "(train_folders, val_folders, test_folders)."
    )

def load_data(data_dir, folders):
    """Placeholder: load the text and keypoints for ``folders`` under ``data_dir``.

    The caller unpacks the result as ``(text_data, keypoints_data)``; the text
    part is handed to the Keras ``Tokenizer`` and the keypoints part to
    ``convert_dicts_to_lists``.

    Raises:
        NotImplementedError: always, until real loading logic is supplied.
            (The previous stub returned undefined names and therefore failed
            with a misleading ``NameError``.)
    """
    # TODO: implement data loading logic here
    raise NotImplementedError(
        "load_data() is a placeholder; implement it to return "
        "(text_data, keypoints_data)."
    )

def convert_dicts_to_lists(keypoints_data):
    """Placeholder: convert loaded keypoints into sequences for ``pad_sequences``.

    The caller passes the return value straight to ``pad_sequences`` with
    ``dtype='float32'``, so it must be a list of numeric sequences.

    Raises:
        NotImplementedError: always, until real conversion logic is supplied.
            (The previous stub returned an undefined name and therefore failed
            with a misleading ``NameError``.)
    """
    # TODO: implement conversion logic here
    raise NotImplementedError(
        "convert_dicts_to_lists() is a placeholder; implement it to return "
        "a list of numeric sequences."
    )

# Partition the dataset folders, then read the text and keypoints of each
# partition in train / validation / test order.
train_folders, val_folders, test_folders = split_data(data_dir)

(
    (train_text_data, train_keypoints_data),
    (val_text_data, val_keypoints_data),
    (test_text_data, test_keypoints_data),
) = [
    load_data(data_dir, subset_folders)
    for subset_folders in (train_folders, val_folders, test_folders)
]

# Build the vocabulary from the training texts only, then turn every split
# into fixed-length integer sequences with that same vocabulary.
tokenizer = Tokenizer(num_words=max_words, oov_token=oov_token)
tokenizer.fit_on_texts(train_text_data)

train_text_data, val_text_data, test_text_data = (
    pad_sequences(
        tokenizer.texts_to_sequences(raw_texts),
        maxlen=max_text_sequence_length,
    )
    for raw_texts in (train_text_data, val_text_data, test_text_data)
)

# Convert and pad keypoints data.
# Without `maxlen`, pad_sequences pads to the longest sequence of the batch it
# is given, so padding each split on its own can yield a different width per
# split. The training split therefore defines the width and the other splits
# are padded/truncated to match it.
train_keypoints_data = pad_sequences(convert_dicts_to_lists(train_keypoints_data), dtype='float32')

# Get the output dimension of the keypoints data
# NOTE(review): assumes each sample is a flat vector, i.e. the padded array is
# (num_samples, keypoints_output_dim) — confirm against convert_dicts_to_lists.
keypoints_output_dim = train_keypoints_data.shape[1]

val_keypoints_data = pad_sequences(
    convert_dicts_to_lists(val_keypoints_data),
    maxlen=keypoints_output_dim,
    dtype='float32',
)
test_keypoints_data = pad_sequences(
    convert_dicts_to_lists(test_keypoints_data),
    maxlen=keypoints_output_dim,
    dtype='float32',
)

# Define text encoder
def create_text_encoder(input_shape, encoding_size, tokenizer):
    """Build a model that maps padded token-id sequences to a fixed-size vector.

    Args:
        input_shape: Shape of one input sample (without the batch axis),
            e.g. ``(max_text_sequence_length,)``.
        encoding_size: Width of the embedding vectors and of the returned
            text encoding.
        tokenizer: Fitted Keras ``Tokenizer``; its ``word_index`` determines
            the embedding vocabulary size.

    Returns:
        An uncompiled ``Sequential`` model whose output has ``encoding_size``
        units per sample.
    """
    # +1 because Tokenizer word indices start at 1; index 0 is the padding id.
    vocab_size = len(tokenizer.word_index) + 1
    model = tf.keras.models.Sequential([
        Input(shape=input_shape, dtype=tf.int32),
        Embedding(vocab_size, encoding_size),
        LSTM(128),
        # Project the 128-unit LSTM state down to `encoding_size`. Previously
        # the encoder always emitted 128 features regardless of
        # `encoding_size`, which cannot be fed to models whose text input is
        # declared with `encoding_size` units.
        Dense(encoding_size),
    ])
    return model

# Define GAN models
def build_generator(latent_dim, text_encoding_dim, keypoints_output_dim):
    """Build the conditional generator.

    The model takes two inputs, a noise vector of ``latent_dim`` units and a
    text encoding of ``text_encoding_dim`` units, concatenates them, passes
    them through two ReLU dense layers (256 then 512 units) and emits
    ``keypoints_output_dim`` values through a tanh output layer.

    Returns:
        An uncompiled Keras ``Model`` with inputs ``[noise, text_encoding]``.
    """
    noise_in = Input(shape=(latent_dim,))
    condition_in = Input(shape=(text_encoding_dim,))

    hidden = Concatenate()([noise_in, condition_in])
    for units in (256, 512):
        hidden = Dense(units, activation='relu')(hidden)

    generated = Dense(keypoints_output_dim, activation='tanh')(hidden)
    return Model(inputs=[noise_in, condition_in], outputs=generated)

def build_discriminator(keypoints_output_dim, text_encoding_dim):
    """Build the conditional discriminator.

    The model takes a keypoints vector of ``keypoints_output_dim`` units and a
    text encoding of ``text_encoding_dim`` units, concatenates them, passes
    them through two ReLU dense layers (256 then 128 units) and produces a
    single sigmoid score.

    Returns:
        An uncompiled Keras ``Model`` with inputs ``[keypoints, text_encoding]``.
    """
    sample_in = Input(shape=(keypoints_output_dim,))
    condition_in = Input(shape=(text_encoding_dim,))

    hidden = Concatenate()([sample_in, condition_in])
    for units in (256, 128):
        hidden = Dense(units, activation='relu')(hidden)

    score = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=[sample_in, condition_in], outputs=score)

# Define GAN architecture
def build_gan(generator, discriminator):
    """Stack ``generator`` and ``discriminator`` into the combined GAN model.

    The combined model takes ``[noise, text_encoding]``, generates keypoints
    with ``generator`` and returns the ``discriminator`` score for those
    keypoints paired with the same text encoding.

    Side effect: sets ``discriminator.trainable = False`` so that training the
    combined model only updates the generator. Keras records the trainable
    state when a model is compiled, so a discriminator that should still learn
    on its own must be compiled *before* this function is called.

    Args:
        generator: Two-input Keras model ``[noise, text_encoding] -> keypoints``.
        discriminator: Two-input Keras model
            ``[keypoints, text_encoding] -> validity``.

    Returns:
        An uncompiled Keras ``Model`` with inputs ``[noise, text_encoding]``.
    """
    discriminator.trainable = False

    # Take the input sizes from the generator itself rather than from module
    # globals, so the combined model always matches the generator it wraps.
    # `input_shape` entries include the batch axis, which is dropped here.
    noise_shape, text_shape = (shape[1:] for shape in generator.input_shape)

    z = Input(shape=noise_shape)
    text_input = Input(shape=text_shape)
    keypoints = generator([z, text_input])
    validity = discriminator([keypoints, text_input])
    return Model(inputs=[z, text_input], outputs=validity)

# Instantiate models
text_encoder = create_text_encoder(input_shape=(max_text_sequence_length,), encoding_size=text_encoding_dim, tokenizer=tokenizer)
generator = build_generator(latent_dim, text_encoding_dim, keypoints_output_dim)
discriminator = build_discriminator(keypoints_output_dim, text_encoding_dim)

# Compile models
# The discriminator must be compiled BEFORE build_gan() runs: build_gan() sets
# `discriminator.trainable = False`, and Keras keeps the trainable state that
# was in effect at compile time. Compiling afterwards would produce a
# discriminator whose weights are never updated by train_on_batch.
discriminator.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy', metrics=['accuracy'])

# The combined model is built and compiled with the discriminator frozen, so
# training it only updates the generator.
gan = build_gan(generator, discriminator)
gan.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy')

# Train GAN
# Each iteration trains on a single randomly drawn batch: one discriminator
# update on real samples, one on generated samples, then one generator update
# through the combined model.
# NOTE(review): assumes row i of train_text_data describes row i of
# train_keypoints_data (both come from the same load_data call) — confirm.
num_train_samples = min(train_keypoints_data.shape[0], train_text_data.shape[0])

for epoch in range(epochs):
    # Train discriminator
    # Use ONE set of indices for both modalities so that every real keypoints
    # sample is paired with its own text. Sampling text with a second,
    # independent index would condition the discriminator on unrelated text.
    idx = np.random.randint(0, num_train_samples, batch_size)
    real_keypoints = train_keypoints_data[idx]
    text_data = train_text_data[idx]

    # verbose=0 keeps Keras' per-call progress bars out of the training log.
    text_encodings = text_encoder.predict(text_data, verbose=0)

    z_noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_keypoints = generator.predict([z_noise, text_encodings], verbose=0)

    real_y = np.ones((batch_size, 1))
    fake_y = np.zeros((batch_size, 1))

    # Each call returns [loss, accuracy] because the discriminator is compiled
    # with an accuracy metric; average the real and fake results element-wise.
    d_loss_real = discriminator.train_on_batch([real_keypoints, text_encodings], real_y)
    d_loss_fake = discriminator.train_on_batch([fake_keypoints, text_encodings], fake_y)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train generator
    # The generator is rewarded when the discriminator labels its output as real.
    valid_y = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch([z_noise, text_encodings], valid_y)

    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

# Note: The code above assumes the existence of data loading and preprocessing functions.
# Please ensure that you have implemented the necessary functions and that the data is correctly formatted.