cnn_regression

mail@pastecode.io avatar
unknown
python
4 months ago
4.5 kB
4
Indexable
import tensorflow as tf
import numpy as np
from src import helper
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import time


class CWI_CNN(keras.Model):
    """Multi-branch convolutional regressor producing one scalar per example.

    One ``Conv2D`` branch is created per entry in ``filter_sizes``. Every
    kernel spans the full ``embedding_dims`` width, so each branch slides only
    along axis 1 of the input. Each branch is global-max-pooled, the pooled
    vectors are concatenated, and the result goes through
    Dropout -> Dense(256) -> Dropout -> Dense(64) -> Dense(1).

    Args:
        embedding_dims: Size of the last input axis; used as the kernel width.
        filter_sizes: Iterable of kernel heights, one convolution branch each.
        num_filters: Number of output channels of every branch.
        lang: Label stored on the instance as ``self.lang``; not used by the
            layers themselves.
    """

    def __init__(
            self,
            embedding_dims,
            filter_sizes,
            num_filters,
            lang="French",
    ):
        super().__init__()
        self.lang = lang

        # One convolution per kernel height, created in the order given.
        self.conv_layers = [
            layers.Conv2D(
                filters=num_filters,
                kernel_size=(filter_size, embedding_dims),
                activation='relu'
            )
            for filter_size in filter_sizes
        ]

        # GlobalMaxPooling2D holds no weights, so one instance is shared by
        # all branches.
        self.pooling = layers.GlobalMaxPooling2D()
        self.dropout1 = layers.Dropout(0.5)
        self.dense1 = layers.Dense(256, activation='relu')
        self.dropout2 = layers.Dropout(0.5)
        self.dense2 = layers.Dense(64, activation='relu')
        self.output_layer = layers.Dense(1)

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Rank-3 tensor whose last axis has ``embedding_dims``
                entries (presumably ``(batch, sequence, embedding)`` -- confirm
                against the caller).
            training: Enables the dropout layers when true.

        Returns:
            Tensor of shape ``(batch, 1)`` with the predicted value.
        """
        # Conv2D needs a channel axis.
        x = tf.expand_dims(inputs, -1)

        # Pool every branch *before* concatenating. With the default 'valid'
        # padding, branches with different kernel heights produce feature maps
        # of different heights, and tf.concat requires all non-concatenated
        # dimensions to match. After pooling each branch is
        # (batch, num_filters), so they always line up.
        pooled_branches = [self.pooling(conv(x)) for conv in self.conv_layers]

        x = tf.concat(pooled_branches, axis=-1)
        x = self.dropout1(x, training=training)
        x = self.dense1(x)
        x = self.dropout2(x, training=training)
        x = self.dense2(x)
        return self.output_layer(x)


def run_training(out_dir, x_train, y_train, x_valid, y_valid, feature_args, lang, configs):
    """Build, compile and fit a ``CWI_CNN``, then save it under ``out_dir``.

    Best weights (lowest ``val_mae``) are checkpointed to
    ``<out_dir>/checkpoints/model.ckpt`` and the final model is saved to
    ``<out_dir>/saved_model``.

    Args:
        out_dir: Run directory, as a ``str`` or ``Path``.
        x_train: Training inputs; ``x_train.shape[2]`` is taken as the
            embedding width, so the array must have at least three axes.
        y_train: Training targets.
        x_valid: Validation inputs.
        y_valid: Validation targets.
        feature_args: Accepted for interface compatibility; unused here.
        lang: Language label forwarded to ``CWI_CNN``.
        configs: Object exposing ``FILTER_SIZES`` (comma-separated integers in
            a string), ``NUM_FILTERS``, ``LEARNING_RATE``, ``NUM_EPOCHS`` and
            ``BATCH_SIZE``.

    Returns:
        The final path component of ``out_dir`` (the run's directory name).
    """
    # Normalize once so a plain string works everywhere, including the return.
    out_dir = Path(out_dir)

    print("x_train.shape:", x_train.shape)
    print("y_train.shape:", y_train.shape)

    model = CWI_CNN(
        embedding_dims=x_train.shape[2],
        filter_sizes=[int(size) for size in configs.FILTER_SIZES.split(",")],
        num_filters=configs.NUM_FILTERS,
        lang=lang
    )

    optimizer = keras.optimizers.Adam(learning_rate=configs.LEARNING_RATE)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    # Callbacks
    checkpoint_path = str(out_dir / "checkpoints" / "model.ckpt")
    cp_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_weights_only=True, save_best_only=True, monitor='val_mae', mode='min'
    )
    # NOTE(review): EarlyStopping is left on its default monitor while the
    # checkpoint tracks 'val_mae' -- confirm the mismatch is intentional.
    early_stopping = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

    # Train the model
    model.fit(
        x_train, y_train,
        epochs=configs.NUM_EPOCHS,
        batch_size=configs.BATCH_SIZE,
        validation_data=(x_valid, y_valid),
        callbacks=[cp_callback, early_stopping]
    )

    # Save the entire model
    model.save(str(out_dir / "saved_model"))

    # .name (not .stem) so a directory name containing a dot is returned whole.
    return out_dir.name


def evaluate(x_test, y_test, x_test_sents, model_dir=None, features=None, output_dir=None, test_name=None, lang=None):
    """Load a saved CNN regressor, predict on ``x_test`` and write a report.

    Args:
        x_test: Inputs passed to ``model.predict``.
        y_test: Gold targets. 1-D arrays and single-column 2-D arrays are
            flattened; any other shape is reduced with ``argmax`` over axis 1.
        x_test_sents: Forwarded unchanged to the report writer.
        model_dir: Name of a run directory under ``models/<lang>/CNNReg``.
            When ``None`` the most recently modified run directory is used.
        features: Forwarded unchanged to the report writer.
        output_dir: Where the report is written; defaults to the run directory.
        test_name: Forwarded unchanged to the report writer.
        lang: Language label used to locate ``models/<lang>/CNNReg``.

    Returns:
        The array returned by ``model.predict``, or ``None`` when no trained
        run exists.
    """
    runs_root = Path(f'models/{lang}/CNNReg')

    # iterdir() raises FileNotFoundError on a missing directory; treat that the
    # same as "nothing trained yet". Stray files are ignored so the newest
    # entry is always a run directory.
    if runs_root.is_dir():
        run_dirs = sorted(
            (entry for entry in runs_root.iterdir() if entry.is_dir()),
            key=lambda entry: entry.stat().st_mtime,
        )
    else:
        run_dirs = []

    if not run_dirs:
        print("You haven't trained a model yet.")
        return None

    if model_dir is not None:
        checkpoint_dir = runs_root / model_dir
    else:
        checkpoint_dir = run_dirs[-1]  # load the last checkpoint

    print(f"Checkpoint dir: {checkpoint_dir}")

    # Load the saved model
    model = keras.models.load_model(str(checkpoint_dir / "saved_model"))

    # Make predictions
    all_predictions = model.predict(x_test)

    if output_dir is None:
        output_dir = checkpoint_dir

    Path(output_dir).mkdir(parents=True, exist_ok=True)
    model_name = checkpoint_dir.parent.stem + '_' + checkpoint_dir.stem

    # argmax over axis 1 raises on 1-D targets and turns an (N, 1) column into
    # all zeros, so flatten those shapes and keep the argmax for the rest.
    y_test = np.asarray(y_test)
    if y_test.ndim == 1 or (y_test.ndim == 2 and y_test.shape[1] == 1):
        y_test = y_test.reshape(-1)
    else:
        y_test = np.argmax(y_test, axis=1)

    save_evaluation_report_reg(all_predictions, y_test, x_test_sents, output_dir, model_name, test_name, features)
    return all_predictions


def save_evaluation_report_reg(all_predictions, y_test, x_test_sents, output_dir, model_name, test_name, features):
    """Delegate report writing to ``helper.save_evaluation_report_reg``.

    All seven arguments are forwarded positionally, in the order received, and
    the helper's return value is discarded.
    """
    report_args = (
        all_predictions,
        y_test,
        x_test_sents,
        output_dir,
        model_name,
        test_name,
        features,
    )
    helper.save_evaluation_report_reg(*report_args)



def train(x_train, y_train, x_valid, y_valid, feature_args, lang=None, configs=None):
    """Create a timestamped run directory and train a model inside it.

    The run directory is ``models/<lang>/CNNReg/<unix-seconds>``. All arguments
    are handed to ``run_training`` together with that directory.

    Returns:
        Whatever ``run_training`` returns for this run.
    """
    run_dir = Path(f'models/{lang}/CNNReg/{int(time.time())}')
    run_dir.mkdir(parents=True, exist_ok=True)

    # logs.txt is created (or truncated) and held open for the duration of the
    # training call; nothing in this function writes to it.
    with open(run_dir / 'logs.txt', 'w'):
        run_name = run_training(run_dir, x_train, y_train, x_valid, y_valid, feature_args, lang, configs)
    return run_name

Leave a Comment