Untitled

mail@pastecode.io avatar
unknown
plain_text
5 months ago
5.8 kB
2
Indexable
# Setup (run these in a shell before using the script):
#   sudo apt update && sudo apt upgrade -y
#   sudo apt-get install python3-tk
#   sudo add-apt-repository ppa:deadsnakes/ppa
#   sudo apt install python3.10
#   pip install datasets
#   pip install transformers
import os
import json
from datasets import Dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, pipeline
from tkinter import Tk, filedialog

# Everything the tool reads or writes lives next to this script.
base_dir = os.path.split(os.path.abspath(__file__))[0]

# Resolve each artefact name against base_dir in a single pass:
#   config_file - JSON file remembering the last selected directory
#   data_file   - accumulated training text
#   model_dir   - where the fine-tuned model is saved
#   results_dir - output directory handed to the trainer
config_file, data_file, model_dir, results_dir = (
    os.path.join(base_dir, leaf)
    for leaf in ('config.json', 'training_data.txt', 'fine_tuned_model', 'results')
)

def select_directory():
    """Ask the user to pick a directory through a folder-selection dialog.

    Returns:
        The value returned by ``filedialog.askdirectory``. It is empty
        (falsy) when the dialog is cancelled, so callers can use a plain
        truthiness check.
    """
    root = Tk()
    try:
        root.withdraw()  # Hide the empty root window; only the dialog should show
        return filedialog.askdirectory(title="Select Directory Containing Lua Scripts")
    finally:
        # Tear the hidden root down so repeated calls do not accumulate
        # Tk interpreters for the lifetime of the process.
        root.destroy()

def load_lua_scripts_from_directory(directory_path):
    """Read every ``.lua`` file directly inside a directory into one string.

    Args:
        directory_path: Directory to scan. Sub-directories are not descended
            into.

    Returns:
        The contents of the matching files, each followed by a blank-line
        delimiter, concatenated in sorted filename order. An empty string
        when no file matches.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    scripts = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # concatenated output reproducible between runs and machines.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.lua'):
            continue
        file_path = os.path.join(directory_path, filename)
        if not os.path.isfile(file_path):
            continue  # e.g. a sub-directory whose name happens to end in ".lua"
        with open(file_path, 'r', encoding='utf-8') as file:
            scripts.append(file.read())
    # Single join instead of repeated string concatenation in the loop.
    return ''.join(script + '\n\n' for script in scripts)

def append_to_training_data(new_data):
    """Append a chunk of text to the training data file.

    The chunk is written in append mode (so the file is created when it does
    not exist yet) and is followed by a blank-line delimiter.
    """
    with open(data_file, mode='a', encoding='utf-8') as handle:
        handle.write(''.join((new_data, '\n\n')))

def train_model_on_data(block_size=128):
    """Fine-tune GPT-2 on the accumulated training data and save the result.

    Reads ``data_file``, tokenizes the whole text, cuts the token stream into
    consecutive examples of at most ``block_size`` tokens, trains for three
    epochs and writes the model and tokenizer to ``model_dir``. Prints a
    message and returns early when there is nothing to train on.

    Args:
        block_size: Maximum number of tokens per training example. Must be
            positive and should not exceed GPT-2's 1024-token context window.

    Raises:
        ValueError: If ``block_size`` is not positive.
    """
    # Function-scope import: only this function needs the collator.
    from transformers import DataCollatorForLanguageModeling

    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    if not os.path.exists(data_file):
        print("No training data found. Please add some data first.")
        return

    # Load the existing training data
    with open(data_file, 'r', encoding='utf-8') as file:
        lua_text = file.read()

    if not lua_text.strip():
        # The file exists but holds only delimiters: nothing to learn from.
        print("No training data found. Please add some data first.")
        return

    # Load GPT-2 model and tokenizer
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    # GPT-2 ships without a padding token; reuse end-of-text so the collator
    # can pad the final, shorter block.
    tokenizer.pad_token = tokenizer.eos_token

    # Tokenize the full corpus once and slice it into fixed-size blocks.
    # Truncating the single concatenated text instead would keep only its
    # first block and silently drop the rest of the data.
    token_ids = tokenizer(lua_text)["input_ids"]
    blocks = [
        token_ids[start:start + block_size]
        for start in range(0, len(token_ids), block_size)
    ]
    dataset = Dataset.from_dict({"input_ids": blocks})

    # With mlm=False the collator pads each batch and copies input_ids into
    # labels, which the model needs in order to return a loss to the Trainer.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=results_dir,
        per_device_train_batch_size=2,
        num_train_epochs=3,
        save_steps=10_000,
        save_total_limit=2,
    )

    # Trainer setup
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=data_collator,
    )

    # Train the model
    trainer.train()

    # Save the fine-tuned model
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    print("Training completed and model saved.")

def _split_explanation_and_code(response):
    """Split generated text into ``(explanation, code)`` around ``` fences."""
    explanation, fence, remainder = response.partition("```")
    if not fence:
        # No fenced block: show the text once, as code, instead of printing
        # the same text under both headings.
        return "", response
    # The code is what sits between the first pair of fences; text after the
    # closing fence is not code.
    code = remainder.partition("```")[0]
    first_line, newline, rest = code.partition("\n")
    if newline and first_line.strip().lower() == "lua":
        code = rest  # drop the language tag of a "```lua" fence
    return explanation, code


def generate_code_with_ai():
    """Prompt the user for a request and print the fine-tuned model's answer.

    Loads the model and tokenizer from ``model_dir``, asks on stdin what to
    generate, runs one text-generation pass and prints the result split into
    an explanation section and a Lua code section. Prints a message and
    returns early when ``model_dir`` does not exist.
    """
    if not os.path.exists(model_dir):
        print("Fine-tuned model not found. Please train the model first.")
        return

    # Load the fine-tuned model and tokenizer
    model = GPT2LMHeadModel.from_pretrained(model_dir)
    tokenizer = GPT2Tokenizer.from_pretrained(model_dir)

    # Create a text generation pipeline
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

    # Ask the user what code they want to generate
    prompt = input("What should the AI generate? ")

    conversational_prompt = (
        f"Generate Lua code based on the following request: {prompt}. "
        f"Provide an explanation followed by the Lua code separated by a clear divider."
    )

    response = generator(conversational_prompt, max_length=200, num_return_sequences=1)[0]['generated_text']

    # The generated text begins with the prompt itself; drop that echo so the
    # instructions are not shown back to the user as part of the answer.
    if response.startswith(conversational_prompt):
        response = response[len(conversational_prompt):]

    # Display the generated response
    explanation, code = _split_explanation_and_code(response)
    print("Explanation:\n")
    print(explanation.strip())
    print("\n---\nLua Code:\n")
    print(code.strip())

def main():
    """Interactive entry point: add training data and retrain, or generate code.

    Choice ``1`` loads Lua scripts from the remembered directory (or from one
    picked in a dialog when none is remembered or the remembered one is no
    longer a directory), appends them to the training data and retrains the
    model. Choice ``2`` generates code with the fine-tuned model. Any other
    input prints a message and exits.
    """
    choice = input("Do you want to (1) Add more data or (2) Generate code? ")

    if choice == '2':
        generate_code_with_ai()
        return
    if choice != '1':
        print("Invalid choice. Exiting...")
        return

    # Reuse the last directory only when the config is readable and the
    # directory still exists; otherwise fall through to the picker instead
    # of crashing later in os.listdir.
    lua_directory_path = None
    if os.path.exists(config_file):
        try:
            with open(config_file, 'r') as file:
                config = json.load(file)
        except (OSError, json.JSONDecodeError):
            config = {}
        stored = config.get('last_directory') if isinstance(config, dict) else None
        if stored and os.path.isdir(stored):
            lua_directory_path = stored
            print(f"Using the previously selected directory: {lua_directory_path}")

    # If no usable directory was remembered, prompt the user to select one
    if not lua_directory_path:
        lua_directory_path = select_directory()
        if not lua_directory_path:
            print("No directory selected. Exiting...")
            return
        # Remember the selection for the next run
        with open(config_file, 'w') as file:
            json.dump({'last_directory': lua_directory_path}, file)

    # Load new data and append it to the existing training data file
    new_data = load_lua_scripts_from_directory(lua_directory_path)
    if not new_data.strip():
        # Nothing was read: skip the append and the expensive retraining.
        print("No Lua scripts found in the selected directory. Exiting...")
        return
    append_to_training_data(new_data)

    # Retrain the model on the updated training data
    train_model_on_data()

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Leave a Comment