# Environment setup (Ubuntu/Debian)
sudo apt update && sudo apt upgrade -y
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install -y python3.10 python3-tk
pip install datasets transformers
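The Hugging Face Trainer used below runs on PyTorch, so install it as well if it is not already present (plain CPU build shown; pick the wheel that matches your hardware):
pip install torch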
import os
import json
from datasets import Dataset
from transformers import (GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments,
                          DataCollatorForLanguageModeling, pipeline)
from tkinter import Tk, filedialog
# Base directory where all files used by this script are stored
base_dir = os.path.dirname(os.path.abspath(__file__))
# File paths
config_file = os.path.join(base_dir, 'config.json')
data_file = os.path.join(base_dir, 'training_data.txt')
model_dir = os.path.join(base_dir, 'fine_tuned_model')
results_dir = os.path.join(base_dir, 'results')
# Function to open a file dialog and let the user select a directory
def select_directory():
    root = Tk()
    root.withdraw()  # Hide the root window
    directory_path = filedialog.askdirectory(title="Select Directory Containing Lua Scripts")
    root.destroy()  # Clean up the hidden Tk window
    return directory_path
# Function to load and concatenate Lua scripts from a directory
def load_lua_scripts_from_directory(directory_path):
    lua_data = ''
    for filename in os.listdir(directory_path):
        if filename.endswith('.lua'):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                lua_data += file.read() + '\n\n'  # Blank line as a delimiter between scripts
    return lua_data
# Function to append the new data to the training data file
def append_to_training_data(new_data):
    with open(data_file, 'a', encoding='utf-8') as file:
        file.write(new_data + '\n\n')
# Function to train the model on the data
def train_model_on_data():
    if not os.path.exists(data_file):
        print("No training data found. Please add some data first.")
        return
    # Load the existing training data
    with open(data_file, 'r', encoding='utf-8') as file:
        lua_text = file.read()
    # Split on the blank-line delimiter so each script is its own example;
    # a single giant example would otherwise be truncated to max_length tokens
    scripts = [s for s in lua_text.split('\n\n') if s.strip()]
    dataset = Dataset.from_dict({"text": scripts})
    # Load GPT-2 model and tokenizer
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    # GPT-2 has no pad token by default; reuse the EOS token so padding works
    tokenizer.pad_token = tokenizer.eos_token
    # Tokenize the dataset
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)
    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=['text'])
    # The collator copies input_ids into labels, which causal-LM training requires
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    # Training arguments
    training_args = TrainingArguments(
        output_dir=results_dir,
        per_device_train_batch_size=2,
        num_train_epochs=3,
        save_steps=10_000,
        save_total_limit=2,
    )
    # Trainer setup
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    # Train the model
    trainer.train()
    # Save the fine-tuned model and tokenizer
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    print("Training completed and model saved.")
# Function to generate a conversational code response with the AI
def generate_code_with_ai():
    if not os.path.exists(model_dir):
        print("Fine-tuned model not found. Please train the model first.")
        return
    # Load the fine-tuned model and tokenizer
    model = GPT2LMHeadModel.from_pretrained(model_dir)
    tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
    # Create a text generation pipeline
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    # Ask the user what code they want to generate
    prompt = input("What should the AI generate? ")
    # Generate the conversational response and code
    conversational_prompt = (
        f"Generate Lua code based on the following request: {prompt}. "
        f"Provide an explanation followed by the Lua code separated by a clear divider."
    )
    response = generator(conversational_prompt, max_length=200,
                         num_return_sequences=1)[0]['generated_text']
    # Display the generated response; the model may not emit a ``` divider,
    # so fall back to printing the whole response as the explanation
    if "```" in response:
        explanation, code = response.split("```")[0], response.split("```")[-1]
    else:
        explanation, code = response, ''
    print("Explanation:\n")
    print(explanation.strip())
    print("\n---\nLua Code:\n")
    print(code.strip())
# Main program
def main():
    # Ask the user what they want to do
    choice = input("Do you want to (1) Add more data or (2) Generate code? ")
    if choice == '1':
        # Check if the config file exists
        if os.path.exists(config_file):
            # Load the last used directory from the config file
            with open(config_file, 'r') as file:
                config = json.load(file)
            lua_directory_path = config.get('last_directory')
            print(f"Using the previously selected directory: {lua_directory_path}")
        else:
            lua_directory_path = None
        # If no directory was found, prompt the user to select one
        if not lua_directory_path:
            lua_directory_path = select_directory()
            if lua_directory_path:
                # Save the selected directory to the config file
                with open(config_file, 'w') as file:
                    json.dump({'last_directory': lua_directory_path}, file)
            else:
                print("No directory selected. Exiting...")
                return
        # Load new data and append it to the existing training data file
        new_data = load_lua_scripts_from_directory(lua_directory_path)
        append_to_training_data(new_data)
    elif choice == '2':
        # Generate code with AI
        generate_code_with_ai()
    else:
        print("Invalid choice. Exiting...")
    # Retrain the model after new data has been added
    if choice == '1':
        train_model_on_data()

if __name__ == "__main__":
    main()
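Once training has produced the fine_tuned_model directory, the saved model can also be reloaded outside this script. A minimal sketch, assuming it runs from the same directory as the script (the Lua prompt string is just an illustration):

# Reload the fine-tuned model saved by the script above;
# pipeline() loads both the model and tokenizer from the directory.
from transformers import pipeline

generator = pipeline('text-generation', model='fine_tuned_model')
print(generator('-- Lua: print a greeting', max_new_tokens=60)[0]['generated_text'])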