# --- One-time setup (Debian/Ubuntu) ---
sudo apt update && sudo apt upgrade -y
sudo apt-get install -y python3-tk          # tkinter, needed for the directory picker
sudo add-apt-repository ppa:deadsnakes/ppa  # only needed if your distro lacks Python 3.10
sudo apt install -y python3.10
pip install datasets transformers torch     # torch assumed as the Trainer backend

# --- Script ---
import os
import json

from datasets import Dataset
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)
from tkinter import Tk, filedialog

# Directory where all files related to the AI are stored
base_dir = os.path.dirname(os.path.abspath(__file__))

# File paths
config_file = os.path.join(base_dir, 'config.json')
data_file = os.path.join(base_dir, 'training_data.txt')
model_dir = os.path.join(base_dir, 'fine_tuned_model')
results_dir = os.path.join(base_dir, 'results')


# Function to open a file dialog and let the user select a directory
def select_directory():
    root = Tk()
    root.withdraw()  # Hide the root window
    directory_path = filedialog.askdirectory(title="Select Directory Containing Lua Scripts")
    root.destroy()
    return directory_path


# Function to load and concatenate Lua scripts from a directory
def load_lua_scripts_from_directory(directory_path):
    lua_data = ''
    for filename in os.listdir(directory_path):
        if filename.endswith('.lua'):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                lua_data += file.read() + '\n\n'  # Delimiter between scripts
    return lua_data


# Function to save the new data to the training data file
def append_to_training_data(new_data):
    with open(data_file, 'a', encoding='utf-8') as file:
        file.write(new_data + '\n\n')


# Function to train the model on the data
def train_model_on_data():
    if not os.path.exists(data_file):
        print("No training data found. Please add some data first.")
        return

    # Load the existing training data and split it back into individual
    # scripts on the '\n\n' delimiter, so each script becomes its own
    # example instead of one giant string truncated to 128 tokens.
    with open(data_file, 'r', encoding='utf-8') as file:
        lua_text = file.read()
    scripts = [s for s in lua_text.split('\n\n') if s.strip()]
    dataset = Dataset.from_dict({"text": scripts})

    # Load GPT-2 model and tokenizer. GPT-2 ships without a pad token, so
    # reuse EOS; otherwise padding='max_length' raises an error.
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token

    # Tokenize the dataset, dropping the raw text column so the collator
    # only sees tensors
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=['text'])

    # Causal-LM collator (mlm=False) copies input_ids into labels, which
    # the Trainer needs in order to compute a loss
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=results_dir,
        per_device_train_batch_size=2,
        num_train_epochs=3,
        save_steps=10_000,
        save_total_limit=2,
    )

    # Trainer setup
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    # Train the model
    trainer.train()

    # Save the fine-tuned model
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    print("Training completed and model saved.")
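
# --- Optional: token-block packing (a sketch, not wired into the flow above) ---
# Fixed-length padding wastes compute on short scripts and truncates long ones.
# A common alternative is to tokenize the whole corpus once and slice it into
# contiguous blocks. This helper is illustrative only; the name
# chunk_into_blocks and block_size=128 are assumptions, not part of the
# original script.
def chunk_into_blocks(tokenizer, text, block_size=128):
    ids = tokenizer(text)['input_ids']
    # Drop the trailing remainder so every block is exactly block_size long
    blocks = [ids[i:i + block_size] for i in range(0, len(ids) - block_size + 1, block_size)]
    return Dataset.from_dict({
        "input_ids": blocks,
        "attention_mask": [[1] * block_size for _ in blocks],
    })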

# Function to generate a conversational code response with the fine-tuned model
def generate_code_with_ai():
    if not os.path.exists(model_dir):
        print("Fine-tuned model not found. Please train the model first.")
        return

    # Load the fine-tuned model and tokenizer
    model = GPT2LMHeadModel.from_pretrained(model_dir)
    tokenizer = GPT2Tokenizer.from_pretrained(model_dir)

    # Create a text generation pipeline
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

    # Ask the user what code they want to generate
    prompt = input("What should the AI generate? ")

    # Generate the conversational response and code
    conversational_prompt = (
        f"Generate Lua code based on the following request: {prompt}. "
        f"Provide an explanation followed by the Lua code separated by a clear divider."
    )
    response = generator(conversational_prompt, max_length=200, num_return_sequences=1)[0]['generated_text']

    # Split on the ``` divider if the model produced one; a fine-tuned GPT-2
    # is not guaranteed to emit it, so fall back to the raw response
    if "```" in response:
        parts = response.split("```")
        explanation, code = parts[0], parts[-1]
        print("Explanation:\n")
        print(explanation.strip())
        print("\n---\nLua Code:\n")
        print(code.strip())
    else:
        print(response.strip())


# Main program
def main():
    # Ask the user what they want to do
    choice = input("Do you want to (1) Add more data or (2) Generate code? ")

    if choice == '1':
        # Load the last used directory from the config file if one was saved
        if os.path.exists(config_file):
            with open(config_file, 'r') as file:
                config = json.load(file)
            lua_directory_path = config.get('last_directory')
            print(f"Using the previously selected directory: {lua_directory_path}")
        else:
            lua_directory_path = None

        # If no directory was found, prompt the user to select one
        if not lua_directory_path:
            lua_directory_path = select_directory()
            if lua_directory_path:
                # Save the selected directory to the config file
                with open(config_file, 'w') as file:
                    json.dump({'last_directory': lua_directory_path}, file)
            else:
                print("No directory selected. Exiting...")
                return

        # Load new data, append it to the training file, then retrain
        new_data = load_lua_scripts_from_directory(lua_directory_path)
        append_to_training_data(new_data)
        train_model_on_data()
    elif choice == '2':
        # Generate code with AI
        generate_code_with_ai()
    else:
        print("Invalid choice. Exiting...")


if __name__ == "__main__":
    main()
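
# --- Note on decoding (an assumption, not in the original) ---
# By default the pipeline decodes greedily with GPT-2, which tends to
# produce repetitive Lua. The standard generate() sampling knobs can be
# passed straight through the pipeline call, e.g.:
#
#   response = generator(
#       conversational_prompt,
#       max_length=200,
#       do_sample=True,                        # sample instead of greedy decoding
#       temperature=0.7,                       # soften the token distribution
#       top_p=0.9,                             # nucleus sampling
#       pad_token_id=tokenizer.eos_token_id,   # silences the pad-token warning
#   )[0]['generated_text']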