# Untitled
#%% import
import copy
import difflib
import itertools
import os
import re
from datetime import datetime, timezone

import openai
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# Set your OpenAI GPT-3 API key. The OPENAI_API_KEY environment variable takes
# precedence so the secret does not have to be committed with the script; the
# literal below is only a placeholder.
api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_API_KEY')
openai.api_key = api_key

# NOTE(review): `openai.Completion` is the pre-1.0 SDK interface and
# "text-davinci-003" is believed to have been retired by OpenAI -- confirm and
# pass a currently supported completion model via `engine=` if so.
COMPLETION_ENGINE = "text-davinci-003"

# Splits text into alternating whitespace / non-whitespace tokens so that
# "".join(tokens) reproduces the input exactly.
_TOKEN_RE = re.compile(r"\s+|\S+")


#%% Function to read content from a DOCX file
def read_docx(file_path):
    """Open the DOCX file at *file_path* and return the python-docx Document."""
    return Document(file_path)


def _run_text(paragraph):
    """Return the concatenated text of the runs in ``paragraph.runs``."""
    return "".join(run.text for run in paragraph.runs)


def _proofreadable_paragraphs(doc):
    """Return the body paragraphs of *doc* whose run text is not blank."""
    return [p for p in doc.paragraphs if _run_text(p).strip()]


# Function to proofread text using ChatGPT API
def proofread_text(text_to_proofread, engine=COMPLETION_ENGINE):
    """Return a proofread version of *text_to_proofread*.

    Args:
        text_to_proofread: Plain text to send to the completion endpoint.
        engine: Name of the completion model to use.

    Returns:
        The model's reply with surrounding whitespace stripped. Blank input is
        returned unchanged without calling the API.
    """
    if not text_to_proofread.strip():
        return text_to_proofread

    # The model can only return text, so ask for the corrected text alone.
    # The earlier wording asked it to "generate a .docx file with tracked
    # changes", which it cannot do and which invites commentary that would be
    # written into the document. Revision markup is produced locally instead.
    prompt = (
        "Automatically proofread the following text keeping in mind grammar, "
        "clarity, flow, professional style, but also try to maintain the "
        "author's style as much as possible. Return only the corrected text, "
        "with no commentary or markup."
        f"\n\n{text_to_proofread}"
    )

    # Make a request to the OpenAI API
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        temperature=0.7,
        max_tokens=2000,  # Increase max_tokens as needed
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=None,  # Add any specific stop words if needed
    )

    # Extract the proofread suggestions from the API response
    return response.choices[0].text.strip()


#%% Helpers that build WordprocessingML revision markup
def _make_run(text, template_rpr, deleted=False):
    """Build a ``w:r`` element holding *text*.

    A copy of *template_rpr* (a ``w:rPr`` element or None) is attached so the
    new run keeps that formatting. Deleted text is stored in ``w:delText``
    rather than ``w:t``.
    """
    run = OxmlElement("w:r")
    if template_rpr is not None:
        run.append(copy.deepcopy(template_rpr))
    text_element = OxmlElement("w:delText" if deleted else "w:t")
    # Without xml:space="preserve" leading/trailing spaces may be dropped.
    text_element.set(qn("xml:space"), "preserve")
    text_element.text = text
    run.append(text_element)
    return run


def _make_revision(tag, run, author, timestamp, revision_id):
    """Wrap *run* in a ``w:ins`` or ``w:del`` element (*tag*) with metadata."""
    revision = OxmlElement(tag)
    revision.set(qn("w:id"), str(revision_id))
    revision.set(qn("w:author"), author)
    revision.set(qn("w:date"), timestamp)
    revision.append(run)
    return revision


def _next_revision_id(doc):
    """Return an integer one greater than any numeric ``w:id`` in the body."""
    used = [
        int(value)
        for value in doc.element.xpath("//@w:id")
        if value.lstrip("-").isdigit()
    ]
    return max(used, default=0) + 1


def _rewrite_paragraph(paragraph, new_text, author, timestamp, revision_ids):
    """Replace the paragraph's runs with a word-level tracked diff.

    Unchanged words become plain runs, removed words are wrapped in ``w:del``
    and added words in ``w:ins``. A paragraph whose text already equals
    *new_text* is left untouched.
    """
    old_text = _run_text(paragraph)
    if old_text == new_text:
        return

    runs = paragraph.runs
    p_element = paragraph._p
    # Every rebuilt run reuses the first run's formatting; formatting that
    # differed between the original runs of a changed paragraph is not kept.
    template_rpr = runs[0].element.find(qn("w:rPr")) if runs else None
    # Remember where the first run sat so the new runs take its place.
    insert_at = p_element.index(runs[0].element) if runs else len(p_element)
    for run in runs:
        p_element.remove(run.element)

    old_tokens = _TOKEN_RE.findall(old_text)
    new_tokens = _TOKEN_RE.findall(new_text)
    matcher = difflib.SequenceMatcher(a=old_tokens, b=new_tokens, autojunk=False)

    new_elements = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        old_part = "".join(old_tokens[i1:i2])
        new_part = "".join(new_tokens[j1:j2])
        if tag == "equal":
            new_elements.append(_make_run(old_part, template_rpr))
            continue
        # "replace" yields both parts, "delete"/"insert" only one of them.
        if old_part:
            new_elements.append(_make_revision(
                "w:del", _make_run(old_part, template_rpr, deleted=True),
                author, timestamp, next(revision_ids)))
        if new_part:
            new_elements.append(_make_revision(
                "w:ins", _make_run(new_part, template_rpr),
                author, timestamp, next(revision_ids)))

    for offset, element in enumerate(new_elements):
        p_element.insert(insert_at + offset, element)


#%% Function to apply tracked changes to a document
def apply_tracked_changes(original_doc, changes, author="ChatGPT Proofreader"):
    """Record *changes* in *original_doc* as insert/delete revisions.

    Args:
        original_doc: python-docx Document to modify in place.
        changes: The proofread text, one entry per paragraph of
            *original_doc* that has non-blank run text, in document order.
            Either a sequence of strings or a single string with paragraphs
            separated by newlines (blank lines are ignored).
        author: Name stored on each revision element.

    Returns:
        *original_doc*, for call chaining.

    Raises:
        ValueError: If the number of proofread paragraphs does not match the
            number of non-blank paragraphs in the document.

    NOTE(review): only the runs exposed by ``paragraph.runs`` are diffed and
    rebuilt; how hyperlinks, fields, tabs and in-paragraph line breaks come
    through has not been verified.
    """
    if isinstance(changes, str):
        revised_texts = [line for line in changes.split("\n") if line.strip()]
    else:
        revised_texts = list(changes)

    targets = _proofreadable_paragraphs(original_doc)
    if len(revised_texts) != len(targets):
        raise ValueError(
            f"Expected {len(targets)} proofread paragraphs, "
            f"got {len(revised_texts)}; cannot align changes with the document."
        )

    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    revision_ids = itertools.count(_next_revision_id(original_doc))
    for paragraph, new_text in zip(targets, revised_texts):
        _rewrite_paragraph(paragraph, new_text, author, timestamp, revision_ids)
    return original_doc


#%% Proofread a DOCX file and save the result
def main(docx_path='9693-ao.docx', output_path='proofread_output.docx'):
    """Proofread *docx_path* paragraph by paragraph and save *output_path*."""
    # Read content from the DOCX file
    original_doc = read_docx(docx_path)

    # Split the content on paragraph boundaries. A Document has no `.text`
    # attribute, and fixed-size character slices would cut words in half and
    # could not be mapped back onto paragraphs afterwards.
    chunks = [_run_text(p) for p in _proofreadable_paragraphs(original_doc)]

    # Proofread each chunk and store the results
    proofread_results = []
    for chunk in chunks:
        # Keep each reply on one line so it stays a single paragraph, and fall
        # back to the original text if the model returns nothing.
        proofread_result = " ".join(proofread_text(chunk).splitlines())
        proofread_results.append(proofread_result or chunk)

    # Apply tracked changes to the original document
    tracked_changes_doc = apply_tracked_changes(original_doc, proofread_results)

    # Save the document with tracked changes
    tracked_changes_doc.save(output_path)
    print(f"Proofread document with tracked changes saved to '{output_path}'.")


if __name__ == "__main__":
    main()
# Leave a Comment