Untitled
unknown
python
a year ago
4.8 kB
5
Indexable
# @title test3 — works great!  # @title ChatGroq
# Pairwise dialogue-coherence scan: ask a Groq-hosted Llama-3 model whether
# every pair of sentences in a pickled dataframe could be consecutive turns
# in a natural conversation, then save the coherent pairs to CSV.
import re

import pandas as pd
from tqdm import tqdm
from google.colab import userdata
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

# Accept only a score in [0, 1]: "0", "1", "0.85", "1.0".
# The previous pattern (\b\d+(\.\d+)?\b) matched the FIRST number anywhere in
# the reply — e.g. the "1" in an echoed "Text 1" — yielding bogus scores.
_SCORE_RE = re.compile(r"\b(?:0(?:\.\d+)?|1(?:\.0+)?)\b")


class ConversationAnalyzer:
    """Scores whether two texts could be consecutive turns in a dialogue.

    Wraps a LangChain LCEL chain (prompt | ChatGroq | StrOutputParser) and
    parses the model's free-text reply into a (score, explanation) pair.
    """

    def __init__(self):
        # API key is read from Colab's secret store ('LlamaKey').
        self.model = ChatGroq(model_name="llama3-70b-8192",
                              api_key=userdata.get('LlamaKey'))
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an expert in analyzing conversations. Your task is to determine if two given texts could be consecutive turns in a natural, coherent dialogue. Consider context, relevance, and natural flow of conversation."),
            ("human", "Text 1: {text1}\nText 2: {text2}\n\nCould these two texts be consecutive turns in a natural, coherent dialogue? Respond with a score between 0 (not at all coherent) and 1 (completely coherent), and explain your reasoning."),
        ])
        self.chain = self.prompt | self.model | StrOutputParser()

    def analyze_pair(self, text1, text2):
        """Return (score, explanation) for the pair.

        score is a float in [0, 1]; on any parse failure it falls back to
        0.0 with a diagnostic explanation (the scan below skips score == 0).
        """
        result = self.chain.invoke({"text1": text1, "text2": text2})
        try:
            # Search for the first in-range numeric value in the response.
            score_match = _SCORE_RE.search(result)
            if score_match:
                score = float(score_match.group(0))
            else:
                raise ValueError("No numeric score found in the result")
            # Treat everything after the first line as the explanation.
            explanation = result.split("\n", 1)[1].strip() if "\n" in result else result.strip()
        except ValueError as ve:
            print(f"Error parsing result: {result}\nError: {ve}")
            score = 0.0
            explanation = "Failed to parse model output: could not convert string to float."
        except Exception as e:
            print(f"Unexpected error parsing result: {result}\nError: {e}")
            score = 0.0
            explanation = "Failed to parse model output: unexpected error."
        return score, explanation


class DataLoader:
    """Loads a pickled DataFrame once and serves the cached copy afterwards."""

    def __init__(self):
        self.dataframe = None

    def load_data(self, file_name):
        if self.dataframe is None:
            self.dataframe = pd.read_pickle(file_name)
            # Positional indices must align with iteration below, so drop
            # whatever index the pickle carried.
            self.dataframe = self.dataframe.reset_index(drop=True)
            print(f"Loaded {file_name}. Shape: {self.dataframe.shape}")
        return self.dataframe


def find_coherent_conversations(loader, analyzer, file_name):
    """Score every unordered pair of 'sentence' rows; return a DataFrame of
    pairs with a positive coherence score (O(n^2) model calls)."""
    coherent_pairs = []
    df = loader.load_data(file_name)
    total_pairs = len(df)
    for idx1 in tqdm(range(total_pairs), desc=f"Processing {file_name}"):
        for idx2 in range(idx1 + 1, total_pairs):
            text1 = df.loc[idx1, 'sentence']
            text2 = df.loc[idx2, 'sentence']
            score, explanation = analyzer.analyze_pair(text1, text2)
            if score > 0:  # Consider only pairs with a positive coherence score
                coherent_pairs.append({
                    'index1': idx1,
                    'index2': idx2,
                    'text1': text1,
                    'text2': text2,
                    'score': score,
                    'explanation': explanation
                })
                print(f"Coherent pair found with score {score}:")
                print(f"Text 1: {text1}")
                print(f"Text 2: {text2}")
                print(f"Explanation: {explanation}")
                print("-" * 50)
    return pd.DataFrame(coherent_pairs)


loader = DataLoader()
analyzer = ConversationAnalyzer()
file_name = '/content/df_for_dori.pkl'

results = find_coherent_conversations(loader, analyzer, file_name)

# Load original dataframe to get additional information
original_df = loader.load_data(file_name)

if results.empty:
    # Guard: an empty result has no columns, so the enrichment below would
    # raise KeyError on 'index1'.
    print("No coherent conversation pairs found; nothing to save.")
else:
    # Add path/timing metadata for both sides of each pair, keyed by the
    # positional index stored in index1/index2.
    for side in ('1', '2'):
        idx = results[f'index{side}']
        for col in ('path', 'start_cd', 'end_cd', 'times'):
            results[f'{col}{side}'] = idx.map(original_df[col])

    # Reorder columns
    results = results[['index1', 'path1', 'text1', 'start_cd1',
                       'end_cd1', 'times1', 'index2', 'path2', 'text2',
                       'start_cd2', 'end_cd2', 'times2', 'score', 'explanation']]

    # Save results
    output_file = 'coherent_conversations_results.csv'
    results.to_csv(output_file, index=False)
    print(f"Found {len(results)} coherent conversation pairs. Results saved to '{output_file}'")
Editor is loading...
Leave a Comment