# @title test3 excellent, that's it !!
# @title ChatGroq
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
import pandas as pd
from tqdm import tqdm
from google.colab import userdata
import re
class ConversationAnalyzer:
    def __init__(self):
        # Groq-hosted Llama 3 70B model; the API key is read from Colab's userdata store
        self.model = ChatGroq(model_name="llama3-70b-8192", api_key=userdata.get('LlamaKey'))
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an expert in analyzing conversations. Your task is to determine if two given texts could be consecutive turns in a natural, coherent dialogue. Consider context, relevance, and natural flow of conversation."),
            ("human", "Text 1: {text1}\nText 2: {text2}\n\nCould these two texts be consecutive turns in a natural, coherent dialogue? Respond with a score between 0 (not at all coherent) and 1 (completely coherent), and explain your reasoning."),
        ])
        self.chain = self.prompt | self.model | StrOutputParser()

    def analyze_pair(self, text1, text2):
        result = self.chain.invoke({"text1": text1, "text2": text2})
        try:
            # Search for the first occurrence of a numeric value in the response
            score_match = re.search(r"\b\d+(\.\d+)?\b", result)
            if score_match:
                score = float(score_match.group(0))
            else:
                raise ValueError("No numeric score found in the result")
            # Treat everything after the first line as the explanation
            explanation = result.split("\n", 1)[1].strip() if "\n" in result else result.strip()
        except ValueError as ve:
            print(f"Error parsing result: {result}\nError: {ve}")
            score = 0.0
            explanation = "Failed to parse model output: no numeric score found."
        except Exception as e:
            print(f"Unexpected error parsing result: {result}\nError: {e}")
            score = 0.0
            explanation = "Failed to parse model output: unexpected error."
        return score, explanation
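# A minimal sanity check for ConversationAnalyzer.analyze_pair (not part of the
# original script; the sample sentences are illustrative assumptions, and a valid
# Groq API key must be stored under 'LlamaKey' in Colab userdata for it to run):
#
#   analyzer = ConversationAnalyzer()
#   score, explanation = analyzer.analyze_pair(
#       "Are you coming to the meeting tomorrow?",
#       "Yes, I'll be there right after lunch.",
#   )
#   print(score, explanation)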
class DataLoader:
    def __init__(self):
        self.dataframe = None

    def load_data(self, file_name):
        # Load the pickled DataFrame once and cache it for subsequent calls
        if self.dataframe is None:
            self.dataframe = pd.read_pickle(file_name)
            self.dataframe = self.dataframe.reset_index(drop=True)
            print(f"Loaded {file_name}. Shape: {self.dataframe.shape}")
        return self.dataframe
def find_coherent_conversations(loader, analyzer, file_name):
    coherent_pairs = []
    df = loader.load_data(file_name)
    total_rows = len(df)
    # Score every unordered pair of sentences with the LLM
    for idx1 in tqdm(range(total_rows), desc=f"Processing {file_name}"):
        for idx2 in range(idx1 + 1, total_rows):
            text1 = df.loc[idx1, 'sentence']
            text2 = df.loc[idx2, 'sentence']
            score, explanation = analyzer.analyze_pair(text1, text2)
            if score > 0:  # Consider only pairs with a positive coherence score
                coherent_pairs.append({
                    'index1': idx1,
                    'index2': idx2,
                    'text1': text1,
                    'text2': text2,
                    'score': score,
                    'explanation': explanation
                })
                print(f"Coherent pair found with score {score}:")
                print(f"Text 1: {text1}")
                print(f"Text 2: {text2}")
                print(f"Explanation: {explanation}")
                print("-" * 50)
    return pd.DataFrame(coherent_pairs)
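# Note: the pairwise scan above makes one model call per sentence pair, i.e.
# n * (n - 1) / 2 Groq requests for n rows (about 19,900 calls for 200 rows),
# so runtime and API cost grow quadratically with the input size.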
loader = DataLoader()
analyzer = ConversationAnalyzer()
file_name = '/content/df_for_dori.pkl'
results = find_coherent_conversations(loader, analyzer, file_name)
# Load original dataframe to get additional information
original_df = loader.load_data(file_name)
# Add additional information to results
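# Note: Series.map(other_series) looks values up by the other Series' index, so the
# lines below rely on original_df keeping the default 0..n-1 RangeIndex set by
# reset_index(drop=True) in DataLoader.load_data.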
results['path1'] = results['index1'].map(original_df['path'])
results['start_cd1'] = results['index1'].map(original_df['start_cd'])
results['end_cd1'] = results['index1'].map(original_df['end_cd'])
results['times1'] = results['index1'].map(original_df['times'])
results['path2'] = results['index2'].map(original_df['path'])
results['start_cd2'] = results['index2'].map(original_df['start_cd'])
results['end_cd2'] = results['index2'].map(original_df['end_cd'])
results['times2'] = results['index2'].map(original_df['times'])
# Reorder columns
results = results[['index1', 'path1', 'text1', 'start_cd1', 'end_cd1', 'times1',
                   'index2', 'path2', 'text2', 'start_cd2', 'end_cd2', 'times2',
                   'score', 'explanation']]
# Save results
output_file = 'coherent_conversations_results.csv'
results.to_csv(output_file, index=False)
print(f"Found {len(results)} coherent conversation pairs. Results saved to '{output_file}'")Editor is loading...