pgvector vector search for redmine issues
import psycopg2 from ollama import embed from configparser import ConfigParser def load_config(filename='database.ini', section='postgresql'): parser = ConfigParser() parser.read(filename) config = {} if parser.has_section(section): params = parser.items(section) for param in params: config[param[0]] = param[1] else: raise Exception(f"Section {section} not found in the {filename} file.") return config def connect(config): try: conn = psycopg2.connect(**config) print("Connected to the PostgreSQL server.") return conn except psycopg2.DatabaseError as error: print(f"Database connection error: {error}") return None def fetch_issues_without_embeddings(conn): with conn.cursor() as cur: # find issues without embeddings, so to add to them later cur.execute(""" SELECT issues.id AS issue_id, issues.title, issues.description, users.id AS user_id, users.name AS user_name, users.email AS user_email FROM issues LEFT JOIN users ON issues.user_id = users.id WHERE issues.embedding IS NULL """) issues = cur.fetchall() return issues def create_embeddings(issues): issue_texts = [ f"Issue Title: {title}\nIssue Description: {description}\nUser Name: {user_name}\nUser ID: {user_id}\nUser Email: {user_email}" for _, title, description, user_id, user_name, user_email in issues ] embeddings = embed( model="llama3.2", input=issue_texts )["embeddings"] return embeddings, issue_texts def store_embeddings(conn, issues, embeddings): with conn.cursor() as cur: for (issue_id, _, _, _, _, _), embedding in zip(issues, embeddings): cur.execute( "UPDATE issues SET embedding = %s WHERE id = %s", (embedding, issue_id) ) conn.commit() print("Embeddings stored successfully.") def search_issues(conn, question): question_embedding = embed(model="llama3.2", input=question)["embeddings"][0] with conn.cursor() as cur: # find issues based on vector similarity (aka distance) cur.execute(""" SELECT issues.id, issues.title, issues.description, users.name, users.email, embedding <-> %s::vector AS distance FROM issues LEFT JOIN users ON issues.user_id = users.id ORDER BY distance LIMIT 5 """, (question_embedding,)) results = cur.fetchall() return results if __name__ == '__main__': try: config = load_config() conn = connect(config) if conn: issues_to_embed = fetch_issues_without_embeddings(conn) print(f"Found {len(issues_to_embed)} issues without embeddings.") if issues_to_embed: embeddings, issue_texts = create_embeddings(issues_to_embed) store_embeddings(conn, issues_to_embed, embeddings) question = "Who reported login issues or account-related problems?" results = search_issues(conn, question) most_relevant_issue_embedding = results[0][5] print(f"Question: {question}") print(f"Most relevant issue ({most_relevant_issue_embedding}):") if results: print(f"\tTitle: {results[0][1]}") print(f"\tDescription: {results[0][2]}") print(f"\tUser Name: {results[0][3]}") else: print("\tNo relevant issues found.") except Exception as e: print(f"An error occurred: {e}") finally: # close db conn if conn: conn.close() print("Database connection closed.")
Leave a Comment