pgvector vector search for redmine issues

 avatar
unknown
python
2 months ago
4.1 kB
6
Indexable
import psycopg2
from ollama import embed
from configparser import ConfigParser

def load_config(filename='database.ini', section='postgresql'):
    parser = ConfigParser()
    parser.read(filename)
    config = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            config[param[0]] = param[1]
    else:
        raise Exception(f"Section {section} not found in the {filename} file.")
    return config

def connect(config):
    try:
        conn = psycopg2.connect(**config)
        print("Connected to the PostgreSQL server.")
        return conn
    except psycopg2.DatabaseError as error:
        print(f"Database connection error: {error}")
        return None

def fetch_issues_without_embeddings(conn):
    with conn.cursor() as cur:
        # find issues without embeddings, so to add to them later
        cur.execute("""
            SELECT
                issues.id AS issue_id,
                issues.title,
                issues.description,
                users.id AS user_id,
                users.name AS user_name,
                users.email AS user_email
            FROM
                issues
            LEFT JOIN
                users ON issues.user_id = users.id
            WHERE
                issues.embedding IS NULL
        """)
        issues = cur.fetchall()
    return issues

def create_embeddings(issues):
    issue_texts = [
        f"Issue Title: {title}\nIssue Description: {description}\nUser Name: {user_name}\nUser ID: {user_id}\nUser Email: {user_email}"
        for _, title, description, user_id, user_name, user_email in issues
    ]
    embeddings = embed(
        model="llama3.2",
        input=issue_texts
    )["embeddings"]

    return embeddings, issue_texts

def store_embeddings(conn, issues, embeddings):
    with conn.cursor() as cur:
        for (issue_id, _, _, _, _, _), embedding in zip(issues, embeddings):
            cur.execute(
                "UPDATE issues SET embedding = %s WHERE id = %s",
                (embedding, issue_id)
            )
        conn.commit()
    print("Embeddings stored successfully.")

def search_issues(conn, question):
    question_embedding = embed(model="llama3.2", input=question)["embeddings"][0]

    with conn.cursor() as cur:
        # find issues based on vector similarity (aka distance)
        cur.execute("""
            SELECT
                issues.id,
                issues.title,
                issues.description,
                users.name,
                users.email,
                embedding <-> %s::vector AS distance
            FROM
                issues
            LEFT JOIN
                users ON issues.user_id = users.id
            ORDER BY
                distance
            LIMIT 5
        """, (question_embedding,))
        results = cur.fetchall()
    return results

if __name__ == '__main__':
    try:
        config = load_config()
        conn = connect(config)
        if conn:
            issues_to_embed = fetch_issues_without_embeddings(conn)
            print(f"Found {len(issues_to_embed)} issues without embeddings.")

            if issues_to_embed:
                embeddings, issue_texts = create_embeddings(issues_to_embed)
                store_embeddings(conn, issues_to_embed, embeddings)

            question = "Who reported login issues or account-related problems?"
            results = search_issues(conn, question)

            most_relevant_issue_embedding = results[0][5]

            print(f"Question: {question}")
            print(f"Most relevant issue ({most_relevant_issue_embedding}):")
            if results:
                print(f"\tTitle: {results[0][1]}")
                print(f"\tDescription: {results[0][2]}")
                print(f"\tUser Name: {results[0][3]}")
            else:
                print("\tNo relevant issues found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # close db conn
        if conn:
            conn.close()
            print("Database connection closed.")
Leave a Comment