pgvector vector search for redmine issues
unknown
python
a year ago
4.1 kB
14
Indexable
import psycopg2
from ollama import embed
from configparser import ConfigParser
def load_config(filename='database.ini', section='postgresql'):
parser = ConfigParser()
parser.read(filename)
config = {}
if parser.has_section(section):
params = parser.items(section)
for param in params:
config[param[0]] = param[1]
else:
raise Exception(f"Section {section} not found in the {filename} file.")
return config
def connect(config):
try:
conn = psycopg2.connect(**config)
print("Connected to the PostgreSQL server.")
return conn
except psycopg2.DatabaseError as error:
print(f"Database connection error: {error}")
return None
def fetch_issues_without_embeddings(conn):
with conn.cursor() as cur:
# find issues without embeddings, so to add to them later
cur.execute("""
SELECT
issues.id AS issue_id,
issues.title,
issues.description,
users.id AS user_id,
users.name AS user_name,
users.email AS user_email
FROM
issues
LEFT JOIN
users ON issues.user_id = users.id
WHERE
issues.embedding IS NULL
""")
issues = cur.fetchall()
return issues
def create_embeddings(issues):
issue_texts = [
f"Issue Title: {title}\nIssue Description: {description}\nUser Name: {user_name}\nUser ID: {user_id}\nUser Email: {user_email}"
for _, title, description, user_id, user_name, user_email in issues
]
embeddings = embed(
model="llama3.2",
input=issue_texts
)["embeddings"]
return embeddings, issue_texts
def store_embeddings(conn, issues, embeddings):
with conn.cursor() as cur:
for (issue_id, _, _, _, _, _), embedding in zip(issues, embeddings):
cur.execute(
"UPDATE issues SET embedding = %s WHERE id = %s",
(embedding, issue_id)
)
conn.commit()
print("Embeddings stored successfully.")
def search_issues(conn, question):
question_embedding = embed(model="llama3.2", input=question)["embeddings"][0]
with conn.cursor() as cur:
# find issues based on vector similarity (aka distance)
cur.execute("""
SELECT
issues.id,
issues.title,
issues.description,
users.name,
users.email,
embedding <-> %s::vector AS distance
FROM
issues
LEFT JOIN
users ON issues.user_id = users.id
ORDER BY
distance
LIMIT 5
""", (question_embedding,))
results = cur.fetchall()
return results
if __name__ == '__main__':
try:
config = load_config()
conn = connect(config)
if conn:
issues_to_embed = fetch_issues_without_embeddings(conn)
print(f"Found {len(issues_to_embed)} issues without embeddings.")
if issues_to_embed:
embeddings, issue_texts = create_embeddings(issues_to_embed)
store_embeddings(conn, issues_to_embed, embeddings)
question = "Who reported login issues or account-related problems?"
results = search_issues(conn, question)
most_relevant_issue_embedding = results[0][5]
print(f"Question: {question}")
print(f"Most relevant issue ({most_relevant_issue_embedding}):")
if results:
print(f"\tTitle: {results[0][1]}")
print(f"\tDescription: {results[0][2]}")
print(f"\tUser Name: {results[0][3]}")
else:
print("\tNo relevant issues found.")
except Exception as e:
print(f"An error occurred: {e}")
finally:
# close db conn
if conn:
conn.close()
print("Database connection closed.")
Editor is loading...
Leave a Comment