Untitled
unknown
plain_text
a year ago
851 B
6
Indexable
def get_document_embedding_to_chunk_mapping(entity_id, embedding_to_chunk_content=None):
    """Map each stored embedding of an entity to the text of its chunk.

    Selects every row of ``fb7d848f95e2.langchain_pg_embedding`` whose
    ``cmetadata`` (rendered as text) contains ``"entity_id"`` followed later
    by the quoted ``entity_id`` value, and records ``embedding -> document``
    for each of them.

    Args:
        entity_id: Identifier to look for inside the row metadata. It is
            interpolated into a LIKE pattern, so ``%`` and ``_`` inside it
            still act as wildcards.
        embedding_to_chunk_content: Optional dict to add the results to. It is
            updated in place, which lets a caller accumulate several entities
            into one mapping. A new dict is created when omitted.

    Returns:
        The mapping (the dict passed in, or the newly created one). Keys are
        tuples built from the JSON-decoded ``embedding`` column; values are
        the ``document`` column. Rows with identical embeddings overwrite
        each other, the last one fetched wins.
    """
    # A ``{}`` default would be created once and shared by every call, so
    # results of earlier calls would leak into later ones.
    if embedding_to_chunk_content is None:
        embedding_to_chunk_content = {}

    # The pattern is bound as a query parameter instead of being formatted
    # into the SQL text, so a quote in entity_id can neither break nor alter
    # the statement.
    # NOTE(review): the ``%s`` placeholder assumes ``cursor`` is a PostgreSQL
    # DB-API cursor using the format paramstyle (e.g. psycopg2) - confirm.
    query = """
        SELECT cmetadata, embedding, "document"
        FROM fb7d848f95e2.langchain_pg_embedding
        WHERE lower(cmetadata::text) ILIKE %s
    """
    metadata_pattern = f'%"entity_id"%"{entity_id}"%'
    cursor.execute(query, (metadata_pattern,))

    for row in cursor.fetchall():
        # The embedding column is decoded from JSON and turned into a tuple
        # so that it is hashable and can serve as a dict key.
        embedding = tuple(json.loads(row[1]))
        embedding_to_chunk_content[embedding] = row[2]
    return embedding_to_chunk_content
def get_document_clusters(entity_ids, *, eps=0.3, min_samples=37):
    """Cluster the chunk embeddings of several entities with DBSCAN.

    The embeddings of every entity in ``entity_ids`` are gathered through
    ``get_document_embedding_to_chunk_mapping`` into a single mapping, and the
    distinct embeddings are then clustered using cosine distance.

    Args:
        entity_ids: Iterable of entity identifiers whose chunks are clustered.
        eps: DBSCAN ``eps`` (neighbourhood radius); defaults to the value that
            was previously hard-coded.
        min_samples: DBSCAN ``min_samples``; defaults to the value that was
            previously hard-coded.

    Returns:
        ``labels_`` of the fitted DBSCAN model: one label per distinct
        embedding, in the order the embeddings were first collected.
    """
    embedding_to_chunk_content = {}
    # The loop variable must be the name passed on below; the previous
    # ``entity`` / ``entity_id`` mismatch raised NameError.
    for entity_id in entity_ids:
        embedding_to_chunk_content = get_document_embedding_to_chunk_mapping(
            entity_id, embedding_to_chunk_content
        )

    # The mapping's keys are the embedding tuples; they form the sample matrix
    # that was previously referenced as an undefined ``X``.
    # NOTE(review): if no embeddings were found, fit() will presumably reject
    # the empty input - confirm whether callers need a dedicated empty result.
    embeddings = list(embedding_to_chunk_content)
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine").fit(embeddings)
    return clustering.labels_
Editor is loading...
Leave a Comment