Untitled
def get_document_embedding_to_chunk_mapping(entity_id, embedding_to_chunk_content=None):
    """Collect ``embedding -> chunk text`` pairs for one entity.

    Rows are selected from ``fb7d848f95e2.langchain_pg_embedding`` whose
    ``cmetadata`` (cast to text) matches ``"entity_id" ... "<entity_id>"``
    case-insensitively.  Each row's embedding column is JSON-decoded and
    turned into a tuple so it can be used as a dictionary key.

    Args:
        entity_id: Identifier searched for inside the ``cmetadata`` text.
        embedding_to_chunk_content: Optional dict to extend in place.  When
            omitted, a fresh dict is created for this call.

    Returns:
        The dict mapping embedding tuples to the ``document`` column value.
        Rows with identical embeddings collapse into one entry; the last
        row fetched wins.
    """
    # A ``{}`` default would be created once and shared by every call,
    # leaking results between unrelated invocations.
    if embedding_to_chunk_content is None:
        embedding_to_chunk_content = {}

    # The pattern is passed as a bound parameter instead of being spliced
    # into the SQL text, so quotes in ``entity_id`` cannot alter the query.
    # NOTE(review): assumes the module-level ``cursor`` uses the ``%s``
    # paramstyle (psycopg2 / psycopg) -- confirm against the driver in use.
    # ``%`` / ``_`` inside ``entity_id`` still act as LIKE wildcards.
    query = """
        SELECT cmetadata, embedding, "document"
        FROM fb7d848f95e2.langchain_pg_embedding
        WHERE lower(cmetadata::text) ILIKE %s
    """
    pattern = f'%"entity_id"%"{entity_id}"%'
    cursor.execute(query, (pattern,))

    for _metadata, raw_embedding, chunk_content in cursor.fetchall():
        # Lists are unhashable; a tuple lets the vector serve as a dict key.
        embedding = tuple(json.loads(raw_embedding))
        embedding_to_chunk_content[embedding] = chunk_content

    return embedding_to_chunk_content


def get_document_clusters(entity_ids):
    """Cluster the chunk embeddings of several entities with DBSCAN.

    Embeddings for every id in ``entity_ids`` are gathered into a single
    mapping and clustered with cosine-distance DBSCAN
    (``eps=0.3``, ``min_samples=37``).

    Args:
        entity_ids: Iterable of entity identifiers to load embeddings for.

    Returns:
        The ``labels_`` array of the fitted DBSCAN model, one label per
        distinct embedding in the order the embeddings were first
        collected.  An empty integer array is returned when no embeddings
        were found.
    """
    # Local import keeps this block self-contained; numpy is already a
    # hard dependency of scikit-learn, which provides DBSCAN.
    import numpy as np

    embedding_to_chunk_content = {}
    for entity_id in entity_ids:
        # The helper extends the dict in place, accumulating across ids.
        get_document_embedding_to_chunk_mapping(entity_id, embedding_to_chunk_content)

    # DBSCAN.fit rejects an empty sample matrix, so short-circuit here.
    if not embedding_to_chunk_content:
        return np.empty(0, dtype=np.intp)

    # Dict keys are the embedding tuples; stack them into an
    # (n_samples, n_features) matrix for scikit-learn.
    X = np.asarray(list(embedding_to_chunk_content), dtype=float)
    clustering = DBSCAN(eps=0.3, min_samples=37, metric="cosine").fit(X)
    return clustering.labels_
Leave a Comment