Untitled
import json
import logging
from io import BytesIO

import azure.functions as func
import pandas as pd
from pymongo import MongoClient

# MongoDB connection details (placeholder values; replace with real settings).
MONGO_URI = "your_mongodb_connection_string"
DATABASE_NAME = "your_database_name"
COLLECTION_NAME = "file_metadata"


def save_to_mongodb(user_id, file_info):
    """Save file metadata to MongoDB, tagged with the owning user's id.

    A shallow copy of ``file_info`` is what gets inserted, so the caller's
    dict is left untouched: it gains neither the ``user_id`` key nor the
    ``_id`` that pymongo writes into a document on insert.

    Args:
        user_id: Identifier stored in the document's ``user_id`` field.
        file_info: Metadata mapping to persist.
    """
    document = {**file_info, "user_id": user_id}
    # The context manager closes the client (and its connection pool) even if
    # the insert raises, so repeated invocations do not leak connections.
    with MongoClient(MONGO_URI) as client:
        client[DATABASE_NAME][COLLECTION_NAME].insert_one(document)


def extract_file_metadata(file_content, file_type):
    """Extract column names, schema, and sample rows from an uploaded file.

    Args:
        file_content: Raw bytes of the uploaded file.
        file_type: Lower-case file extension: ``"csv"``, ``"xlsx"`` or
            ``"xls"``.

    Returns:
        A dict with three keys: ``"columns"`` (list of column labels as
        strings), ``"schema"`` (column label -> pandas dtype name) and
        ``"sample_rows"`` (up to the first five rows as dicts, with missing
        values represented as ``None``).

    Raises:
        ValueError: If ``file_type`` is not supported. pandas' own parsing
            errors for empty or malformed input also derive from
            ``ValueError``.
    """
    buffer = BytesIO(file_content)
    if file_type == "csv":
        df = pd.read_csv(buffer)
    elif file_type in ("xlsx", "xls"):
        df = pd.read_excel(buffer)
    else:
        raise ValueError("Unsupported file type")

    # BSON documents and JSON objects need string keys, but spreadsheet
    # headers can be numbers or dates, so normalise the labels up front.
    df.columns = df.columns.map(str)

    columns = df.columns.tolist()
    # Iterating ``dtypes`` (rather than ``df[col].dtype``) also works when two
    # columns share a label, where ``df[col]`` would return a DataFrame.
    schema = {col: str(dtype) for col, dtype in df.dtypes.items()}

    # NaN/NaT are not valid JSON, so missing cells are stored as None.
    sample_rows = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df.head(5).to_dict(orient="records")
    ]

    return {
        "columns": columns,
        "schema": schema,
        "sample_rows": sample_rows,
    }


def main(req: func.HttpRequest) -> func.HttpResponse:
    """Handle a file upload: extract its metadata, store it, and return it.

    Expects a ``user_id`` query parameter and an uploaded file under the
    ``file`` form field. The file type is taken from the file name's
    extension.

    Returns:
        200 with the extracted metadata as JSON on success; 400 when
        ``user_id`` or the file is missing, or when the file type is
        unsupported or the content cannot be parsed; 500 for any other
        failure.
    """
    logging.info("Processing file upload request.")

    try:
        # Parse user_id and file from the request
        user_id = req.params.get("user_id")
        if not user_id:
            return func.HttpResponse("Missing 'user_id' parameter.", status_code=400)

        upload = req.files.get("file")
        if not upload:
            return func.HttpResponse("No file uploaded.", status_code=400)

        file_content = upload.stream.read()
        # ``filename`` may be absent on the upload; treat that as "no extension".
        file_type = (upload.filename or "").split(".")[-1].lower()

        # Extract metadata; a ValueError here means the client sent something
        # we cannot process, which is a 400 rather than a server fault.
        try:
            file_info = extract_file_metadata(file_content, file_type)
        except ValueError as exc:
            logging.warning("Rejected upload: %s", exc)
            return func.HttpResponse(f"Error: {exc}", status_code=400)

        # Save metadata to MongoDB
        save_to_mongodb(user_id, file_info)

        # Return extracted information to the user. ``default=str`` is a
        # deliberate fallback for cell values JSON has no native form for
        # (e.g. timestamps read from a spreadsheet).
        return func.HttpResponse(
            body=json.dumps(file_info, default=str),
            status_code=200,
            mimetype="application/json",
        )

    except Exception:
        # Top-level boundary: log the full traceback, but do not echo the
        # exception text to the client since it may contain internal details.
        logging.exception("Error processing request")
        return func.HttpResponse("Error: internal server error.", status_code=500)
Leave a Comment