Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
1.9 kB
2
Indexable
Never
def _quote_sql_identifier(name):
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _s3_key_exists(client, bucket_name, file_name, first_page):
    """Return True if *file_name* is a key in the bucket listing.

    Starts from the already-fetched *first_page* and, when the listing is
    marked as truncated, keeps requesting further pages with the same
    ``list_objects`` call until the key is found or the listing ends.
    """
    page = first_page
    while True:
        contents = page.get('Contents', [])
        if any(obj['Key'] == file_name for obj in contents):
            return True
        # A single list_objects call returns a bounded number of keys; the
        # response flags whether more remain. With no truncation flag this
        # degrades to the original single-page behaviour.
        if not page.get('IsTruncated') or not contents:
            return False
        # NextMarker is only present for delimiter listings; otherwise the
        # last key of the current page is the marker for the next one.
        marker = page.get('NextMarker') or contents[-1]['Key']
        page = client.list_objects(Bucket=bucket_name, Marker=marker)


def loadParquetAsTable(con, file_name: str, bucket_name: str) -> None:
    """Load a parquet file from MinIO (S3) into a DuckDB table.

    The object is downloaded to a temporary file and materialised with
    ``CREATE TABLE ... AS SELECT * FROM parquet_scan(...)``. The table name is
    the part of ``file_name`` before its first ``.``.

    Args:
        con: Connection object whose ``execute`` method runs the SQL
            (presumably a DuckDB connection -- confirm against the caller).
        file_name: Key of the parquet object inside the bucket.
        bucket_name: Name of the S3 bucket to read from.

    Raises:
        HTTPException: 404 if listing the bucket fails, if the bucket has no
            objects, or if ``file_name`` is not among its keys; 500 if the
            download fails.

    Errors raised by ``con.execute`` (e.g. the table already exists) are not
    caught here and propagate to the caller.
    """
    client = s3_setup()

    # Any failure to list the bucket is reported as "bucket does not exist".
    try:
        first_page = client.list_objects(Bucket=bucket_name)
    except Exception as exc:
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} does not exist") from exc

    # A listing without 'Contents' means the bucket holds no objects.
    if 'Contents' not in first_page:
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} is empty")

    if not _s3_key_exists(client, bucket_name, file_name, first_page):
        raise HTTPException(status_code=404, detail=f"File {file_name} does not exist in S3 bucket {bucket_name}")

    # The temporary file is removed when the ``with`` block exits, so the
    # table must be created while it is still open.
    # NOTE(review): re-opening a NamedTemporaryFile by name while it is open
    # may not work on Windows -- confirm the deployment platform.
    with tempfile.NamedTemporaryFile() as temp_file:
        try:
            client.download_file(bucket_name, file_name, temp_file.name)
        except Exception as exc:
            raise HTTPException(status_code=500, detail=f"Error downloading file from S3: {exc}") from exc

        # Table name = file name up to the first dot. It is quoted so that
        # keys containing '-', spaces, '/' or reserved words produce a valid
        # identifier and cannot inject SQL.
        table_name = _quote_sql_identifier(file_name.split(".")[0])
        # Escape single quotes so the path stays inside the string literal.
        parquet_path = temp_file.name.replace("'", "''")
        con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM parquet_scan('{parquet_path}')")