Untitled
unknown
plain_text
a year ago
1.9 kB
2
Indexable
Never
def _s3_key_exists(client, bucket_name, file_name, first_page):
    """Return True if ``file_name`` is an object key in ``bucket_name``.

    ``first_page`` is an already-fetched ``list_objects`` response. A single
    response can be truncated (``IsTruncated``), so the listing is followed
    page by page until the key is found or the listing ends. Errors raised
    while fetching follow-up pages propagate unchanged.
    """
    page = first_page
    while True:
        contents = page.get('Contents', [])
        if any(obj['Key'] == file_name for obj in contents):
            return True
        if not page.get('IsTruncated') or not contents:
            return False
        # NextMarker is only present in some responses; otherwise the last
        # key of the current page is the marker for the next one.
        marker = page.get('NextMarker') or contents[-1]['Key']
        page = client.list_objects(Bucket=bucket_name, Marker=marker)


def loadParquetAsTable(con, file_name, bucket_name):
    """Load a parquet object from S3 (MinIO) into a DuckDB table.

    The object is downloaded to a temporary file and materialised with
    ``CREATE TABLE ... AS SELECT * FROM parquet_scan(...)``. The table is
    named after the part of ``file_name`` before its first ``"."``.

    Args:
        con: Connection exposing ``execute(sql)``; presumably a DuckDB
            connection (confirm against the caller).
        file_name: Key of the parquet object inside the bucket.
        bucket_name: Name of the S3 bucket to read from.

    Raises:
        HTTPException: 404 if listing the bucket fails, the bucket has no
            objects, or ``file_name`` is not among its keys; 500 if the
            download fails.
    """
    client = s3_setup()

    # Any failure to list the bucket is reported as "does not exist"; the
    # original error stays attached as the cause for debugging.
    try:
        response = client.list_objects(Bucket=bucket_name)
    except Exception as err:
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} does not exist") from err

    if 'Contents' not in response:
        # Bucket is empty
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} is empty")

    if not _s3_key_exists(client, bucket_name, file_name, response):
        raise HTTPException(status_code=404, detail=f"File {file_name} does not exist in S3 bucket {bucket_name}")

    # Table name is the key up to its first "." (drops the file extension).
    table_name = file_name.split(".")[0]
    # Quote the identifier so keys containing "-", "/", spaces, etc. yield
    # valid SQL and cannot inject statements; embedded quotes are doubled.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    with tempfile.NamedTemporaryFile() as temp_file:
        try:
            client.download_file(bucket_name, file_name, temp_file.name)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error downloading file from S3: {e}") from e

        # The table must be created while the temporary file still exists.
        escaped_path = temp_file.name.replace("'", "''")
        con.execute(f"CREATE TABLE {quoted_table} AS SELECT * FROM parquet_scan('{escaped_path}')")