Untitled
unknown
plain_text
3 years ago
1.9 kB
9
Indexable
def _s3_key_exists(client, bucket_name, file_name, first_page):
    """Return True when ``file_name`` is an object key in ``bucket_name``.

    ``first_page`` is the response of the unfiltered ``list_objects`` call
    the caller already made; it is searched first so the common case needs
    no extra request.
    """
    if any(obj['Key'] == file_name for obj in first_page['Contents']):
        return True
    if not first_page.get('IsTruncated'):
        return False
    # One listing call returns only the first page of a large bucket (S3 caps
    # a page at 1000 keys), so the key may simply be further down. Listing
    # again with the key itself as prefix narrows the result to candidates
    # that start with the requested name.
    narrowed = client.list_objects(Bucket=bucket_name, Prefix=file_name)
    return any(obj['Key'] == file_name for obj in narrowed.get('Contents', []))


def loadParquetAsTable(con, file_name: str, bucket_name: str) -> None:
    """Load a parquet object from MinIO (S3) into a new DuckDB table.

    The object is downloaded to a temporary file, and a table named after
    the part of ``file_name`` before its first "." is created from it with
    ``CREATE TABLE ... AS SELECT * FROM parquet_scan(...)``.

    Args:
        con: Connection object whose ``execute`` method runs the SQL.
        file_name: Key of the parquet object inside the bucket.
        bucket_name: Name of the bucket to read from.

    Raises:
        HTTPException: 404 when listing the bucket fails (any listing error
            is reported as "bucket does not exist"), when the bucket holds
            no objects, or when ``file_name`` is not one of its keys; 500
            when the download fails.
    """
    client = s3_setup()

    # Listing the bucket doubles as the existence check for the bucket.
    try:
        listing = client.list_objects(Bucket=bucket_name)
    except Exception as err:
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} does not exist") from err

    # A listing without 'Contents' means the bucket holds no objects.
    if 'Contents' not in listing:
        raise HTTPException(status_code=404, detail=f"S3 bucket {bucket_name} is empty")

    if not _s3_key_exists(client, bucket_name, file_name, listing):
        raise HTTPException(status_code=404, detail=f"File {file_name} does not exist in S3 bucket {bucket_name}")

    # Table name is the file name up to its first "." (extension removed).
    table_name = file_name.split(".")[0]
    # Quote the identifier so names containing "-", "/", spaces, or a leading
    # digit are valid SQL and cannot break out of the statement.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    # NOTE(review): the download re-opens the temp file by name while it is
    # still open here; the Python docs say that is platform-dependent
    # (it fails on Windows) -- confirm the deployment target.
    with tempfile.NamedTemporaryFile() as temp_file:
        try:
            client.download_file(bucket_name, file_name, temp_file.name)
        except Exception as err:
            raise HTTPException(status_code=500, detail=f"Error downloading file from S3: {err}") from err

        # The table must be created while the temp file still exists; it is
        # deleted as soon as the ``with`` block exits.
        path_literal = temp_file.name.replace("'", "''")
        con.execute(f"CREATE TABLE {quoted_table} AS SELECT * FROM parquet_scan('{path_literal}')")