import psycopg2
import pandas as pd
DATABASE = "tariff_digitization"
USER='postgres'
PASSWORD="7Yn4mFeLNDXz&tH"
HOST="hospital-tariff-application.ctg23hvzex2r.ap-south-1.rds.amazonaws.com"
PORT = '5432'
def check_connection():
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
cursor = conn.cursor()
cursor.execute("select version()")
data = cursor.fetchone()
print("Connection established to: ", data)
conn.close()
def check_data():
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
cursor = conn.cursor()
cursor.execute("""
SELECT *
FROM schedule_of_charges
""")
# Fetch all the table names
table_names = cursor.fetchall()
table_n = []
# Print the table names
for table_name in table_names:
print(table_name)
def check_table():
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
cursor = conn.cursor()
cursor.execute("""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
""")
# Fetch all the table names
table_names = cursor.fetchall()
table_n = []
# Print the table names
for table_name in table_names:
table_n.append(table_name[0])
print("Table with table name '",table_name[0],"' exists")
# check for table schedule_of_charges
# check for table schedule_of_charges
if not "schedule_of_charges" in table_n:
cursor.execute("""
CREATE TABLE schedule_of_charges(
serial_id SERIAL PRIMARY KEY,
hospital_id text not null,
document text not null,
page text not null,
table_number text not null,
s3url text not null,
service_code text,
category text,
subcategory text,
service_description text not null,
bounding_box text not null default '[]',
type_of_room text not null default 'tariff',
price_per_unit text not null,
remarks text,
los text,
inclusions text,
exclusions text,
start_date text not null,
end_date text not null default '2099-12-31',
tariff_type text not null,
room_mapping text,
confidence_score text,
standard_term text
)""")
conn.commit()
print("Tables created schedule_of_charges")
elif not "schedule_of_charges_audit_logs" in table_n:
cursor.execute("""CREATE TABLE schedule_of_charges_audit_logs(
id serial PRIMARY KEY,diff jsonb,updated_by character varying(255) COLLATE pg_catalog."default",updated_date timestamp with time zone DEFAULT now(),hospital_id text COLLATE pg_catalog."default",serial_id integer
)""")
conn.commit()
print("Tables created schedule_of_charges_audit_logs")
else:
print("Table Exists")
conn.close()
def drop_table():
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
cursor = conn.cursor()
sql = '''DROP TABLE schedule_of_charges '''
cursor.execute(sql)
print("Table dropped !")
conn.commit()
conn.close()
def delete_hid(hid):
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
cursor = conn.cursor()
cursor.execute(f'''DELETE FROM schedule_of_charges WHERE hospital_id='{hid}';''')
conn.commit()
print(f"Deleted HID {hid}")
conn.close()
import psycopg2.extras as extras
def execute_values(df):
table = 'schedule_of_charges'
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
tuples = [tuple(x) for x in df.to_numpy()]
cols = ','.join(list(df.columns))
# SQL query to execute
query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
cursor = conn.cursor()
try:
extras.execute_values(cursor, query, tuples)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print("Error: %s" % error)
conn.rollback()
cursor.close()
return 1
print("the dataframe is inserted")
cursor.close()
def insert_df(df):
conn = psycopg2.connect(
database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
)
df.to_sql('schedule_of_charges',con = conn,
if_exists='append',
index=False,
method="postgres_upsert")
conn.commit()
conn.close()
def postgresmain(df):
check_connection() # check connection with postgres
check_table() # check if table exist or not ; if not create table
# df = pd.read_csv(path,index_col=0)
df.rename(columns={'table': 'table_number'}, inplace=True)
df.fillna("", inplace=True)
hid = df['hospital_id'].iloc[0]
delete_hid(hid) # if data exist with same hid delete data
df = df.astype(str)
execute_values(df) # insert dataframe to postgres
# main("/home/vaibhav/Downloads/docker_50144634_output/temp_interim/50144634/long_format.csv")
# drop_table()
import os
import csv
for i in os.listdir("/usr/src/app/code/data/long"):
try:
path = os.path.join("/usr/src/app/code/data/long", i)
try:
long_ = pd.read_csv(path, delimiter="|", quoting=csv.QUOTE_ALL, on_bad_lines="skip")
print(long_.shape)
except:
long_ = pd.read_csv(path)
postgresmain(long_)
except Exception as e:
print(path, e)