Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
5.7 kB
1
Indexable
import psycopg2
import pandas as pd

DATABASE = "tariff_digitization"
USER='postgres'
PASSWORD="7Yn4mFeLNDXz&tH"
HOST="hospital-tariff-application.ctg23hvzex2r.ap-south-1.rds.amazonaws.com"
PORT = '5432'
def check_connection():
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   cursor = conn.cursor()
   cursor.execute("select version()")
   data = cursor.fetchone()
   print("Connection established to: ", data)
   conn.close()



def check_data():
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   cursor = conn.cursor()

   cursor.execute("""
       SELECT *
       FROM schedule_of_charges
   """)

   # Fetch all the table names
   table_names = cursor.fetchall()
   table_n = []
   # Print the table names
   for table_name in table_names:
      print(table_name)

def check_table():
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   cursor = conn.cursor()

   cursor.execute("""
       SELECT table_name
       FROM information_schema.tables
       WHERE table_schema = 'public'
   """)

   # Fetch all the table names
   table_names = cursor.fetchall()
   table_n = []
   # Print the table names
   for table_name in table_names:
      table_n.append(table_name[0])
      print("Table with table name '",table_name[0],"' exists")

   # check for table  schedule_of_charges

   # check for table  schedule_of_charges
      if not "schedule_of_charges" in table_n:
            cursor.execute("""
                  CREATE TABLE schedule_of_charges(
    serial_id SERIAL PRIMARY KEY,
    hospital_id text not null,
    document text not null,
    page text not null,
    table_number text not null,
    s3url text not null,
    service_code text,
    category text,
    subcategory text,
    service_description text not null,
    bounding_box text not null default '[]',
    type_of_room text not null default 'tariff',
    price_per_unit text not null,
    remarks text,
    los text,
 inclusions text,
 exclusions text,
 start_date text not null,
 end_date text not null default '2099-12-31',
 tariff_type text not null,
 room_mapping text,
 confidence_score text,
 standard_term text
)""")
            conn.commit()
            print("Tables created schedule_of_charges")
      elif not "schedule_of_charges_audit_logs" in table_n:
            cursor.execute("""CREATE TABLE schedule_of_charges_audit_logs(
   id serial PRIMARY KEY,diff jsonb,updated_by character varying(255) COLLATE pg_catalog."default",updated_date timestamp with time zone DEFAULT now(),hospital_id text COLLATE pg_catalog."default",serial_id integer
      )""")
            conn.commit()
            print("Tables created schedule_of_charges_audit_logs")
      else:
            print("Table Exists")
            conn.close()


def drop_table():
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   cursor = conn.cursor()

   sql = '''DROP TABLE schedule_of_charges '''

   cursor.execute(sql)
   print("Table dropped !")

   conn.commit()
   conn.close()

def delete_hid(hid):
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   cursor = conn.cursor()
   cursor.execute(f'''DELETE FROM schedule_of_charges WHERE hospital_id='{hid}';''')
   conn.commit()
   print(f"Deleted HID {hid}")
   conn.close()

import psycopg2.extras as extras


def execute_values(df):
   table = 'schedule_of_charges'
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   tuples = [tuple(x) for x in df.to_numpy()]

   cols = ','.join(list(df.columns))
   # SQL query to execute
   query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
   cursor = conn.cursor()
   try:
      extras.execute_values(cursor, query, tuples)
      conn.commit()
   except (Exception, psycopg2.DatabaseError) as error:
      print("Error: %s" % error)
      conn.rollback()
      cursor.close()
      return 1
   print("the dataframe is inserted")
   cursor.close()
def insert_df(df):
   conn = psycopg2.connect(
      database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT
   )
   df.to_sql('schedule_of_charges',con = conn,
             if_exists='append',
             index=False,
             method="postgres_upsert")
   conn.commit()
   conn.close()


def postgresmain(df):
      check_connection()  # check connection with postgres
      check_table()  # check if table exist or not ; if not create table
      # df = pd.read_csv(path,index_col=0)
      df.rename(columns={'table': 'table_number'}, inplace=True)

      df.fillna("", inplace=True)
      hid = df['hospital_id'].iloc[0]
      delete_hid(hid)  # if data exist with same hid delete data
      df = df.astype(str)
      execute_values(df)  # insert dataframe to postgres


# main("/home/vaibhav/Downloads/docker_50144634_output/temp_interim/50144634/long_format.csv")
# drop_table()
import os
import csv

for i in os.listdir("/usr/src/app/code/data/long"):
      try:
            path = os.path.join("/usr/src/app/code/data/long", i)
            try:
                  long_ = pd.read_csv(path, delimiter="|", quoting=csv.QUOTE_ALL, on_bad_lines="skip")
                  print(long_.shape)
            except:
                  long_ = pd.read_csv(path)
            postgresmain(long_)
      except Exception as e:
            print(path, e)