Untitled

mail@pastecode.io avatar
unknown
plain_text
7 months ago
4.5 kB
1
Indexable
Never
from faker import Faker
import random
import psycopg2
from psycopg2.extras import Json
from psycopg2 import sql
import time


def chunks(data, size):
    """Yield consecutive slices of ``data``, each holding at most ``size`` items.

    ``data`` must support ``len()`` and slicing (lists, strings, ``range``
    objects, ...). The final slice is shorter when ``len(data)`` is not a
    multiple of ``size``.
    """
    total = len(data)
    for start in range(0, total, size):
        stop = start + size
        yield data[start:stop]


def generate_districts(cursor, fake, num_districts):
    """Insert ``num_districts`` rows with fake city names into "РАЙОНЫ".

    Args:
        cursor: Database cursor whose ``executemany`` performs the inserts.
        fake: Faker instance; ``fake.city()`` supplies each district name.
        num_districts: Number of rows to generate.
    """
    statement = sql.SQL('INSERT INTO "РАЙОНЫ" (назв_рн) VALUES (%s);')
    # Rows are produced lazily, one single-column tuple per district.
    rows = ((fake.city(),) for _ in range(num_districts))
    cursor.executemany(statement, rows)


def generate_profiles(cursor, fake, num_profiles):
    """Insert ``num_profiles`` rows with fake job titles into "ПРОФИЛИ".

    Args:
        cursor: Database cursor whose ``executemany`` performs the inserts.
        fake: Faker instance; ``fake.job()`` supplies each profile name.
        num_profiles: Number of rows to generate.
    """
    statement = sql.SQL('INSERT INTO "ПРОФИЛИ" (назв_пр) VALUES (%s);')
    # Rows are produced lazily, one single-column tuple per profile.
    rows = ((fake.job(),) for _ in range(num_profiles))
    cursor.executemany(statement, rows)


def generate_owners(cursor, fake, num_owners, num_districts, chunk_size=100_000):
    """Insert ``num_owners`` fake owner rows into "ВЛАДЕЛЬЦЫ", batch by batch.

    Rows are built and sent in batches of at most ``chunk_size`` so that only
    one batch is held in memory at a time.

    Args:
        cursor: Database cursor whose ``executemany`` performs the inserts.
        fake: Faker instance supplying name, passport and address values.
        num_owners: Total number of rows to generate.
        num_districts: Upper bound (inclusive) for the random district number
            stored in the ``район`` column.
        chunk_size: Maximum number of rows per ``executemany`` call.
    """
    statement = sql.SQL(
        'INSERT INTO "ВЛАДЕЛЬЦЫ" (фио, паспорт, адрес, район, инфо) VALUES (%s, %s, %s, %s, %s);'
    )
    for id_batch in chunks(range(1, num_owners + 1), chunk_size):
        rows = []
        for _ in id_batch:
            full_name = fake.name()
            # Two generated SSN strings are concatenated to form the passport value.
            passport = fake.ssn() + fake.ssn()
            address = fake.address()
            district = random.randint(1, num_districts)
            info = Json({"Рост": random.randint(120, 220), "Вес": random.randint(30, 200)})
            rows.append((full_name, passport, address, district, info))
        # Progress output: shows the range covered by the batch about to be written.
        print(id_batch)
        cursor.executemany(statement, rows)


def generate_cooperatives(cursor, fake, num_cooperatives, num_profiles, num_districts):
    """Insert ``num_cooperatives`` fake cooperative rows into "КООПЕРАТИВЫ".

    Args:
        cursor: Database cursor whose ``executemany`` performs the inserts.
        fake: Faker instance supplying company names and description text.
        num_cooperatives: Number of rows to generate.
        num_profiles: Upper bound (inclusive) for the random ``профиль`` value.
        num_districts: Upper bound (inclusive) for the random ``район`` value.
    """

    def cooperative_rows():
        # Lazily yields one parameter tuple per cooperative.
        for _ in range(num_cooperatives):
            name = fake.company()
            profile = random.randint(1, num_profiles)
            description = fake.text()
            capital = random.uniform(1000, 1000000000)
            workers = random.randint(1, 1000)
            district = random.randint(1, num_districts)
            # Between 0 and 5 company names, passed as a Python list.
            property_count = random.randint(0, 5)
            assets = [fake.company() for _ in range(property_count)]
            yield (name, profile, description, capital, workers, district, assets)

    statement = sql.SQL(
        'INSERT INTO "КООПЕРАТИВЫ" (название, профиль, описание, устав_кап, число_раб, район, имущество) VALUES (%s, %s, %s, %s, %s, %s, %s);'
    )
    cursor.executemany(statement, cooperative_rows())


def generate_ownership(cursor, fake, num_ownership, num_cooperatives, num_owners):
    """Insert random cooperative/owner share rows into "ВЛАД_КОРП".

    ``num_ownership`` candidate shares are drawn. A candidate is kept only if
    adding it keeps the running share total of its cooperative at or below
    1.0, so fewer than ``num_ownership`` rows may be written. The statement
    uses ``ON CONFLICT DO NOTHING``, so conflicting rows are skipped by the
    database.

    Args:
        cursor: Database cursor whose ``executemany`` performs the inserts.
        fake: Unused here; accepted for a signature parallel to the other
            generators.
        num_ownership: Number of candidate shares to draw.
        num_cooperatives: Upper bound (inclusive) for the random cooperative id.
        num_owners: Upper bound (inclusive) for the random owner id.
    """
    # Running total of allocated shares per cooperative id.
    allocated = dict.fromkeys(range(1, num_cooperatives + 1), 0)
    rows = []

    for reg_number in range(1, num_ownership + 1):
        coop_id = random.randint(1, num_cooperatives)
        owner_id = random.randint(1, num_owners)
        share_size = random.uniform(0.05, 0.3)

        new_total = allocated[coop_id] + share_size
        if new_total > 1.0:
            # This share would over-allocate the cooperative; drop the candidate.
            continue

        # The 1-based attempt counter doubles as the registration number.
        rows.append((coop_id, owner_id, reg_number, share_size))
        allocated[coop_id] = new_total

    statement = sql.SQL(
        'INSERT INTO "ВЛАД_КОРП" (номер_кп, номер_вл, номер_рег, разм_пая) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING;'
    )
    cursor.executemany(statement, rows)


def _run_timed_step(conn, label, step, *args):
    """Run one generation step, print its duration, then commit.

    Args:
        conn: Open database connection; committed after the step finishes.
        label: Name shown in the timing message.
        step: Callable that performs the inserts.
        *args: Positional arguments forwarded to ``step``.
    """
    # perf_counter is monotonic, so long runs are not skewed by clock changes.
    started = time.perf_counter()
    step(*args)
    elapsed = time.perf_counter() - started
    print(f"{label} time: {elapsed} seconds")
    conn.commit()


def main():
    """Populate the ``lab3`` database with generated data, table by table.

    Each table is filled by its own generator, timed, and committed
    separately, so earlier tables stay committed if a later step fails.
    The cursor and the connection are always closed, including when a step
    raises.
    """
    # NOTE(review): connection credentials are hard-coded; consider moving
    # them to configuration or environment variables.
    conn = psycopg2.connect(
        host="localhost",
        database="lab3",
        user="postgres",
        password="Vova2004")

    num_districts = 1_001_000
    num_profiles = 1_001_000
    num_owners = 100_001_000
    num_cooperatives = 1_500_000
    num_ownership = 3_000_000

    try:
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cur:
            fake = Faker('ru_RU')

            _run_timed_step(conn, "generate_districts",
                            generate_districts, cur, fake, num_districts)
            _run_timed_step(conn, "generate_profiles",
                            generate_profiles, cur, fake, num_profiles)
            _run_timed_step(conn, "generate_owners",
                            generate_owners, cur, fake, num_owners, num_districts)
            _run_timed_step(conn, "generate_cooperatives",
                            generate_cooperatives, cur, fake, num_cooperatives,
                            num_profiles, num_districts)
            _run_timed_step(conn, "generate_ownership",
                            generate_ownership, cur, fake, num_ownership,
                            num_cooperatives, num_owners)
    finally:
        # Release the connection even if a step failed part-way through.
        conn.close()


if __name__ == "__main__":
    # Time the whole run, connection setup and teardown included.
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    print(f"Total execution time: {elapsed} seconds")

Leave a Comment