import random
from datetime import date, datetime, timedelta, timezone

from cassandra.cluster import Cluster
from cassandra.query import BatchStatement
# Open a connection to the cluster reachable at 'localhost' and bind the
# session to the keyspace 'your_keyspace' so later statements can use
# unqualified table names.
cluster = Cluster(contact_points=['localhost'])
session = cluster.connect(keyspace='your_keyspace')
# Pools of sample values that the generated rows are drawn from.

# Business ids use each number in 10000..19999 exactly once, in shuffled order.
business_ids = list(range(10000, 20000))
random.shuffle(business_ids)
business_ids = list(map('business_{}'.format, business_ids))

# One ISO-8601 date string for each of the 365 days starting at 2022-01-01.
first_event_day = date(2022, 1, 1)
event_dates = [
    (first_event_day + timedelta(days=offset)).isoformat()
    for offset in range(365)
]

# Small fixed label sets, numbered from 1.
keys = list(map('key_{}'.format, range(1, 11)))
bu_ids = list(map('bu_{}'.format, range(1, 6)))
op_ids = list(map('op_{}'.format, range(1, 6)))
statuses = list(map('status_{}'.format, range(1, 4)))
types = list(map('type_{}'.format, range(1, 4)))
usernames = list(map('user_{}'.format, range(1, 11)))
# CQL for a single-row insert into your_table. The twelve bind markers map
# one-to-one, in order, onto the twelve columns in the column list.
insert_cql = """
INSERT INTO your_table (hoscd_businessid, hoscd_event_date, hoscd_key, hoscd_buid, hoscd_createdtime,
hoscd_opid, hoscd_status, hoscd_type, hoscd_updatedtime, hoscd_username,
hoscd_value, hoscd_valuetype)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
# Prepare the statement once; it is reused for every row that gets inserted.
insert = session.prepare(insert_cql)
# Insert one row per entry in business_ids (10,000 rows), sending them to
# Cassandra in batches so the script does not pay one round trip per row.
#
# NOTE(review): every batch mixes many different business ids. If
# hoscd_businessid is the partition key these are multi-partition batches;
# consider cassandra.concurrent.execute_concurrent_with_args instead -- confirm
# against the table schema.
rows_per_batch = 100
batch = BatchStatement()
for row_number, business_id in enumerate(business_ids, start=1):
    # Take one timezone-aware UTC timestamp per row. The driver interprets
    # naive datetimes as UTC, so a naive local datetime.now() would be stored
    # shifted by the local UTC offset. Reusing the same value also makes the
    # created and updated times of a freshly inserted row identical.
    now = datetime.now(timezone.utc)
    # The order of the values must match the column order of the prepared INSERT.
    values = (
        business_id,
        random.choice(event_dates),
        random.choice(keys),
        random.choice(bu_ids),
        now,
        random.choice(op_ids),
        random.choice(statuses),
        random.choice(types),
        now,
        random.choice(usernames),
        b'binary_data',
        'blob',
    )
    batch.add(insert, values)
    # Send the batch once it is full, then empty it for the next group of rows.
    if row_number % rows_per_batch == 0:
        session.execute(batch)
        batch.clear()
# Send whatever is left when the row count is not a multiple of the batch size.
if len(batch) > 0:
    session.execute(batch)