Untitled
unknown
plain_text
a year ago
4.4 kB
6
Indexable
import ast # For safely evaluating the string representation of dictionaries/lists
# Function to process the 'Related Persons' data
def process_related_persons(related_persons_data, min_executive_officers=7):
    """Spread related-person records across role-based columns.

    Each record is rendered into a multi-line details string and stored
    under one column per role it holds:

    * ``'Executive Officer <n>'`` -- one numbered column per executive
      officer, in input order, starting at 1.
    * ``'Executive Officer: Director'`` / ``'Executive Officer: Promoter'``
      -- a single column each; when several persons hold the role, the
      last one in the input wins.

    Args:
        related_persons_data: Iterable of dicts with the keys
            ``'Related Person Name'``, ``'Related Person Address'``,
            ``'Related Person City'`` and ``'Related Person Relationships'``.
            Missing keys default to ``'N/A'``. The relationships value may
            be a comma-separated string, a list/tuple of role names, or
            ``None``.
        min_executive_officers: Number of numbered executive-officer
            columns that are always present; unused ones are filled with
            ``'N/A'`` so that every result has the same set of columns.

    Returns:
        dict mapping column name to the details string (or ``'N/A'``).
    """
    columns = {}
    # Next free "Executive Officer <n>" slot (1-based).
    executive_officer_count = 1

    for person in related_persons_data:
        person_name = person.get('Related Person Name', 'N/A')
        person_address = person.get('Related Person Address', 'N/A')
        person_city = person.get('Related Person City', 'N/A')
        person_relationships = person.get('Related Person Relationships', 'N/A')

        # A key that is present but holds None would otherwise crash on
        # .split(); a list of roles is folded into the same
        # comma-separated form the string input uses.
        if person_relationships is None:
            person_relationships = 'N/A'
        elif not isinstance(person_relationships, str):
            person_relationships = ", ".join(str(rel) for rel in person_relationships)

        # Create the detailed person information string
        person_details = (
            f"Related Person Name: {person_name}\n"
            f"Related Person Address: {person_address}\n"
            f"Related Person City: {person_city}\n"
            f"Related Person Relationships: {person_relationships}"
        )

        # Assign to appropriate columns based on roles.
        # NOTE(review): the three role checks are treated as independent,
        # so a person holding several roles appears in several columns --
        # confirm this matches the intended layout.
        relationships = {rel.strip() for rel in person_relationships.split(',')}
        if 'Executive Officer' in relationships:
            columns[f'Executive Officer {executive_officer_count}'] = person_details
            executive_officer_count += 1
        if 'Director' in relationships:
            columns['Executive Officer: Director'] = person_details
        if 'Promoter' in relationships:
            columns['Executive Officer: Promoter'] = person_details

    # Pad the unused numbered slots so every row exposes the same columns.
    for i in range(executive_officer_count, min_executive_officers + 1):
        columns[f'Executive Officer {i}'] = 'N/A'

    return columns
# Build one flat row per XML document in xml_data_list and collect the rows
# into a DataFrame.
# NOTE(review): this section relies on names defined outside it --
# xml_data_list, safe_find_text, ET (presumably xml.etree.ElementTree) and
# pd (presumably pandas) -- confirm they are in scope before it runs.
rows = []
for xml_data in xml_data_list:
    root = ET.fromstring(xml_data)

    # Initialize a dictionary to hold the row data
    row = {
        'CIK': safe_find_text(root, './/primaryIssuer/cik'),
        'Entity Name': safe_find_text(root, './/primaryIssuer/entityName'),
        'Issuer Address': safe_find_text(root, './/primaryIssuer/issuerAddress/street1'),
        'City': safe_find_text(root, './/primaryIssuer/issuerAddress/city'),
        'State': safe_find_text(root, './/primaryIssuer/issuerAddress/stateOrCountry'),
        'Zip Code': safe_find_text(root, './/primaryIssuer/issuerAddress/zipCode'),
        'Issuer Phone Number': safe_find_text(root, './/primaryIssuer/issuerPhoneNumber'),
        'Jurisdiction of Incorporation': safe_find_text(root, './/primaryIssuer/jurisdictionOfInc'),
        'Year of Incorporation': safe_find_text(root, './/primaryIssuer/yearOfInc/value'),
        'Offering Amount': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalOfferingAmount'),
        'Amount Sold': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalAmountSold'),
        'Amount Remaining': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalRemaining'),
        'Date of First Sale': safe_find_text(root, './/offeringData/typeOfFiling/dateOfFirstSale/value'),
        'Has Non-Accredited Investors': safe_find_text(root, './/offeringData/investors/hasNonAccreditedInvestors'),
        'Number Already Invested': safe_find_text(root, './/offeringData/investors/totalNumberAlreadyInvested'),
    }

    # Extract related persons as a list of dictionaries
    related_persons = root.findall('.//offeringData/relatedPersonsList/relatedPersonInfo')
    related_persons_list = []
    for person in related_persons:
        first_name = safe_find_text(person, './/relatedPersonName/firstName')
        last_name = safe_find_text(person, './/relatedPersonName/lastName')
        # An empty <relationship/> element has text None, which would make
        # str.join raise TypeError; skip such entries.
        relationship_names = [
            rel.text
            for rel in person.findall('.//relatedPersonRelationshipList/relationship')
            if rel.text is not None
        ]
        related_persons_list.append({
            'Related Person Name': f"{first_name} {last_name}",
            'Related Person Address': safe_find_text(person, './/relatedPersonAddress/street1'),
            'Related Person City': safe_find_text(person, './/relatedPersonAddress/city'),
            'Related Person Relationships': ", ".join(relationship_names),
        })

    # Process related persons data into role-based columns
    role_columns = process_related_persons(related_persons_list)
    row.update(role_columns)
    rows.append(row)

# Convert the list of rows to a pandas DataFrame
df = pd.DataFrame(rows)
# Display the resulting DataFrame
Editor is loading...
Leave a Comment