Untitled
unknown
plain_text
5 months ago
4.4 kB
5
Indexable
import ast  # For safely evaluating the string representation of dictionaries/lists


# Function to process the 'Related Persons' data
def process_related_persons(related_persons_data, min_officer_columns=7):
    """Spread a list of related-person dicts into role-based columns.

    Each person is rendered as a multi-line detail string and stored under
    one or more column names depending on the roles found in the
    comma-separated 'Related Person Relationships' value.

    Args:
        related_persons_data: Iterable of dicts with the keys
            'Related Person Name', 'Related Person Address',
            'Related Person City' and 'Related Person Relationships'.
            Missing keys fall back to 'N/A'.
        min_officer_columns: Minimum number of 'Executive Officer <n>'
            columns to emit; unused slots are filled with 'N/A'. Defaults
            to 7, matching the previously hard-coded behaviour.

    Returns:
        dict mapping column name -> detail string (or 'N/A' for padding).
    """
    columns = {}

    # Next free 'Executive Officer <n>' slot (1-based).
    executive_officer_count = 1

    for person in related_persons_data:
        person_name = person.get('Related Person Name', 'N/A')
        person_address = person.get('Related Person Address', 'N/A')
        person_city = person.get('Related Person City', 'N/A')
        person_relationships = person.get('Related Person Relationships', 'N/A')

        # Create the detailed person information string
        person_details = (
            f"Related Person Name: {person_name}\n"
            f"Related Person Address: {person_address}\n"
            f"Related Person City: {person_city}\n"
            f"Related Person Relationships: {person_relationships}"
        )

        # Guard against an explicit None value, which would otherwise raise
        # AttributeError on .split().
        relationships = {
            rel.strip() for rel in (person_relationships or '').split(',')
        }

        # Assign to appropriate columns based on roles. A person holding
        # several roles appears in several columns.
        if 'Executive Officer' in relationships:
            columns[f'Executive Officer {executive_officer_count}'] = person_details
            executive_officer_count += 1

        # NOTE(review): these two columns are single-valued, so when several
        # people are Directors (or Promoters) only the last one is kept.
        if 'Director' in relationships:
            columns['Executive Officer: Director'] = person_details

        if 'Promoter' in relationships:
            columns['Executive Officer: Promoter'] = person_details

    # Pad the remaining officer slots so every row exposes at least
    # `min_officer_columns` 'Executive Officer <n>' columns.
    for i in range(executive_officer_count, min_officer_columns + 1):
        columns[f'Executive Officer {i}'] = 'N/A'

    return columns


# Iterate through xml_data_list to process each entry.
# `xml_data_list`, `ET`, `safe_find_text` and `pd` are expected to be defined
# earlier in the file/session; they are not created here.
rows = []

for xml_data in xml_data_list:
    root = ET.fromstring(xml_data)

    # Initialize a dictionary to hold the row data
    row = {
        'CIK': safe_find_text(root, './/primaryIssuer/cik'),
        'Entity Name': safe_find_text(root, './/primaryIssuer/entityName'),
        'Issuer Address': safe_find_text(root, './/primaryIssuer/issuerAddress/street1'),
        'City': safe_find_text(root, './/primaryIssuer/issuerAddress/city'),
        'State': safe_find_text(root, './/primaryIssuer/issuerAddress/stateOrCountry'),
        'Zip Code': safe_find_text(root, './/primaryIssuer/issuerAddress/zipCode'),
        'Issuer Phone Number': safe_find_text(root, './/primaryIssuer/issuerPhoneNumber'),
        'Jurisdiction of Incorporation': safe_find_text(root, './/primaryIssuer/jurisdictionOfInc'),
        'Year of Incorporation': safe_find_text(root, './/primaryIssuer/yearOfInc/value'),
        'Offering Amount': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalOfferingAmount'),
        'Amount Sold': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalAmountSold'),
        'Amount Remaining': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalRemaining'),
        'Date of First Sale': safe_find_text(root, './/offeringData/typeOfFiling/dateOfFirstSale/value'),
        'Has Non-Accredited Investors': safe_find_text(root, './/offeringData/investors/hasNonAccreditedInvestors'),
        'Number Already Invested': safe_find_text(root, './/offeringData/investors/totalNumberAlreadyInvested'),
    }

    # Extract related persons as a list of dictionaries
    related_persons = root.findall('.//offeringData/relatedPersonsList/relatedPersonInfo')
    related_persons_list = []

    for person in related_persons:
        first_name = safe_find_text(person, './/relatedPersonName/firstName')
        last_name = safe_find_text(person, './/relatedPersonName/lastName')

        # Element.text is None for empty elements; skip those so str.join
        # does not raise TypeError.
        relationship_texts = [
            rel.text
            for rel in person.findall('.//relatedPersonRelationshipList/relationship')
            if rel.text
        ]

        related_persons_list.append({
            'Related Person Name': f"{first_name} {last_name}",
            'Related Person Address': safe_find_text(person, './/relatedPersonAddress/street1'),
            'Related Person City': safe_find_text(person, './/relatedPersonAddress/city'),
            'Related Person Relationships': ", ".join(relationship_texts),
        })

    # Process related persons data into role-based columns
    role_columns = process_related_persons(related_persons_list)
    row.update(role_columns)

    rows.append(row)

# Convert the list of rows to a pandas DataFrame
df = pd.DataFrame(rows)

# Display the resulting DataFrame
Editor is loading...
Leave a Comment