Untitled

 avatar
unknown
python
a year ago
2.4 kB
2
Indexable
import pandas as pd

# Sample DataFrame with mixed data
data = {
    'Instrument1': "{'tag1': {'units': 'unit1', 'description': 'desc1'}}",
    'Instrument2': "{'tag2': {'units': 'unit2', 'description': 'desc2'}}",
    'NonInstrument1': 'Some non-instrument data',
    'Instrument3': None,  # Missing data in this column
    'NonInstrument2': 'More non-instrument data',
}

# Create the DataFrame from the sample data
df = pd.DataFrame(data, index=[0])

# Initialize empty lists to store the extracted values
instrument_names = []
tags = []
units = []
descriptions = []

# Define a placeholder for missing values
placeholder = 'N/A'

# Iterate over each column (instrument) in the DataFrame
for column_name in df.columns:
    # Check if the column contains valid JSON data
    data_str = df[column_name].iloc[0]
    if isinstance(data_str, str) and data_str.startswith('{') and data_str.endswith('}'):
        try:
            data_dict = eval(data_str)  # Using eval to convert the string to a dictionary
            
            # Find the first key in the dictionary (assuming it's the 'tag')
            tag_key = list(data_dict.keys())[0]
            tag_data = data_dict.get(tag_key, {})
            
            units_value = tag_data.get('units', placeholder)
            description_value = tag_data.get('description', placeholder)
        except (SyntaxError, TypeError, IndexError):
            # Handle cases where JSON data is not valid or data is missing
            tag_key = placeholder
            units_value = placeholder
            description_value = placeholder
    else:
        # Handle non-dictionary data in the column
        tag_key = placeholder
        units_value = placeholder
        description_value = placeholder
    
    # Append the extracted values to the respective lists
    if tag_key != placeholder:
        # Only add columns that contain valid JSON data
        instrument_names.append(column_name)
        tags.append(tag_key)
        units.append(units_value)
        descriptions.append(description_value)

# Create a new DataFrame from the extracted values
result_df = pd.DataFrame({
    'Instrument Name': instrument_names,
    'Tag': tags,
    'Units': units,
    'Description': descriptions
})

# Display the resulting DataFrame
print(result_df)