Untitled

mail@pastecode.io avatar
unknown
plain_text
18 days ago
1.6 kB
3
Indexable
Never
import pandas as pd
import logging

def get_paragraph_idx(data: list) -> dict:
    """
    Map every sequence number that lies inside a paragraph to the paragraph name.

    A line is treated as a paragraph header when, after removing an optional
    trailing ``SKIP1`` token and trailing whitespace, it starts (in the first
    column) with a word character and ends with a period.  Headers containing
    ``EXIT`` close a paragraph; every other header opens one.  An opening
    header is paired with each closing header that shares the text before the
    first ``-`` (``0000-MAIN.`` pairs with ``0000-EXIT.``).  Every position
    from the opening line through the closing line (inclusive) is mapped to
    the stripped text of the opening header.  Opening headers without a
    matching exit are ignored; where ranges overlap, the opening header that
    appears later in ``data`` wins.

    Parameter:
    data : List : List of statements

    Returns:
    Dict : Dictionary with seq no (position of the line in ``data``) as keys
           and paragraph name as values; empty when nothing matches
    """
    logging.debug('Paragraph Name Extraction ==> Started')
    paragraph_dict = {}

    # Nothing to scan: skip pandas entirely, since the dtype of an empty
    # Series (and therefore the availability of ``.str``) varies by version.
    if data is None or len(data) == 0:
        logging.debug('Paragraph Name Extraction ==> Completed')
        return paragraph_dict

    lines = pd.Series(data, dtype=object)

    # Drop the literal suffix "SKIP1" only.  ``str.rstrip('SKIP1')`` would
    # strip any trailing run of the characters S, K, I, P and 1, turning a
    # line such as "X = 1.1" into "X = 1." and misreading it as a header.
    cleaned = lines.str.replace(r'SKIP1\Z', '', regex=True).str.rstrip()

    # Header test: first character is a word character, last one is a period.
    # Non-string entries yield NaN and are treated as "not a header".
    is_header = cleaned.str.match(r'\w.*\.$', na=False)
    headers = lines[is_header].str.strip()

    # Split headers into closing lines (grouped by prefix) and opening lines.
    exit_positions = {}
    openings = []
    for seq, name in headers.items():
        prefix = name.split('-')[0]
        if 'EXIT' in name:
            exit_positions.setdefault(prefix, []).append(seq)
        else:
            openings.append((seq, prefix, name))

    # Fill every position between an opening header and each matching exit.
    for start, prefix, name in openings:
        for end in exit_positions.get(prefix, ()):
            for seq in range(start, end + 1):
                paragraph_dict[seq] = name

    logging.debug('Paragraph Name Extraction ==> Completed')
    return paragraph_dict

import pandas as pd
import logging

# Sample statement list, one row per numbered paragraph: each paragraph opens
# with "<number>-MAIN." and is closed by the matching "<number>-EXIT." line.
data = [
    "0000-MAIN.", "MOVE A TO B.", "ADD 1 TO C.", "0000-EXIT.",
    "0001-MAIN.", "PARA3-CALC.", "MULTIPLY X BY Y.", "0001-EXIT.",
    "0002-MAIN.", "PARA3-CALC.", "MULTIPLY X BY Y.", "0002-EXIT.",
]

# Build the {sequence number: paragraph name} mapping for the sample.
result = get_paragraph_idx(data)

# Display the resulting dictionary.
print(result)
Leave a Comment