# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 1.8 kB
# 1
# Indexable
# Never
import pandas as pd

def _nan_to_none(values):
    """Return *values* as a list with every pandas-missing entry replaced by None."""
    return [None if pd.isna(value) else value for value in values]


def generating_pairs_from_grouped_df(merged_df):
    """Build one nested keyword lookup per row of *merged_df*.

    The columns ``all_keywords``, ``all_numbers``, ``all_dimensions``,
    ``all_quantities`` and ``all_years`` are read in lockstep. For a row
    whose ``all_keywords`` cell is a list, the five cells are zipped
    element-wise (stopping at the shortest one) and folded into::

        {keyword: {number: {(dimension, quantity): [year, ...]}}}

    Keywords are stringified, lower-cased and stripped of spaces; missing
    values (as detected by ``pd.isna``) in the other four cells are
    replaced with ``None`` so they can serve as stable dict keys.

    Args:
        merged_df: Mapping-like object (presumably a grouped pandas
            DataFrame -- confirm against the caller) exposing the five
            columns named above.

    Returns:
        A list with one entry per row: the nested dict described above, or
        ``None`` when the row's ``all_keywords`` cell is not a list.
    """
    all_sequences = []

    rows = zip(
        merged_df['all_keywords'],
        merged_df['all_numbers'],
        merged_df['all_dimensions'],
        merged_df["all_quantities"],
        merged_df['all_years'],
    )

    for keywords, numbers, dimensions, quantities, years in rows:
        # Rows without a keyword list (e.g. a NaN cell) carry no pairs.
        if not isinstance(keywords, list):
            all_sequences.append(None)
            continue

        # NOTE: the other values may need preprocessing as well.
        keywords = [str(keyword).lower().replace(' ', '') for keyword in keywords]
        numbers = _nan_to_none(numbers)
        dimensions = _nan_to_none(dimensions)
        quantities = _nan_to_none(quantities)
        years = _nan_to_none(years)

        sequences = {}
        for keyword, number, dimension, quantity, year in zip(
            keywords, numbers, dimensions, quantities, years
        ):
            # setdefault creates each missing level on the way down, so the
            # first year seen for a new number under an already-known
            # keyword is recorded instead of being dropped.
            by_number = sequences.setdefault(keyword, {})
            by_pair = by_number.setdefault(number, {})
            by_pair.setdefault((dimension, quantity), []).append(year)

        all_sequences.append(sequences)

    return all_sequences