Untitled

mail@pastecode.io avatar
unknown
plain_text
2 months ago
2.4 kB
1
Indexable
Never
# Print a structural summary of df: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77944 entries, 0 to 77943
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   transaction_date            77944 non-null  datetime64[ns]
 1   agent_inn_dil               77944 non-null  float64       
 2   client_inn_dil              77944 non-null  object        
 3   client_name_dil             77944 non-null  object        
 4   client_name_GZPN            77944 non-null  object        
 5   total_monthly_ton_dil       77944 non-null  float64       
 6   MO_dealer_monthly_ton_GZPN  77944 non-null  float64       
 7   KP_dealer_monthly_ton_GZPN  77944 non-null  float64       
 8   okved_client                77944 non-null  object        
 9   label                       77944 non-null  object        
dtypes: datetime64[ns](1), float64(4), object(5)
memory usage: 5.9+ MB




# Keep only the rows whose 'anomaly' flag equals -1
# (presumably the outlier marker written by the detector -- confirm upstream).
anomalies = df_anomaly.loc[df_anomaly['anomaly'].eq(-1)]

# Collapse to one row per client INN: average the three tonnage columns and
# take the first observed value of each descriptive column.
anomalies_grouped = (
    anomalies
    .groupby('client_inn_dil')
    .agg({
        'MO_dealer_monthly_ton_GZPN': 'mean',
        'KP_dealer_monthly_ton_GZPN': 'mean',
        'total_monthly_ton_dil': 'mean',
        'label': 'first',
        'client_name_dil': 'first',
        'okved_client': 'first',
    })
    .reset_index()
)

# Order clients from the largest to the smallest mean total tonnage and
# renumber the rows from 0 so the index reflects the ranking.
anomalies_grouped_sorted = (
    anomalies_grouped
    .sort_values(by='total_monthly_ton_dil', ascending=False)
    .reset_index(drop=True)
)
anomalies_grouped_sorted


# Print a structural summary of the per-client anomaly table (row count, columns, dtypes, memory usage).
anomalies_grouped_sorted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   client_inn_dil              119 non-null    object 
 1   MO_dealer_monthly_ton_GZPN  119 non-null    float64
 2   KP_dealer_monthly_ton_GZPN  119 non-null    float64
 3   total_monthly_ton_dil       119 non-null    float64
 4   label                       119 non-null    object 
 5   client_name_dil             119 non-null    object 
 6   okved_client                119 non-null    object 
dtypes: float64(3), object(4)
memory usage: 6.6+ KB


# Descriptive statistics for the per-client anomaly rows whose label is
# 'dil_MO+KP' or 'dil_MO'.
# The identical expression used to be written twice in a row; the repeat only
# recomputed the same filter and statistics, so a single evaluation is kept.
anomalies_grouped_sorted[anomalies_grouped_sorted['label'].isin(['dil_MO+KP', 'dil_MO'])].describe()







df_new_animalies = df[
Leave a Comment