Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
2.6 kB
2
Indexable
Never
import matplotlib.pyplot as plt
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import OrdinalEncoder
from pandas.io.json import json_normalize
import pandas as pd
import json

from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
import datetime
import numpy as np
from pandas.io.json import json_normalize


class DataService:
    """Estimate how strongly each metric predicts a chosen target metric.

    The service wraps a table of records that must provide at least the
    columns ``metric_display_name`` and ``value`` (both are read by
    :meth:`get_importance`).  The records are reshaped into one column per
    metric, the non-target columns are converted to model-ready features and
    an ``ExtraTreesClassifier`` is fitted to obtain feature importances.
    """

    # Errors meaning "this column cannot be parsed as the attempted type".
    # Listing them explicitly (instead of a bare ``except``) keeps
    # KeyboardInterrupt/SystemExit and unrelated failures from being swallowed.
    _CONVERSION_ERRORS = (ValueError, TypeError, OverflowError)

    def __init__(self, json, metric):
        """Store the raw records and build the backing DataFrame.

        Args:
            json: Already-parsed record data in any form accepted by
                ``pandas.DataFrame``.  The parameter name shadows the
                ``json`` module inside this method only.
            metric: Name of the metric used as the prediction target by
                :meth:`get_importance`.
        """
        self.data = json
        self.df = pd.DataFrame(self.data)
        self.metric = metric
        # Not written by any method of this class; kept for external users.
        self.response = None
        # Filled with a pandas Series by get_importance().
        self.feat_importances = None

    def isNumeric(self, df_son):
        """Return a copy of ``df_son`` whose columns are usable as features.

        Each column is handled by the first rule that applies:

        1. Values parse as ``YYYY-MM-DD`` dates: the column is replaced by
           its month number and moved to the end of the frame.
        2. Values convert to ``float64``: the column is converted in place.
        3. Otherwise: the column is one-hot encoded with
           ``pandas.get_dummies`` and the dummy columns are appended at the
           end of the frame.

        The input frame is left untouched; callers must use the return value.

        Args:
            df_son: DataFrame of raw feature columns.

        Returns:
            A new DataFrame with the converted columns.
        """
        result = df_son.copy()
        # Iterate over a snapshot of the labels: ``result`` is rebuilt while
        # looping, but every original column must be visited exactly once.
        for column in list(df_son.columns):
            series = df_son[column]

            try:
                months = pd.to_datetime(series, format='%Y-%m-%d').dt.month
            except self._CONVERSION_ERRORS:
                pass
            else:
                # The month stays a single numeric column: get_dummies would
                # leave a numeric column unchanged, so no encoding is applied.
                result = pd.concat(
                    [result.drop(columns=[column]), months.to_frame()],
                    axis=1,
                )
                continue

            try:
                result[column] = series.astype('float64')
            except self._CONVERSION_ERRORS:
                dummies = pd.get_dummies(series.to_frame())
                result = pd.concat(
                    [result.drop(columns=[column]), dummies],
                    axis=1,
                )
        return result

    def _wide_frame(self):
        """Reshape the long record table into one column per metric.

        Values are laid out row by row in record order, so this assumes the
        records come in consecutive groups that list the metrics in the same
        order each time -- TODO confirm against the data producer.  Trailing
        records that do not fill a complete row are ignored.

        Raises:
            ValueError: If the table contains no metric at all.
        """
        metric_names = self.df['metric_display_name'].unique()
        n_metrics = len(metric_names)
        if n_metrics == 0:
            raise ValueError(
                'cannot compute feature importance: the data contains no metrics'
            )

        n_rows = len(self.df) // n_metrics
        values = self.df['value'].to_numpy()[:n_rows * n_metrics]
        wide = pd.DataFrame(
            values.reshape(n_rows, n_metrics),
            columns=metric_names,
        )
        # A mixed ``value`` column yields an all-object frame; let pandas pick
        # a proper dtype for every metric column individually.
        return wide.infer_objects()

    def get_importance(self):
        """Fit an extra-trees classifier and return the feature importances.

        The column named by ``self.metric`` is the target; all other metric
        columns are converted with :meth:`isNumeric` and used as features.
        The resulting importances are also stored on ``self.feat_importances``
        as a pandas Series indexed by feature name.

        Returns:
            JSON object string mapping each feature name to its importance.

        Raises:
            ValueError: If the data contains no metrics.
            KeyError: If a required column or the target metric is missing.
        """
        print('starts')
        df_son = self._wide_frame()

        print('df son')
        X = df_son.drop([self.metric], axis=1)
        y = df_son[self.metric]

        X = self.isNumeric(X)
        model = ExtraTreesClassifier()
        model.fit(X, y)

        feat_importances = pd.Series(model.feature_importances_, index=X.columns)
        self.feat_importances = feat_importances
        feature_importance_json = feat_importances.to_json()
        print('feature of importance')
        print(feature_importance_json)
        return feature_importance_json