Untitled

mail@pastecode.io avatar
unknown
plain_text
2 years ago
2.2 kB
4
Indexable
Never
# Current login name; used to fill in the per-user Dropbox path below.
username = os.getlogin()
# Change the dataframe here to your input.
higher_ed = pd.read_excel(r"C:\Users\{}\HIP Investor Dropbox\HIP Investor Team Folder\Ania Resources\Python\Data_Complete_Input.xlsx".format(username))

# Set this to the appropriate column for the classification differentiator.
# dropna() discards missing labels (NaN/None) so that only real class values
# remain; unique() keeps them in order of first appearance.
classification = list(higher_ed['Sector'].dropna().unique())
print(classification)

# Set this list to be the metrics you want to histogram.
metrics = ['...']

#Plotting histograms with detailed description
def plot_class(metric, classtype, ax, class_column, *, df=None, bins=20):
    """Draw an annotated histogram of one metric for one class onto ``ax``.

    Rows whose ``class_column`` equals ``classtype`` are selected, missing
    values of ``metric`` are dropped, and the remaining values are
    histogrammed. Every non-empty bar is labelled with its count, dashed
    vertical lines mark the mean (red) and median (blue), the title shows the
    class and the maximum value, and a text box shows the ``describe()``
    summary. Nothing is drawn when no values remain after filtering.

    Args:
        metric: Name of the column whose values are histogrammed.
        classtype: Value of ``class_column`` that selects the rows to plot.
        ax: Axes object to draw on (used through ``hist``, ``text``,
            ``axvline``, ``set_title`` and ``transAxes``).
        class_column: Name of the column holding the class labels.
        df: DataFrame to read from. Defaults to the module-level
            ``higher_ed`` when omitted.
        bins: Number of histogram bins passed to ``ax.hist``.
    """
    frame = higher_ed if df is None else df
    data = frame.loc[frame[class_column] == classtype, metric].dropna()

    # Only plot when there is something to show.
    if not data.empty:
        counts, _, patches = ax.hist(data, bins=bins, edgecolor='black',
                                     density=False, histtype='bar')

        # Label each bar with its count. Empty bins are skipped: their label
        # would be placed at y = -0.2, i.e. below the x-axis.
        for count, patch in zip(counts, patches):
            if count == 0:
                continue
            ax.text(patch.get_x() + patch.get_width() / 2,
                    patch.get_height() - 0.2,
                    int(count), ha='center', va='bottom', fontsize=8)

        # Red dashed line for the mean, blue dashed line for the median.
        ax.axvline(data.mean(), color='red', linestyle='dashed', linewidth=2)
        ax.axvline(data.median(), color='blue', linestyle='dashed', linewidth=2)

        # Title: class name on the first line, value range on the second.
        # NOTE(review): the lower bound is hard-coded to 0 rather than taken
        # from the data -- confirm the metrics can never be negative or
        # start above zero.
        max_val = data.max()
        ax.set_title(f"{classtype}\n(0 - {max_val:.1f})", fontsize='small')

        # to_string() renders the summary rows without the trailing
        # "Name: ..., dtype: ..." footer that the plain repr appends.
        summary = data.describe().to_string()

        # Text box with the class type followed by the summary statistics.
        textstr = '\n'.join((str(classtype), summary))
        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
        ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=8,
                verticalalignment='top', bbox=props)