Untitled
unknown
plain_text
2 years ago
2.2 kB
4
Indexable
Never
username = os.getlogin()

# Change the dataframe here to your input.
higher_ed = pd.read_excel(
    r"C:\Users\{}\HIP Investor Dropbox\HIP Investor Team Folder\Ania Resources\Python\Data_Complete_Input.xlsx".format(username)
)

# Set this to the appropriate column for the classification differentiator.
# dropna() discards missing entries regardless of how they are stored; a
# `type(value) != float` test would let numpy.float64 NaN slip through.
classification = list(higher_ed['Sector'].dropna().unique())
print(classification)

# Set this list to be the metrics you want to histogram.
metrics = ['...']


def plot_class(metric, classtype, ax, class_column):
    """Draw an annotated histogram of one metric for one class onto ``ax``.

    Rows of the module-level ``higher_ed`` dataframe whose ``class_column``
    equals ``classtype`` are selected, and the non-missing values of
    ``metric`` are plotted as a 20-bin count histogram. Each bar is labelled
    with its count, dashed vertical lines mark the mean (red) and the median
    (blue), and a text box with the ``describe()`` summary is added.

    Args:
        metric: Column label in ``higher_ed`` holding the values to plot.
        classtype: Value of ``class_column`` identifying the rows to keep.
        ax: Axes-like object to draw on (presumably a matplotlib ``Axes``;
            it must provide ``hist``, ``text``, ``axvline``, ``set_title``
            and ``transAxes``).
        class_column: Column label in ``higher_ed`` used for the selection.

    Returns:
        None. ``ax`` is modified in place; nothing is drawn when the
        selection contains no non-missing values.
    """
    data = higher_ed[higher_ed[class_column] == classtype][metric].dropna()

    # Nothing to plot (and no max/mean to compute) for an empty selection.
    if data.empty:
        return

    counts, _edges, bars = ax.hist(
        data, bins=20, edgecolor='black', density=False, histtype='bar'
    )

    # Add the count of entities to each bar, centred horizontally just
    # below the top edge of the bar.
    for count, bar in zip(counts, bars):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() - 0.2,
            int(count),
            ha='center',
            va='bottom',
            fontsize=8,
        )

    # Red dashed line for the mean, blue dashed line for the median.
    ax.axvline(data.mean(), color='red', linestyle='dashed', linewidth=2)
    ax.axvline(data.median(), color='blue', linestyle='dashed', linewidth=2)

    # Title: class name on the first line, value range on the second.
    # NOTE(review): the lower bound is the literal 0, not data.min() --
    # confirm that every metric is expected to start at 0.
    max_val = data.max()
    ax.set_title('\n'.join([f"{classtype}", f"(0 - {max_val:.1f})"]), fontsize='small')

    # Drop the last line of the describe() text (presumably the
    # "Name: ..., dtype: ..." footer) so only the statistics remain.
    data_des = '{}'.format(data.describe())
    data_des_str = '\n'.join(data_des.splitlines()[:-1])

    # Text box with the class type followed by the summary statistics.
    textstr = '\n'.join((
        '{}'.format(classtype),
        '{}'.format(data_des_str),
    ))
    props = dict(boxstyle='round', facecolor='white', alpha=0.5)
    ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=8,
            verticalalignment='top', bbox=props)