Untitled
unknown
plain_text
3 years ago
2.2 kB
13
Indexable
# The login name is substituted into the per-user Dropbox path below.
username = os.getlogin()
# Change the workbook path here to point at your own input data.
higher_ed = pd.read_excel(r"C:\Users\{}\HIP Investor Dropbox\HIP Investor Team Folder\Ania Resources\Python\Data_Complete_Input.xlsx".format(username))
# Set this to the appropriate column for the classification differentiator.
# dropna() discards missing cells before unique() runs, so no NaN can end up
# in the list whatever the column's dtype is (a type(value) == float check
# misses NaN values that are NumPy scalars rather than built-in floats).
classification = list(higher_ed['Sector'].dropna().unique())
print(classification)
# Set this list to be the metrics you want to histogram.
metrics = ['...']
#Plotting histograms with detailed description
def plot_class(metric, classtype, ax, class_column):
    """Draw an annotated histogram of one metric for one class onto ``ax``.

    Rows of the module-level ``higher_ed`` frame whose ``class_column`` equals
    ``classtype`` are selected and missing ``metric`` values are dropped. The
    remaining values are drawn as a 20-bin count histogram; each bar is
    labelled with its count, dashed vertical lines mark the mean (red) and the
    median (blue), and a text box shows the ``describe()`` summary. Nothing is
    drawn when no values remain after filtering.

    Args:
        metric: Column of ``higher_ed`` to histogram.
        classtype: Value of ``class_column`` that selects the rows to plot.
        ax: Matplotlib axes to draw on (uses ``hist``, ``text``, ``axvline``,
            ``set_title`` and ``transAxes``).
        class_column: Column of ``higher_ed`` holding the class labels.
    """
    data = higher_ed[higher_ed[class_column] == classtype][metric].dropna()

    # Nothing to plot for this class/metric combination.
    if data.empty:
        return

    counts, _bins, patches = ax.hist(data, bins=20, edgecolor='black',
                                     density=False, histtype='bar')

    # Label every bar with its count; the label's bottom edge sits 0.2 data
    # units below the top of the bar.
    for count, patch in zip(counts, patches):
        ax.text(patch.get_x() + patch.get_width() / 2,
                patch.get_height() - 0.2,
                int(count), ha='center', va='bottom', fontsize=8)

    # Red dashed line for the mean, blue dashed line for the median.
    ax.axvline(data.mean(), color='red', linestyle='dashed', linewidth=2)
    ax.axvline(data.median(), color='blue', linestyle='dashed', linewidth=2)

    # NOTE(review): the lower bound shown in the title is a literal 0, not the
    # minimum of the data -- confirm this is intended for every metric.
    max_val = data.max()
    ax.set_title(f"{classtype}\n(0 - {max_val:.1f})", fontsize='small')

    # Drop the last line of the describe() printout (the "Name: ..., dtype: ..."
    # footer) so only the statistics remain.
    summary_lines = str(data.describe()).splitlines()[:-1]
    textstr = '\n'.join((f"{classtype}", '\n'.join(summary_lines)))

    # Text box with the class name and the summary statistics, positioned in
    # axes coordinates.
    props = dict(boxstyle='round', facecolor='white', alpha=0.5)
    ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=8,
            verticalalignment='top', bbox=props)