Untitled
unknown
plain_text
a year ago
2.5 kB
2
Indexable
Never
def _majority_label(data, tie_label):
    """Return the majority class (0 or 1) of ``data['diabetes_mellitus']``.

    Values equal to 1 are counted as positive; every other value is counted
    as negative.  ``tie_label`` is returned when both counts are equal, which
    includes the case where ``data`` has no rows.
    """
    positives = int((data['diabetes_mellitus'] == 1).sum())
    negatives = data.shape[0] - positives
    if positives == negatives:
        return tie_label
    return 1 if positives > negatives else 0


def build_tree(data, max_depth, min_samples_split, depth):
    """Recursively build a binary decision tree for ``diabetes_mellitus``.

    Args:
        data: DataFrame holding the samples that reached this node. It must
            contain a ``diabetes_mellitus`` column with the class labels.
        max_depth: no split is attempted once ``depth`` reaches this value.
        min_samples_split: a node is only split when it holds strictly more
            rows than this.
        depth: depth of the current node (callers start the recursion with
            the depth of the root).

    Returns:
        Either a leaf label (``0`` or ``1``) or an internal node of the form
        ``{"<feature> <= <threshold>": [left_subtree, right_subtree]}``.
        When both children turn out to be equal, the split is dropped and the
        shared child is returned instead.
    """
    # Stopping rule: depth budget exhausted or too few rows left to split.
    # NOTE(review): ties resolve to 1 here but to 0 in the leaves below. This
    # mirrors the historical behaviour of this function; confirm whether the
    # difference is intentional before unifying it.
    if depth >= max_depth or data.shape[0] <= min_samples_split:
        return _majority_label(data, tie_label=1)

    # find_best_split() is defined elsewhere; it is expected to return the
    # information gain together with the threshold and feature achieving it.
    ig, threshold, feature = find_best_split(data)

    # Written as `not ig > 0` so that a NaN gain is also treated as "no gain".
    if not ig > 0:
        return _majority_label(data, tie_label=0)

    left, right = make_partition(data, feature, threshold)

    # A split that leaves one side empty separates nothing, so this node
    # becomes a leaf.  The previous code returned the *feature name* here,
    # which put a string where a class label is expected.
    if left.shape[0] == 0 or right.shape[0] == 0:
        return _majority_label(data, tie_label=0)

    question = "{} {} {}".format(feature, "<=", threshold)
    left_subtree = build_tree(left, max_depth, min_samples_split, depth + 1)
    right_subtree = build_tree(right, max_depth, min_samples_split, depth + 1)

    # Identical children make the question irrelevant: prune it.
    if left_subtree == right_subtree:
        return left_subtree
    return {question: [left_subtree, right_subtree]}