Untitled

 avatar
unknown
plain_text
2 months ago
1.7 kB
2
Indexable
def read_column(filename, col):
    """Read one integer column from a comma-separated text file.

    The first line of the file is treated as a header and discarded.

    Args:
        filename: Path of the file to read.
        col: Zero-based index of the comma-separated field to extract.

    Returns:
        A list containing ``int(field)`` for every non-blank data row, in
        file order. The list is empty when the file has no data rows.

    Raises:
        ValueError: If a selected field cannot be parsed by ``int``.
        IndexError: If a data row has no field at index ``col``.
    """
    result = []
    with open(filename) as file:
        # Drop the header; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (commonly a trailing newline at EOF).
                continue
            result.append(int(line.split(',')[col]))
    # Return only after every row was consumed (previously this returned
    # from inside the loop, yielding just the first data row).
    return result
def standardize(data):
    """Linearly rescale *data* so its minimum maps to 0 and its maximum to 1.

    Each element ``x`` becomes ``(x - min) / (max - min)``. When all
    elements compare equal there is no range to divide by, so a list of
    ``0.0`` of the same length is returned instead.

    Args:
        data: A non-empty sequence of numbers.

    Returns:
        A new list with one rescaled value per input element.

    Raises:
        ValueError: If *data* is empty (raised by ``min``).
    """
    lo, hi = min(data), max(data)
    if hi != lo:
        span = hi - lo
        scaled = []
        for value in data:
            scaled.append((value - lo) / span)
        return scaled
    # Degenerate case: constant input, avoid dividing by zero.
    return [0.0] * len(data)

def gradient_descent(x, y, lr, a=0, b=0, tolerance=0.0001, max_iter=None):
    """Fit ``y ~ a + b * x`` by batch gradient descent on mean squared error.

    Each iteration computes the gradient of the mean squared error with
    respect to the intercept and the coefficient over all data points,
    scales it by the learning rate, and subtracts the resulting steps from
    the parameters. Iteration stops once both step sizes are smaller in
    magnitude than ``tolerance``.

    Args:
        x: Sequence of predictor values.
        y: Sequence of response values, same length as ``x``.
        lr: Learning rate applied to the gradient.
        a: Initial intercept.
        b: Initial coefficient.
        tolerance: Stop when ``abs(step)`` of both parameters is below this.
        max_iter: Optional cap on the number of parameter updates. ``None``
            (the default) means iterate until the tolerance is met. When
            the cap is hit, the current estimates are returned as-is.

    Returns:
        A tuple ``(a, b)`` with the fitted intercept and coefficient.

    Raises:
        ValueError: If the inputs are empty or differ in length.
        ArithmeticError: If a step becomes infinite or NaN, which means the
            iteration diverged (typically because ``lr`` is too large).
    """
    import math

    n = len(y)  # number of data points
    if n == 0:
        raise ValueError("gradient_descent requires at least one data point")
    if len(x) != n:
        raise ValueError("x and y must have the same length")

    iterations = 0
    while max_iter is None or iterations < max_iter:
        total_error_a = 0
        total_error_b = 0
        for x_i, y_i in zip(x, y):
            error = (b * x_i + a) - y_i  # prediction minus observation
            # Accumulate the gradient components.
            total_error_b += error * x_i
            total_error_a += error

        # Step = learning rate * d(MSE)/d(param).
        step_b = (lr * 2 * total_error_b) / n
        step_a = (lr * 2 * total_error_a) / n

        # A NaN step never compares below the tolerance, so without this
        # check a diverging run would loop forever.
        if not (math.isfinite(step_a) and math.isfinite(step_b)):
            raise ArithmeticError(
                "gradient descent diverged; try a smaller learning rate"
            )

        b -= step_b
        a -= step_a
        iterations += 1

        if abs(step_b) < tolerance and abs(step_a) < tolerance:
            break  # both parameters have effectively stopped moving
    return a, b

# Load the two series used for the regression: field 0 of each CSV row is
# bound to `sizes`, field 2 to `prices`.
sizes, prices = (
    read_column('portland_housing_full.csv', column) for column in (0, 2)
)

# Rescale both series with standardize(); the rescaled sizes are printed
# before the prices are processed.
sizes_std = standardize(sizes)
print(sizes_std)
prices_std = standardize(prices)

# Fit on the rescaled series with a learning rate of 0.01. a is the
# intercept and b the coefficient, both expressed in the rescaled units
# rather than the raw CSV units.
a, b = gradient_descent(sizes_std, prices_std, 0.01)

print(f'The GD estimate of regression of Price on Size is price = {a:.4f} + {b:.4f} * size.')





Editor is loading...
Leave a Comment