# Univariate linear regression in Python
# Simple Linear Regression Math by Hand
# Fit the line y = slope * x + intercept to the sample data below using the
# closed-form least-squares formulas, computed by hand with plain Python.

# Define data: X holds the x values, Y the matching y values (paired by index).
X = [
    1000, 4000, 5000, 4500, 3000, 4000,
    9000, 11000, 15000, 12000, 7000, 3000,
]
Y = [
    9914, 40487, 54324, 50044, 34719, 42551,
    94871, 118914, 158484, 131348, 78504, 36284,
]

# zip() below stops at the shorter list, so a length mismatch would silently
# drop points and yield a wrong fit; an empty list would fail in the mean with
# an unexplained ZeroDivisionError. Reject both with a clear message instead.
if not X or len(X) != len(Y):
    raise ValueError("X and Y must be non-empty and have the same length")

# Step 1: mean of the x values.
avg_X = sum(X) / len(X)

# Step 4: mean of the y values.
avg_Y = sum(Y) / len(Y)

# Steps 2, 3 & 5: accumulate the sum of squared x deviations (SSxx) and the
# sum of products of x and y deviations (SSxy) in a single pass.
SSxx = 0
SSxy = 0
for x, y in zip(X, Y):
    SSxx += (avg_X - x)**2  # SSxx = SSxx + (avg_X - x)**2
    SSxy += (avg_X - x) * (avg_Y - y)

# With every x identical there is no spread along x and the slope formula
# divides by zero; report that explicitly.
if SSxx == 0:
    raise ValueError("all X values are identical; the slope is undefined")

# Step 5.5: slope is the ratio of the two sums.
slope = SSxy / SSxx

# Step 6: intercept makes the line pass through the point (avg_X, avg_Y).
intercept = avg_Y - slope * avg_X

print(f"y = {slope} * x + {intercept}")

# Final linear regression equation is y = slope * x + intercept;
# evaluate it at x = 1000 as an example prediction.
print(slope * 1000 + intercept)