import numpy as np
import plotly.graph_objects as go
# ------------------------------------------------------------------------------
# 1. Synthetic data with an exactly collinear pair of predictors
# ------------------------------------------------------------------------------
np.random.seed(42)  # fixed seed so the noise draw below is repeatable
n = 50

# Ground truth: y is generated from x1 alone (intercept 2, slope 4).
b0_true = 2.0
b1_true = 4.0

# x2 is an exact multiple of x1, so it adds no independent information.
x1 = np.linspace(0, 1, n)
x2 = 3 * x1

noise = 0.5 * np.random.randn(n)
y = b0_true + b1_true * x1 + noise

# ------------------------------------------------------------------------------
# 2. Least-squares fit on the rank-deficient design matrix
# ------------------------------------------------------------------------------
# Columns of X are [1, x1, x2]; because x2 = 3*x1 the third column is a
# multiple of the second, so infinitely many coefficient vectors minimise the
# squared error. lstsq reports one of them together with the effective rank.
X = np.column_stack([np.ones(n), x1, x2])
beta, residuals, rank, s = np.linalg.lstsq(X, y, rcond=None)
b0_est, b1_est, b2_est = beta

print("A particular least-squares solution (minimum-norm):")
print(f" b0 = {b0_est:.3f}, b1 = {b1_est:.3f}, b2 = {b2_est:.3f}")
print(f"Rank of X = {rank} (expect 2, since one column is dependent)")
# ------------------------------------------------------------------------------
# 3. A one-parameter family of planes, indexed by t
# ------------------------------------------------------------------------------
# On the data, x2 = 3*x1, so the fitted value depends on the slopes only through
# the combination b1 + 3*b2. Fix that combination at
#     c = b1_est + 3*b2_est
# and slide along it: b2(t) = b2_est + t, b1(t) = c - 3*b2(t).
# Every plane built this way gives the same predictions at the data points.
# ------------------------------------------------------------------------------
c = b1_est + 3 * b2_est
t_values = np.linspace(-2, 2, 5)

fig = go.Figure()

# Observed points in (x1, x2, y) space.
data_points = go.Scatter3d(
    x=x1,
    y=x2,
    z=y,
    mode='markers',
    marker={'size': 4, 'color': 'blue'},
    name='Data Points',
)
fig.add_trace(data_points)

# Rectangular 20x20 grid spanning the observed x1 and x2 ranges.
x1_grid = np.linspace(x1.min(), x1.max(), 20)
x2_grid = np.linspace(x2.min(), x2.max(), 20)
X1_surf, X2_surf = np.meshgrid(x1_grid, x2_grid)

# One semi-transparent surface per value of t.
for t in t_values:
    b2_t = b2_est + t
    b1_t = c - 3 * b2_t
    Y_surf = b0_est + b1_t * X1_surf + b2_t * X2_surf
    plane = go.Surface(
        x=X1_surf,
        y=X2_surf,
        z=Y_surf,
        opacity=0.5,
        colorscale='Oranges',
        showscale=False,
        name=f"Plane t={t:.2f}",
    )
    fig.add_trace(plane)

# ------------------------------------------------------------------------------
# 4. The line shared by every plane in the family
# ------------------------------------------------------------------------------
# Substituting x2 = 3*x1 into any of the planes above gives
#     y = b0_est + (b1(t) + 3*b2(t)) * x1 = b0_est + c * x1,
# which does not depend on t. Draw that line over the observed x1 range.
# ------------------------------------------------------------------------------
x1_line = np.linspace(x1.min(), x1.max(), 50)
x2_line = 3 * x1_line
y_line = b0_est + c * x1_line

ridge = go.Scatter3d(
    x=x1_line,
    y=x2_line,
    z=y_line,
    mode='lines',
    line={'color': 'red', 'width': 5},
    name='Common Intersection Line',
)
fig.add_trace(ridge)

# Axis labels and a cubic aspect ratio for the interactive 3D scene.
fig.update_layout(
    title="Infinite Solutions: Exact Multicollinearity in Multiple Linear Regression",
    scene={
        'xaxis_title': 'x1',
        'yaxis_title': 'x2',
        'zaxis_title': 'y',
        'aspectmode': 'cube',
    },
)
fig.show()
# Output:
#   A particular least-squares solution (minimum-norm):
#    b0 = 2.032, b1 = 0.371, b2 = 1.113
#   Rank of X = 2 (expect 2, since one column is dependent)
# ------------------------------------------------------------------------------
# 1. Two near-collinear samples drawn from one underlying model
# ------------------------------------------------------------------------------
# NOTE: the re-seed call (np.random.seed(42)) is left disabled here, so the
# draws below continue from whatever state the global NumPy RNG is already in.
n = 50                    # points per dataset
small_noise_scale = 0.05  # std-dev of the jitter that breaks exact collinearity

# Coefficients of the generating model y = b0 + b1*x1 + b2*x2
b0_true = 2.0
b1_true = 3.0
b2_true = 1.5

# Both datasets share the same evenly spaced x1 values.
x1_A = np.linspace(0, 1, n)
x1_B = np.linspace(0, 1, n)

# -------- Dataset A: x2 is 3*x1 plus a small perturbation --------
x2_A = 3 * x1_A + small_noise_scale * np.random.randn(n)
noise_A = 0.5 * np.random.randn(n)
y_A = b0_true + b1_true * x1_A + b2_true * x2_A + noise_A

# -------- Dataset B: same recipe, fresh random draws --------
x2_B = 3 * x1_B + small_noise_scale * np.random.randn(n)
noise_B = 0.5 * np.random.randn(n)
y_B = b0_true + b1_true * x1_B + b2_true * x2_B + noise_B

# ------------------------------------------------------------------------------
# 2. Independent least-squares fit for each dataset
# ------------------------------------------------------------------------------
# Design matrices have columns [1, x1, x2]; only the coefficient vector
# returned by lstsq is kept.
X_A = np.column_stack([np.ones(n), x1_A, x2_A])
X_B = np.column_stack([np.ones(n), x1_B, x2_B])

beta_A, _, _, _ = np.linalg.lstsq(X_A, y_A, rcond=None)
beta_B, _, _, _ = np.linalg.lstsq(X_B, y_B, rcond=None)

b0_est_A, b1_est_A, b2_est_A = beta_A
b0_est_B, b1_est_B, b2_est_B = beta_B

# Report both sets of estimates.
print("Dataset A estimated coefficients:")
print(f" b0 = {b0_est_A:.3f}, b1 = {b1_est_A:.3f}, b2 = {b2_est_A:.3f}")
print("\nDataset B estimated coefficients:")
print(f" b0 = {b0_est_B:.3f}, b1 = {b1_est_B:.3f}, b2 = {b2_est_B:.3f}")
# ------------------------------------------------------------------------------
# 3. One 3D figure: both point clouds plus the plane fitted to each
# ------------------------------------------------------------------------------
fig = go.Figure()

# Point clouds first (A in blue, then B in red) so the trace order is
# scatter A, scatter B, plane A, plane B.
for xs, ys, zs, colour, label in (
    (x1_A, x2_A, y_A, 'blue', 'Data A Points'),
    (x1_B, x2_B, y_B, 'red', 'Data B Points'),
):
    fig.add_trace(
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode='markers',
            marker={'size': 4, 'color': colour},
            name=label,
        )
    )

# Evaluation grid for the planes: x1 over [0, 1] and x2 over [0, 3],
# the nominal ranges given that x2 is roughly 3*x1.
grid_size = 20
x1_grid = np.linspace(0, 1, grid_size)
x2_grid = np.linspace(0, 3, grid_size)
X1_surf, X2_surf = np.meshgrid(x1_grid, x2_grid)

# Heights of each fitted plane on the grid.
Y_surf_A = b0_est_A + b1_est_A * X1_surf + b2_est_A * X2_surf
Y_surf_B = b0_est_B + b1_est_B * X1_surf + b2_est_B * X2_surf

# Semi-transparent surfaces, colour-matched to their datasets.
for heights, palette, label in (
    (Y_surf_A, 'Blues', 'Fitted Plane A'),
    (Y_surf_B, 'Reds', 'Fitted Plane B'),
):
    fig.add_trace(
        go.Surface(
            x=X1_surf,
            y=X2_surf,
            z=heights,
            opacity=0.4,
            colorscale=palette,
            showscale=False,
            name=label,
        )
    )

# Title, axis labels and a cubic aspect ratio.
fig.update_layout(
    title="Two Near-Collinear Datasets and Their Fitted Planes",
    scene={
        'xaxis_title': 'x1',
        'yaxis_title': 'x2',
        'zaxis_title': 'y',
        'aspectmode': 'cube',
    },
)
fig.show()
# Output:
#   Dataset A estimated coefficients:
#    b0 = 2.034, b1 = 11.165, b2 = -1.256
#
#   Dataset B estimated coefficients:
#    b0 = 2.254, b1 = 4.857, b2 = 0.764
# ------------------------------------------------------------------------------
# 1. Two samples with independently drawn predictors, same underlying model
# ------------------------------------------------------------------------------
# NOTE: the re-seed call (np.random.seed(42)) is left disabled here, so the
# draws below continue from whatever state the global NumPy RNG is already in.
n = 50  # points per dataset

# Coefficients of the generating model y = b0 + b1*x1 + b2*x2
b0_true = 2.0
b1_true = 3.0
b2_true = 1.5

# --- Dataset A: x1 and x2 are separate uniform draws on [0, 1) ---
x1_A = np.random.rand(n)
x2_A = np.random.rand(n)
noise_A = 0.5 * np.random.randn(n)
y_A = b0_true + b1_true * x1_A + b2_true * x2_A + noise_A

# --- Dataset B: same recipe, fresh random draws ---
x1_B = np.random.rand(n)
x2_B = np.random.rand(n)
noise_B = 0.5 * np.random.randn(n)
y_B = b0_true + b1_true * x1_B + b2_true * x2_B + noise_B

# ------------------------------------------------------------------------------
# 2. Least-squares fit for each dataset
# ------------------------------------------------------------------------------
# Design matrices have columns [1, x1, x2]; only the coefficient vector
# returned by lstsq is kept.
X_A = np.column_stack([np.ones(n), x1_A, x2_A])
X_B = np.column_stack([np.ones(n), x1_B, x2_B])

beta_A, _, _, _ = np.linalg.lstsq(X_A, y_A, rcond=None)
beta_B, _, _, _ = np.linalg.lstsq(X_B, y_B, rcond=None)

b0_est_A, b1_est_A, b2_est_A = beta_A
b0_est_B, b1_est_B, b2_est_B = beta_B

print("Dataset A estimated coefficients:")
print(f" b0 = {b0_est_A:.3f}, b1 = {b1_est_A:.3f}, b2 = {b2_est_A:.3f}")
print("\nDataset B estimated coefficients:")
print(f" b0 = {b0_est_B:.3f}, b1 = {b1_est_B:.3f}, b2 = {b2_est_B:.3f}")
# ------------------------------------------------------------------------------
# 3. One 3D figure: both point clouds plus the plane fitted to each
# ------------------------------------------------------------------------------
fig = go.Figure()

# Point clouds first (A in blue, then B in red) so the trace order is
# scatter A, scatter B, plane A, plane B.
for xs, ys, zs, colour, label in (
    (x1_A, x2_A, y_A, 'blue', 'Data A'),
    (x1_B, x2_B, y_B, 'red', 'Data B'),
):
    fig.add_trace(
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode='markers',
            marker={'size': 4, 'color': colour},
            name=label,
        )
    )

# Evaluation grid for the planes over the unit square in (x1, x2).
grid_size = 20
x1_grid = np.linspace(0, 1, grid_size)
x2_grid = np.linspace(0, 1, grid_size)
X1_surf, X2_surf = np.meshgrid(x1_grid, x2_grid)

# Heights of each fitted plane on the grid.
Y_surf_A = b0_est_A + b1_est_A * X1_surf + b2_est_A * X2_surf
Y_surf_B = b0_est_B + b1_est_B * X1_surf + b2_est_B * X2_surf

# Semi-transparent surfaces, colour-matched to their datasets.
for heights, palette, label in (
    (Y_surf_A, 'Blues', 'Plane A'),
    (Y_surf_B, 'Reds', 'Plane B'),
):
    fig.add_trace(
        go.Surface(
            x=X1_surf,
            y=X2_surf,
            z=heights,
            opacity=0.4,
            colorscale=palette,
            showscale=False,
            name=label,
        )
    )

# Title, axis labels and a cubic aspect ratio.
fig.update_layout(
    title="Two Datasets (No Collinearity) from the Same Model",
    scene={
        'xaxis_title': 'x1',
        'yaxis_title': 'x2',
        'zaxis_title': 'y',
        'aspectmode': 'cube',
    },
)
fig.show()
# Output:
#   Dataset A estimated coefficients:
#    b0 = 2.049, b1 = 3.057, b2 = 1.416
#
#   Dataset B estimated coefficients:
#    b0 = 1.714, b1 = 3.380, b2 = 1.573