多项式回归

算法思想

如果你的数据比直线更复杂怎么办？例如抛物线。显然线性回归无法胜任这个工作，但我们只需要稍加修改，将每个特征的幂次方作为一个新特征即可，大致与线性回归类似，不过是多元一次和多元多次的区别。

线性回归假设目标拟合曲线函数为 $y = w_{1} x_{1} + w_{2} x_{2} + w_{3} x_{3} + \dots + b$ ，其中 $w_{n}$ 为系数 $x_{n}$ 为特征。

而多项式回归假设目标拟合曲线为 $y = w_{1} x_{1}^{n} + w_{2} x_{1}^{n - 1} x_{2} + w_{3} x_{1}^{n - 2} x_{2}^{2} + \dots + w_{n} x_{2}^{n}$ ，这里出于简便只写了两个未知数的情况。

实例

这里我用的是三次函数的图像外加了点噪声，用线性回归肯定是拟合不了的。

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

x = np.arange(-10, 10, 0.1)
y = 2.6 * x ** 3 - 3.14 * x ** 2 - 8.8 * x + np.random.normal(0, 700, (200, ))

plt.scatter(x, y)
plt.show()

X = x.reshape([-1, 1])
X = np.hstack([X ** 3, X ** 2, X])
X_b = np.hstack([np.ones([len(y), 1]), X])
# X [200, 3]
print(X.shape)

如算法思想所述，梯度下降的代码是没有改变的，只是添加了特征的幂指数作为额外的特征值。

def J(X_b, y, theta):
    return np.mean((X_b.dot(theta) - y) ** 2);

def DJ(X_b, y, theta):
    return X_b.T.dot((X_b.dot(theta) - y)) * 2 / len(y);

def gradient_descent(X_b, y, theta, eta = 0.01, n_iters = 1e4, epsilon = 1e-6):
    i_iter = 0
    
    while i_iter < n_iters:
        gradient = DJ(X_b, y, theta)
        last_theta = theta
        theta = theta - eta * gradient
        if np.abs(J(X_b, y, theta) - J(X_b, y, last_theta)) < epsilon:
            break
        i_iter += 1
        
    return theta

1 2	initial_theta = np.zeros(X_b.shape[1]) theta = gradient_descent(X_b, y, initial_theta, eta = 1e-6, n_iters = 1e5)

得出 theta = array([-12.65120346, 2.89719563, -3.12234806, -15.72650818])

也基本对应了我预设的函数系数[2.6, -3.14, -8.8]

注：theta 每一项分别对应了截距， $x^{3}$ 的系数， $x^{2}$ 的系数， $x$ 的系数。

pred_y = theta[1] * x ** 3 + theta[2] * x ** 2 + theta[3] * x + theta[0]
plt.scatter(x, y)
plt.plot(x, pred_y, color = "r")
plt.show()

代码实现

需要注意的是多项式化是成阶乘式的增长，比指数爆炸更可怕！

$\frac{(n + d)!}{d! n!}$ ，其中 n 为原本特征数，d 为最高次数。

class PolynomialRegession:
    
    # 生成多项式特征
    def generatePolynomialFeatures(self, X, degree):
        self.degree = degree
        if degree == 0:
            return np.ones_like(X)
        
        n = X.shape[1]
        ans = np.array(X)
        if n > 1:
            for i in range(n):
                ans = np.hstack([ans, (X[:, i] ** degree).reshape(-1,1)])
                for j in range(i+1, n):
                    for k in range(1, degree):
                        ans = np.hstack([ans, (X[:, i] ** k * X[:, j] ** (degree - k)).reshape(-1,1)])
        else:
            for k in range(2, degree + 1):
                ans = np.hstack([ans, X ** k])

        return np.hstack([ans, np.ones(ans.shape[0]).reshape([-1, 1])])
    
    # 均值方差归一化 -> avg = 0, std = 1
    def standardization(self, X):
        tmp = X[:, :-1]
        return np.hstack([(tmp - np.mean(tmp, axis=0)) / np.std(tmp, axis = 0), np.ones([len(X), 1])])
    
    # 使用MSE作为损失函数
    def f(self, X_b, y, theta):
        return np.mean((X_b @ theta - y) ** 2)

    # 损失函数的导数
    def df(self, X_b, y, theta):
        return X_b.T @(X_b @ theta - y) * 2 / len(y)

    # 梯度下降
    def gd(self, X_b, y, theta, eta = 0.01, n_iters = 1e4, epsilon = 1e-6, plot_process = True):
        i_iter = 0

        while i_iter < n_iters:
            if ((plot_process and (i_iter + 1) % 1000 == 0) and (i_iter + 1) <= 10000) or (i_iter + 1) == n_iters:
                display.clear_output(wait=True)
                plt.scatter(x, y1)
                plt.plot(x, (X_b @ theta).reshape(x.shape), c="green", label = 'predict')
                plt.plot(x, y2, c="red", label = 'truth')
                plt.legend()
                plt.title("degree = " + str(self.degree))
                plt.show()
                plt.pause(0.5)

            gradient = self.df(X_b, y, theta)
            last_theta = theta
            theta = theta - eta * gradient
            if abs(self.f(X_b, y, last_theta) - self.f(X_b, y, theta)) < epsilon:
                break
            i_iter += 1

        return theta
    
    # 拟合
    def fit(self, X, y, degree, eta = 1e-4, n_iters = 1e4, epsilon = 1e-6, plot_process = True):
        self.X_b = self.generatePolynomialFeatures(X, degree)
        self.X_b = self.standardization(self.X_b)
        initial_theta = np.zeros_like(self.X_b[1])
        self.theta = self.gd(self.X_b, y, initial_theta, eta=eta, n_iters=n_iters, epsilon=epsilon, plot_process=plot_process)
        self.coefficient = self.theta[:-1]
        self.intercept = self.theta[-1]
    
    # 预测
    def predict(self, X):
        return self.standardization(self.generatePolynomialFeatures(X, self.degree) @ self.theta)
    
    # 预测分析
    def score(self, X, y):
        return np.mean((predict(X) - y) ** 2)

测试

poly_reg = PolynomialRegression(3)
poly_reg.fit(x.reshape([-1, 1]), y, eta = 1e-6, n_iters = 1e5)

theta = poly_reg.theta
pred_y = theta[3] * x ** 3 + theta[2] * x ** 2 + theta[1] * x + theta[0]
plt.scatter(x, y)
plt.plot(x, pred_y, color = "r")
plt.show()

结果如出一辙。