# Source code for ML.LinearRegression.linear_regression

import numpy as np
import matplotlib.pyplot as plt
import math

# Pause interval handed to plt.pause() by plot() after each redraw, so the
# interactive figure gets a chance to refresh without noticeably stalling.
TIME_SLEEP = 0.000000001


def train_sgd(X, y, alpha, w=None):
    """Trains a linear regression model using stochastic gradient descent.

    The weights are updated one sample at a time, cycling through the rows of
    `X` in order. After every single-sample update the mean squared error over
    the whole data set is recomputed. Training stops once more than 10000
    updates have been performed and the error differs from the previous
    update's error by at most 1% of that previous error.

    Parameters
    ----------
    X : numpy.ndarray
        Numpy array of data with dimensions n * m.
    y : numpy.ndarray
        Numpy array of outputs. Dimensions are n * 1, where n is the number of
        rows in `X`.
    alpha : float
        Describes the learning rate. Every per-sample step is scaled by
        ``alpha / n``.
    w : numpy.ndarray, optional
        The initial w vector with dimensions (m + 1) * 1 (the default is
        zero).

    Returns
    -------
    w : numpy.ndarray
        Trained vector with dimensions (m + 1) * 1, where m is the number of
        columns in `X`.

    Raises
    ------
    ValueError
        If `X` has no rows (there would be nothing to iterate over and the
        training loop could never terminate).
    FloatingPointError
        If the error becomes NaN or infinite, which means the iteration
        diverged (typically because `alpha` is too large) and the stopping
        rule could never be satisfied.

    """
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")

    # Prepend the bias column once; it is reused for both the updates and the
    # error evaluation below.
    X_b = np.hstack((np.ones((n_samples, 1)), X))
    n_weights = X_b.shape[1]

    if w is None:
        w = np.zeros((n_weights, 1))

    step = alpha / n_samples
    previous_error = None
    num_iters = 0

    while True:
        for i in range(n_samples):
            x_i = X_b[i].reshape(n_weights, 1)
            residual = np.dot(np.transpose(w), x_i) - y[i]
            w = w - step * residual * x_i

            # Same quantity evaluate_error(X, y, w) returns, but computed from
            # the already-built X_b instead of re-stacking the bias column on
            # every single update.
            error = float(np.sum((y - X_b.dot(w)) ** 2)) / n_samples

            if not np.isfinite(error):
                raise FloatingPointError(
                    "stochastic gradient descent diverged; "
                    "try a smaller alpha")

            # `<=` rather than `<`: with a perfect fit both errors are 0.0 and
            # a strict comparison against 0.01 * 0.0 could never succeed.
            if (previous_error is not None and num_iters > 10000 and
                    math.fabs(error - previous_error) <=
                    0.01 * previous_error):
                return w

            previous_error = error
            num_iters += 1


def train(X, y):
    """Trains a linear regression model using linear algebra.

    Solves the least-squares problem in closed form by applying the
    Moore-Penrose pseudo-inverse of the bias-augmented data matrix to `y`.

    Parameters
    ----------
    X : numpy.ndarray
        Numpy array of data with dimensions n * m.
    y : numpy.ndarray
        Numpy array of outputs. Dimensions are n * 1, where n is the number of
        rows in `X`.

    Returns
    -------
    w : numpy.ndarray
        Trained vector with dimensions (m + 1) * 1, where m is the number of
        columns in `X`.

    """

    # Add bias term
    X_b = np.hstack((np.ones((X.shape[0], 1)), X))

    # Compute pseudo-inverse. np.linalg.pinv is used instead of explicitly
    # forming inv(X_b^T X_b) X_b^T: both agree when X_b has full column rank,
    # but pinv also copes with collinear columns, where the explicit inverse
    # does not exist.
    X_inverse = np.linalg.pinv(X_b)

    # Compute w
    return X_inverse.dot(y)


# Plot data
def plot(X, y, w):
    """Redraw the data points together with the model's prediction line.

    The current figure is cleared, the predictions for `X` are drawn as a red
    line, the actual outputs are drawn as circle markers, and the figure is
    given a short pause (``TIME_SLEEP``) to refresh.

    Parameters
    ----------
    X : numpy.ndarray
        Numpy array of data with 1 column.
    y : numpy.ndarray
        Numpy array of outputs. Dimensions are n * 1, where n is the number of
        rows in `X`.
    w : numpy.ndarray
        Numpy array with dimensions 2 * 1.

    """
    # Bias-augmented data matrix: a leading column of ones followed by X.
    bias_column = np.ones((X.shape[0], 1))
    design = np.hstack((bias_column, X))
    fitted = design.dot(w)

    feature = X[:, 0]

    plt.clf()
    plt.plot(feature, fitted, 'r-', feature, y, 'o')
    plt.pause(TIME_SLEEP)


def init_plot(figsize=(15, 8)):
    """Initializes the plot.

    Switches matplotlib to interactive mode, creates a new figure of the
    requested size and shows it.

    Parameters
    ----------
    figsize : tuple, optional
        A tuple containing the width and height of the plot (the default is
        (15, 8)).

    """
    plt.ion()
    # The figure handle is not needed afterwards: pyplot keeps the new figure
    # as the current one, which is what later plt.* calls draw on.
    plt.figure(figsize=figsize)
    plt.show()


def evaluate_error(X, y, w):
    """Returns the mean squared error.

    Parameters
    ----------
    X : numpy.ndarray
        Numpy array of data.
    y : numpy.ndarray
        Numpy array of outputs. Dimensions are n * 1, where n is the number of
        rows in `X`. An array with the same number of elements but a
        different shape (for example a flat array of length n) is reshaped to
        match the predictions.
    w : numpy.ndarray
        Numpy array with dimensions (m + 1) * 1, where m is the number of
        columns in `X`.

    Returns
    -------
    float
        The mean squared error: the sum of squared differences between `y`
        and the predictions, divided by the number of rows in `X`.

    """

    X_b = np.hstack((np.ones((X.shape[0], 1)), X))
    y_predict = X_b.dot(w)

    # Without this, a flat y of shape (n,) minus predictions of shape (n, 1)
    # broadcasts to an (n, n) matrix and silently inflates the error.
    y = np.asarray(y)
    if y.shape != y_predict.shape and y.size == y_predict.size:
        y = y.reshape(y_predict.shape)

    dist = (y - y_predict) ** 2

    return float(np.sum(dist)) / X.shape[0]


def predict(X, w):
    """Returns the model's predictions for the data in `X`.

    Parameters
    ----------
    X : numpy.ndarray
        Numpy array of data with dimensions n * m, one data point per row. A
        single data point may also be given as a flat array of its m feature
        values; it is treated as one row.
    w : numpy.ndarray
        Numpy array with dimensions (m + 1) * 1, where m is the number of
        columns in `X`.

    Returns
    -------
    numpy.ndarray
        The predictions, one row per data point in `X`.

    """
    # Promote a flat feature vector to a single-row matrix; 2-D input is
    # passed through unchanged.
    X = np.atleast_2d(X)
    X_b = np.hstack((np.ones((X.shape[0], 1)), X))
    return X_b.dot(w)