
I am working on some price data with x = day1, day2, day3, etc. On day 1 I have, say, 15 price points (y); on day 2 I have 30 price points (y2); and so on.

When I read the documentation for Gaussian process regression (http://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcess.html#sklearn.gaussian_process.GaussianProcess.fit), it says:

y is shape (n_samples, n_targets) with the observations of the output to be predicted.

I assume n_targets refers to all the price points I observed on each day. However, the number of price points per day is not the same, and I wonder how to deal with a case like this.
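For concreteness, here is roughly what my data looks like, together with the only workaround I can think of (the numbers are made up): flatten everything so that each price point becomes its own sample, with its day repeated as the x value.

import numpy as np

# Hypothetical ragged price data: a different number of price points per day.
prices_per_day = {
    1: [10.2, 10.5, 9.8],           # 3 observations on day 1
    2: [11.0, 11.3],                # 2 observations on day 2
    3: [10.9, 11.1, 11.4, 11.2],    # 4 observations on day 3
}

# Flatten: each price point becomes one sample, with its day as the x value.
X = np.array([[day] for day, prices in prices_per_day.items() for _ in prices],
             dtype=float)                               # shape (n_samples, 1)
y = np.array([p for prices in prices_per_day.values() for p in prices])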

Many thanks!

1 Answer


I have written an implementation of Gaussian process regression in Python using only NumPy. My aim was to understand it by implementing it, and it may be helpful for you:

https://github.com/muatik/machine-learning-examples/blob/master/gaussianprocess2.ipynb

import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(color_codes=True)

%matplotlib inline


class GP(object):

    @classmethod
    def kernel_bell_shape(cls, x, y, delta=1.0):
        # Squared-exponential ("bell-shaped" / RBF) kernel.
        return np.exp(-1 / 2.0 * np.power(x - y, 2) / delta)

    @classmethod
    def kernel_laplacian(cls, x, y, delta=1):
        # Laplacian (exponential) kernel.
        return np.exp(-1 / 2.0 * np.abs(x - y) / delta)

    @classmethod
    def generate_kernel(cls, kernel, delta=1):
        # Return the kernel with its width parameter delta fixed.
        def wrapper(*args, **kwargs):
            kwargs.update({"delta": delta})
            return kernel(*args, **kwargs)
        return wrapper

    def __init__(self, x, y, cov_f=None, R=0):
        super().__init__()
        self.x = x
        self.y = y
        self.N = len(self.x)
        self.R = R  # observation-noise / regularization term on the diagonal

        self.sigma = []
        self.mean = []
        self.cov_f = cov_f if cov_f else self.kernel_bell_shape
        self.setup_sigma()

    @classmethod
    def calculate_sigma(cls, x, cov_f, R=0):
        # Covariance matrix of the training inputs; the diagonal is 1
        # (both kernels give cov_f(x, x) == 1), plus the noise term R.
        N = len(x)
        sigma = np.ones((N, N))
        for i in range(N):
            for j in range(i + 1, N):
                cov = cov_f(x[i], x[j])
                sigma[i][j] = cov
                sigma[j][i] = cov

        sigma = sigma + R * np.eye(N)
        return sigma

    def setup_sigma(self):
        self.sigma = self.calculate_sigma(self.x, self.cov_f, self.R)

    def predict(self, x):
        # Prior variance at the query point, plus observation noise.
        cov = self.cov_f(x, x) + self.R
        sigma_1_2 = np.zeros((self.N, 1))
        for i in range(self.N):
            sigma_1_2[i] = self.cov_f(self.x[i], x)

        # Standard zero-mean GP posterior:
        #   m* = k_*^T K^-1 y
        #   s* = k(x, x) + R - k_*^T K^-1 k_*
        sigma_inv = np.linalg.inv(self.sigma)
        m_expt = (sigma_1_2.T @ sigma_inv @ np.asarray(self.y).reshape(-1, 1)).item()
        sigma_expt = (cov - sigma_1_2.T @ sigma_inv @ sigma_1_2).item()
        return m_expt, sigma_expt

    @staticmethod
    def get_probability(sigma, y, R):
        # Marginal likelihood of y under a zero-mean Gaussian with
        # covariance sigma (the noise R is already folded into sigma).
        multiplier = np.power(np.linalg.det(2 * np.pi * sigma), -0.5)
        return multiplier * np.exp(
            -0.5 * (y @ np.linalg.inv(sigma) @ y))

    def optimize(self, R_list, B_list):
        # Grid search over the noise term R and the kernel width delta,
        # keeping the pair that maximizes the marginal likelihood.

        def cov_f_proxy(delta, f):
            def wrapper(*args, **kwargs):
                kwargs.update({"delta": delta})
                return f(*args, **kwargs)
            return wrapper

        history = []
        for r in R_list:
            best_beta = (0, 0)
            for b in B_list:
                sigma = self.calculate_sigma(self.x, cov_f_proxy(b, self.cov_f), r)
                marginal = float(self.get_probability(sigma, self.y, r))
                if marginal > best_beta[0]:
                    best_beta = (marginal, b)
            history.append((best_beta[0], r, best_beta[1]))
        return sorted(history)[-1], np.array(history)

Now you can try it as follows:

# setting up a GP
x = np.array([-2, -1, 0, 3.5, 4])
y = np.array([4.1, 0.9, 2, 12.3, 15.8])
gaus = GP(x, y)

x_guess = np.linspace(-5, 16, 400)
y_pred = np.vectorize(gaus.predict)(x_guess)

plt.scatter(x, y, c="black")
plt.plot(x_guess, y_pred[0], c="b")
plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")
plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")

[Figure: the five training points with the GP posterior mean (blue) and a ±3 standard deviation band (red, dotted)]
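For comparison, the same fit can be reproduced with scikit-learn's current GaussianProcessRegressor, which replaces the deprecated GaussianProcess class linked in the question. A minimal sketch, assuming an RBF kernel plus a white-noise term:

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# scikit-learn expects X with shape (n_samples, n_features).
gpr = GaussianProcessRegressor(kernel=RBF() + WhiteKernel())
gpr.fit(x.reshape(-1, 1), y)

# Posterior mean and standard deviation on the same grid as above.
mean, std = gpr.predict(x_guess.reshape(-1, 1), return_std=True)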

The effect of the regularization parameter R:

def create_case(kernel, R=0):
    x = np.array([-2, -1, 0, 3.5, 4])
    y = np.array([4.1, 0.9, 2, 12.3, 15.8])
    gaus = GP(x, y, kernel, R=R)

    x_guess = np.linspace(-4, 6, 400)
    y_pred = np.vectorize(gaus.predict)(x_guess)

    plt.scatter(x, y, c="black")
    plt.plot(x_guess, y_pred[0], c="b")
    plt.plot(x_guess, y_pred[0] - np.sqrt(y_pred[1]) * 3, "r:")
    plt.plot(x_guess, y_pred[0] + np.sqrt(y_pred[1]) * 3, "r:")

plt.figure(figsize=(16, 16))
for i, r in enumerate([0.0001, 0.03, 0.09, 0.8, 1.5, 5.0]):
    plt.subplot(3, 2, i + 1)
    plt.title("kernel={}, delta={}, R={}".format("bell shape", 1, r))
    create_case(
        GP.generate_kernel(GP.kernel_bell_shape, delta=1), R=r)

[Figure: six panels, one per value of R, showing how the bell-shaped kernel fit (delta=1) changes with the regularization term]

plt.figure(figsize=(16, 16))
for i, d in enumerate([0.05, 0.5, 1, 3.2, 5.0, 7.0]):
    plt.subplot(3, 2, i + 1)
    plt.title("kernel={}, delta={}, R={}".format("kernel_laplacian", d, 0))
    create_case(
        GP.generate_kernel(GP.kernel_laplacian, delta=d), R=0)

[Figure: six panels, one per value of delta, showing the fit for the Laplacian kernel]
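Coming back to the original question of a varying number of price points per day: nothing special is needed. Treat every price point as its own observation and repeat the day as its x value; a minimal sketch with made-up numbers (R > 0 matters here, because duplicated x values would otherwise make the covariance matrix singular):

# Made-up ragged data: 3 prices on day 1, 2 on day 2, 4 on day 3.
days = np.array([1, 1, 1, 2, 2, 3, 3, 3, 3], dtype=float)
prices = np.array([10.2, 10.5, 9.8, 11.0, 11.3, 10.9, 11.1, 11.4, 11.2])

# The noise term on the diagonal keeps sigma invertible despite duplicates.
gp = GP(days, prices, R=0.1)
day_mean, day_var = gp.predict(2.5)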


2 Comments

Hello, I have tried multiple implementations of Gaussian processes (regression and classification), but with 100,000 data points they always error out with a memory error. The most the algorithm can take is about 10,000 points. Is this the expected behavior?
Hello. The example above was not intended to work with a large dataset or to be used in production; it was just a way to understand the algorithm. Exact GP inference builds and inverts an N x N covariance matrix, so it needs O(N^2) memory and O(N^3) time, which is why 100,000 points runs out of memory. After understanding the algorithm, if you want to use it, you should find a proper framework/library.
