% Gaussian Processes tutorial — extracted code listing (source: mrmartin.net, post 223)
%This file contains a walkthrough for explaining Gaussian Processes to
%any scientist, including statisticians and non statisticians.
%There is a .pdf file that completes this tutorial.

%% Demo #1 Univariate Normal %%
% In order to generate a univariate random number, use any real numbers \mu
% and \sigma to define the distribution.
mu = 6.2; %the mean
sigma_squared = 2; %sigma^2 is the variance

%randn returns a random number from the standard normal distribution, which
%is a univariate normal with \mu = 0, \sigma = 1
standard_normal_random_number = randn;

%this is converted to an arbitrary univariate gaussian normal by scaling by
%the standard deviation (sqrt of the variance) and shifting by the mean:
normal_random_number = sqrt(sigma_squared) * standard_normal_random_number + mu;

%now, we can plot what's going on. Preallocate both sample vectors so the
%loop does not grow them one element at a time (quadratic reallocation).
n_samples = 100000;
standard_normal_random_number = zeros(1, n_samples);
normal_random_number = zeros(1, n_samples);
for i = 1:n_samples
    standard_normal_random_number(i) = randn;
    normal_random_number(i) = sqrt(sigma_squared) * standard_normal_random_number(i) + mu;
end

%overlaid 50-bin histograms of the standard and the shifted/scaled samples
hist([standard_normal_random_number' normal_random_number'], 50)
legend('standard', 'custom')

%% Demo #2 Multivariate Normal %%
%The same trick works for the multivariate normal: \Mu has to be a vector,
%and \Sigma a symmetric positive semidefinite matrix
Mu = [3 ;0]; %the mean

%a matrix is symmetric iff M(a,b) = M(b,a), and positive semidefinite iff
% x*M*x' >= 0 for any vector x. These properties are satisfied by all
% matrices which are taken from the matlab function gallery('randcorr',n)
Sigma = [ 1.0000 -0.9195; -0.9195 1.0000];

% to find A such that A*A' = Sigma, we use the eigendecomposition of Sigma:
% Sigma = V*D*V'   (for a symmetric matrix, eig returns orthonormal V)
% then A = V*D^(1/2) gives A*A' = V*D^(1/2)*D^(1/2)*V' = Sigma.
% (Note: the original comments stated V'*D*V = Sigma and A'*A = Sigma,
% which is the transposed — and incorrect — form of these identities.)
[V,D]=eig(Sigma);
A=V*(D.^(1/2)); %D is diagonal, so the elementwise sqrt is the matrix sqrt

%The standard multivariate normal is just a series of independently drawn
%univariate normal random numbers
standard_random_vector = randn(2,1);

1 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
2 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
%We can find the mean and the variance of the GP at each point
prediction_x=-2:0.01:1;
%NOTE(review): the names 'mean' and 'variance' shadow the MATLAB built-in
%functions of the same name for the rest of the session; kept unchanged
%because later sections of this script reuse these variables.
mean = zeros(1, length(prediction_x));
variance = zeros(1, length(prediction_x));
for i=1:length(prediction_x)
    mean(i) = 0;                                   %zero-mean GP prior
    variance(i) = k(prediction_x(i),prediction_x(i)); %prior variance at x
end
%fill a shaded band between a lower and an upper curve (no visible edge)
plot_variance = @(x,lower,upper,color) set(fill([x,x(end:-1:1)],[upper,fliplr(lower)],color),'EdgeColor',color);
%95% confidence band is mean +/- 1.96 standard deviations; the original
%used 1.96*variance, which is only correct when the variance equals 1.
plot_variance(prediction_x,mean-1.96*sqrt(variance),mean+1.96*sqrt(variance),[0.9 0.9 0.9])
hold on
set(plot(prediction_x,mean,'k'),'LineWidth',2)

%% Demo #4 Gaussian Process Sampling %%
% now, we would like to sample from a Gaussian Process defined by this
% kernel. First, for the subset of points we are interested to plot, we
% construct the kernel matrix (using our kernel function)
K=zeros(length(prediction_x),length(prediction_x));
for i=1:length(prediction_x)
    %We only calculate the top half of the matrix. This is an unnecessary
    %speedup trick (the kernel is symmetric, so the lower half is a mirror)
    for j=i:length(prediction_x)
        K(i,j)=k(prediction_x(i),prediction_x(j));
    end
end
K=K+triu(K,1)'; % copy the upper half to the bottom, because K is symmetric

%factor K = A*A' through the eigendecomposition, as in Demo #2
[V,D]=eig(K);
A=V*(D.^(1/2));

%Then, we use the kernel as the covariance matrix of a multivariate normal
%and draw 7 independent sample paths (preallocated to avoid growth in loop)
clear gaussian_process_sample;
gaussian_process_sample = zeros(length(prediction_x), 7);
for i=1:7
    standard_random_vector = randn(length(prediction_x),1);
    gaussian_process_sample(:,i) = A * standard_random_vector;
end

%real() guards against tiny imaginary parts from near-zero negative
%eigenvalues in the numerical eigendecomposition
plot(prediction_x,real(gaussian_process_sample))

%% Demo #5 Gaussian Process Regression %%
%initialize observations: X_o are the observed inputs (column vector),
%Y_o the corresponding observed outputs
X_o = [-1.5 -1 -0.75 -0.4 -0.3 0]';
Y_o = [-1.6 -1.3 -0.5 0 0.3 0.6]';

%K will hold the kernel matrix between all pairs of observed inputs
K = zeros(length(X_o));
137 for i=1:length(X_o)
3 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
%note that we use kernel_function, not kernel_function+error_function, when
%incorporating points other than noisy measurements
K_ss=zeros(length(prediction_x),length(prediction_x));
for i=1:length(prediction_x)
    %We only calculate the top half of the matrix. This is an unnecessary
    %speedup trick (K_ss is symmetric)
    for j=i:length(prediction_x)
        K_ss(i,j)=kernel_function(prediction_x(i),prediction_x(j));
    end
end
K_ss=K_ss+triu(K_ss,1)'; % copy the upper half of the matrix to the
                         % bottom, because it is symmetric

%K_s holds the cross-kernel between prediction points and observations
K_s=zeros(length(prediction_x),length(X_o));
153 for i=1:length(prediction_x)
legend('confidence bounds','mean','data points','location','SouthEast')

%% Demo #5.2 Sample from the Gaussian Process posterior
%keep only the kernel, prediction grid, observation kernel matrix and data
clearvars -except k prediction_x K X_o Y_o

%We can also sample from this posterior, the same way as we sampled before:
K_ss=zeros(length(prediction_x),length(prediction_x));
for i=1:length(prediction_x)
    %We only calculate the top half of the matrix. This is an unnecessary
    %speedup trick (K_ss is symmetric)
    for j=i:length(prediction_x)
        K_ss(i,j)=k(prediction_x(i),prediction_x(j));
    end
end
K_ss=K_ss+triu(K_ss,1)'; % copy the upper half of the matrix to the
                         % bottom, because it is symmetric
4 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
187
188 K_s=zeros(length(prediction_x),length(X_o));
189 for i=1:length(prediction_x)
5 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
6 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
clear
close all
sigma_f=0.5; %kernel signal magnitude
l=1;         %kernel length scale
%squared-exponential kernel for column-vector inputs
kernel_function = @(x,x2) sigma_f^2*exp(dot(x-x2,x-x2)/(-2*l^2));

grid_size=20;
animation_length=100;
tic
[x1 x2]=meshgrid(1:grid_size,1:grid_size+animation_length);
toc
% prediction_x is a matrix where each column is a test location in x
prediction_x=[x1(:)';x2(:)']'./5;
%old (quadratic-loop) version of the kernel matrix construction:
% for i=1:length(prediction_x)
%     for j=1:length(prediction_x)
%         K(i,j)=kernel_function(prediction_x(:,i),prediction_x(:,j));
%     end
% end
%new version: vectorized construction using
% ||xi-xj||^2 = xi'*xi + xj'*xj - 2*xi'*xj
%NOTE(review): this divides by sigma_f^2 where kernel_function uses l^2 and
%omits the sigma_f^2 prefactor — presumably intentional rescaling for the
%animation, but it does not match kernel_function exactly; verify.
tic
n=size(prediction_x,1);
K=prediction_x*prediction_x'/sigma_f^2;
d=diag(K);
K=K-ones(n,1)*d'/2;
K=K-d*ones(1,n)/2;
K=exp(K);
toc

%factor K = A*A' via the eigendecomposition (as in earlier demos)
[V,D]=eig(K);
A=V*(D.^(1/2));

%draw one sample over the extended grid; real() discards tiny imaginary
%parts caused by numerically negative eigenvalues
standard_random_vector = randn(length(prediction_x),1);
gaussian_process_sample = real(A * standard_random_vector)./3;

%reshape the flat sample back onto the (extended) grid and show one frame
sample_rect=reshape(real(gaussian_process_sample),grid_size+animation_length,grid_size);
i=1;
surf(x1(i:i+grid_size-1,:),x2(i:i+grid_size-1,:),sample_rect(i:i+grid_size-1,:));

7 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
clear
close all
sigma_f=7; %kernel parameter passed to rbf (defined elsewhere in the project)

grid_size=50;
[x1 x2]=meshgrid(1:grid_size);
x1_place=grid_size;
% prediction_x is a matrix where each column is a test location in x
prediction_x=[x1(:)';x2(:)']';
%old (quadratic-loop) version of the kernel matrix construction:
% for i=1:length(prediction_x)
%     for j=1:length(prediction_x)
%         K(i,j)=kernel_function(prediction_x(:,i),prediction_x(:,j));
%     end
% end
%new version: rbf builds the whole kernel matrix at once
K=rbf(prediction_x,sigma_f);
[V,D]=eig(K);
A=V*(D.^(1/2)); %K = A*A', as in the earlier demos

%one sample from the 2D Gaussian Process prior
standard_random_vector = randn(length(prediction_x),1);
gaussian_process_sample = real(A * standard_random_vector);

sample_rect=reshape(real(gaussian_process_sample),grid_size,grid_size);
surf(x1,x2,sample_rect);
i=1;
axis([1 grid_size i i+grid_size-1 -2 2])
xlabel('x_1')
ylabel('x_2')
zlabel('y')

8 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
382
383
384 standard_random_vector = randn(length(K_ss),1);
385 %gaussian_process_sample
386 %real(gaussian_process_sample(grid_size+1:end))
new_sample = A * standard_random_vector+K_s/K*real(gaussian_process_sample(end-
387
length(previous_x)+1:end));
gaussian_process_sample =
388
[gaussian_process_sample(length(new_sample)+1:end);new_sample];
389 toc
390 sample_rect=reshape(real(gaussian_process_sample),grid_size,grid_size);
391
392 surf(x1,x2,sample_rect);
393 axis([i i+grid_size-1 1 grid_size -2 2])
394 xlabel('x_1')
395 ylabel('x_2')
396 zlabel('y')
397
398 pause(0.05)
399 end
400
%% Demo 10 Sampling the Gaussian Process prior in 3D
%the previously used kernel_function and error_function can be extended to
%support vector input. This is a natural case of the squared exponential
%kernel, with the same parameters and properties. (note that this is the
%rotationally invariant version)
kernel_function_m = @(x,x2) sigma_f^2*exp((x-x2)'*(x-x2)/(-2*l^2));
%here, the error function needs to test whether two vectors are exactly the
%same. This is done by counting the number of matches between x and x2
error_function_m = @(x,x2) sigma_n^2*(sum(x==x2)==length(x));
%k_m is used instead of k
k_m = @(x,x2) kernel_function_m(x,x2)+error_function_m(x,x2);

%the resolution option here allows to change the granularity of the sampled
%Gaussian Process. Since resolution^2 samples need to be taken, this cannot
%be too high. In even higher dimensions, taking samples from a Gaussian
%Process at every point of a grid becomes prohibitive, so they cannot be
%visualised by their samples.
resolution=5;
%generate the grid where to take samples, and save it the same way as
%prediction_x in previous Demos.
[a b]=meshgrid(linspace(0,1,resolution));
%Before, prediction_x had n*1 dimensions, now it will have n*2
prediction_x=[a(:) b(:)]';

%This is done exactly as in Demo 4
K=zeros(size(prediction_x,2),size(prediction_x,2));
for i=1:size(prediction_x,2)
    %We only calculate the top half of the matrix.
    %(This is an unnecessary speedup trick)
    for j=i:size(prediction_x,2)
        %here, we use the vectors prediction_x(:,i) for each point
        K(i,j)=k_m(prediction_x(:,i),prediction_x(:,j));
    end
end
K=K+triu(K,1)'; % copy the upper half of the matrix to the
                % bottom, because it is symmetric

[V,D]=eig(K);
A=V*(D.^(1/2)); %K = A*A'

%take a single sample, and plot the surface. Note that this becomes
%impossible in higher dimensions.
standard_random_vector = randn(size(prediction_x,2),1);
sample=A * standard_random_vector;
surf(reshape(sample,resolution,resolution))

%an important note: a Gaussian Process with 1D input is a continuous line, but for a
%2D input, it is a continuous surface. That's because the Gaussian Process defines a
%value for every possible point everywhere in the x-y space.

%% Demo 11 Sampling the Gaussian Process posterior in many dimensions
dimensions=4;
%this initialises the random number generator to create the same numbers
%each time it is executed, for repeatability.
rng('default');
%generate 7 random observations in 4 dimensions (one observation per column)
X_o = rand(dimensions,7);
Y_o = [-2 -1.6 -1.3 -0.5 0 0.3 0.6]';

%K will hold the kernel matrix between all pairs of observed inputs
K = zeros(size(X_o,2));
461 for i=1:size(X_o,2)
471 for j=i:size(prediction_x,2)%We only calculate the top half of the matrix. This an
unnecessary speedup trick
472 K_ss(i,j)=k_m(prediction_x(:,i),prediction_x(:,j));
473 end
474 end
475 K_ss=K_ss+triu(K_ss,1)'; % We can use the upper half of the matrix and copy it to the
476
477 K_s=zeros(size(prediction_x,2),size(X_o,2));
10 of 11 22/12/2015 23:45
http://mrmartin.net/?p=223
479 for j=1:size(X_o,2)%We only calculate the top half of the matrix. This an
unnecessary speedup trick
480 K_s(i,j)=k_m(prediction_x(:,i),X_o(:,j));
481 end
482 end
%calculate Mu and Sigma according to the equation on page 13:
%GP posterior for a zero-mean prior —
%  Mu    = K_s * K^-1 * Y_o          (posterior mean at the test points)
%  Sigma = K_ss - K_s * K^-1 * K_s'  (posterior covariance)
%K_s/K is MATLAB's right matrix division, i.e. K_s*inv(K) without forming
%the explicit inverse
Mu = (K_s/K)*Y_o;
Sigma = K_ss-K_s/K*K_s';
11 of 11 22/12/2015 23:45