Description of sampleMetropolis

0001 function [positionMatrix probabilityVector res] = sampleMetropolis(par, nSample)
0002 % sample from arbitrary distribution
0003 %
0004 % Syntax: [pointMatrix, probabilityVector] = sampleMetropolis(fHandle, ...
0005 %            nSample, initX, initCov)
0006 %
0007 % THIS IS NO USER FUNCTION
0008 
0009 % The elk-library: convex geometry applied to crystallization modeling.
0010 %   Copyright (C) 2013 Alexander Reinhold
0011 %
0012 % This program is free software: you can redistribute it and/or modify it
0013 %   under the terms of the GNU General Public License as published by the
0014 %   Free Software Foundation, either version 3 of the License, or (at your
0015 %   option) any later version.
0016 %
0017 % This program is distributed in the hope that it will be useful, but
0018 %   WITHOUT ANY WARRANTY; without even the implied warranty of
0019 %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0020 %   General Public License for more details.
0021 %
0022 % You should have received a copy of the GNU General Public License along
0023 %   with this program.  If not, see <http://www.gnu.org/licenses/>.
0024 
0025 %% Algorithm Overview (Metropolis)
0026 %
0027 % Source: H. Haario, "An Adaptive Metropolis Algorithm"; Bernoulli, Vol. 7,
0028 %         2001.
0029 %
0030 % The Metropolis algorith can create samples proportional to any
0031 %   distribution:
0032 fHandle = par.metroHandle;
0033 %   without any a priory knowledge. The distribution density should be
0034 %   reasonably connected (e.g. two well seperated normal distributions
0035 %   might not be sampled properly). The algorithm uses a random walk or
0036 %   Markov chain to generate these sample points. Therefore, the user must
0037 %   provide a reasonable initial state for the Markov chain:
0038 metroX = par.metroInit;
0039 %   The corresponding distribution density sould be large for that point.
0040 %   Additionally, a guess for the covariance matrix of sampled points is
0041 %   required:
0042 metroCov = par.metroCov;
0043 %   which must reflect the range of the distribution but is not required to
0044 %   be accurate. We can direclty calculate the initial value of of the
0045 %   point in the Markov chain that is used to sample upcoming points:
0046 metroVal = fHandle(metroX);
0047 %
0048 %% Phase 1 (Metropolis)
0049 % In an initial step of the algorithm:
0050 metroState = 1;
0051 % the Markov chain is evaluated for a small amount of steps (10):
0052 nNonAdaptive = par.metroNonAdaptive;
0053 %   This is done to allow a reasonable estimate for the covariance matrix
0054 %   in the next step.
0055 %
0056 %% Phase 2 (Metropolis)
0057 % The inital covariance matrix is substituted by one calculated from the
0058 %   already sampled points and the adaption rules (see source) is applied.
0059 %   This phase may be hold for additional sample points:
0060 nNoHistory = par.metroNoHistory;
0061 %   which might be required when the initial guess of the covariance matrix
0062 %   is too bad or many points are sampled so that the history should avoid
0063 %   the slightly poorer adaptive phase of the algorithm.
0064 %
0065 %% Phase 3 (Metropolis)
0066 % The adaption rules still apply and a history of:
0067 nHistory = par.metroHistory;
0068 if nHistory < nNonAdaptive
0069     nHistory = nNonAdaptive;
0070 end
0071 %   sample points is taken from which an autocorrelation function is
0072 %   computed up to a lag of:
0073 nLag = par.metroLag;
0074 %   Based on this autocorrelation function, we estimate how many steps in
0075 %   the Markov chain are required so that we have an independent point.
0076 %   This time is assumed to be proportional to the time until which the
0077 %   last coordinate has taken a negative value. This value is stored as:
0078 %     nStepsLastZero
0079 %
0080 %% Phase 4 (Metropolis)
0081 % We now take samples, every:
0082 %     nSampleDelay = nStepsLastZero * nSampleDelayFraction
0083 %   step of the Markov chain where the fration (typically 2) is specified
0084 %   by the user:
0085 nSampleDelayFraction = par.metroSpacing;
0086 %   During this phase, all samples are taken and the acceptance rate is
0087 %   evaluated which is the ratio of accepted points vs. steps in the Markov
0088 %   chain.
0089 % This phase has also been parallelized, allowing to generate:
0090 nSampleBatch = par.metroSampleBatch;
0091 %   samples at once. Therefore, the X-value of the Markov chain is copied
0092 %   nSampleBatch times to yield nSampleBatch seperate Markov chains that
0093 %   run in parallel. The last step reduces the amount of chain to obtain
0094 %   exactly nSample points at the end.
0095 %
0096 %% Computing the probability density
0097 % to provide the probability distribution for the sampled points, we need
0098 %   to integrate the given (non-normalized) distribution function.
0099 %   Therefore, we can use the estimated mean and covariance that was used
0100 %   for the markov steps so that the integral is detemined by Monte Carlo
0101 %   integration with normally sampled points.
0102 relTol = par.metroRelTol;
0103 nIntegralBatch = par.metroIntegralBatch;
0104 % The user might, however specify the integral himself avoiding this
0105 % procedure:
0106 metroIntegral = par.metroIntegral;
0107 
0108 %% Init algorithm
0109 % for the following, we use a state indicating:
0110 %   1 - no adaption, no history, no sampling
0111 %   2 - with adaption, no history, no sampling
0112 %   3 - with adaption, with history, no sampling
0113 %   4 - no adaption, no history, sampling
0114 %   0 - algorithm finished
0115 
0116 nDim = length(par.metroInit);
0117 
0118 % initial variables
0119 nMetroIter = 0;
0120 nAdaptCounter = 0;
0121 nAccepted = 0;
0122 metroCholCov = chol(metroCov);
0123 historyMatrix = nan(nDim, nHistory);
0124 % historyCount = 0;
0125 sampleMatrix = nan(nDim, nSample);
0126 sampleValueVector = nan(1, nSample);
0127 
0128 % to be save any parameter was introduced properly, we delete it so it
0129 % cannot be used
0130 verbose = par.verbose;
0131 clear par
0132 
0133 %% Start algorithm
0134 cumulativeTime = zeros(1, 5);tic;
0135 nBatch = 1;
0136 if verbose,disp('Started: Metropolis algorithm.');end
0137 while metroState
0138     % increase iteration count
0139     nMetroIter = nMetroIter + nBatch;
0140 
0141     % sample new metro point (adapted lines from sampleNormal to avoid
0142     %   overhead)
0143     %! Dev: to avoid rare problems involving valid hC-vectors, a try catch
0144     %    is added.
0145     for i = 1:10
0146         try
0147             metroNewX = metroCholCov*randn(nDim, nBatch) + metroX;
0148             metroNewVal = fHandle(metroNewX);
0149             break
0150         catch
0151             % well, accept it
0152         end
0153     end
0154     
0155     % accept (or reject)
0156     acceptFilter = rand(1, nBatch) < ...
0157         min(ones(1, nBatch), metroNewVal./metroVal);
0158     nAccepted = nAccepted + sum(acceptFilter);
0159     metroX(:, acceptFilter) = metroNewX(:, acceptFilter);
0160     metroVal(acceptFilter) = metroNewVal(acceptFilter);
0161     
0162 %     if rand < min([1, metroNewVal / metroVal])
0163 %         nAccepted = nAccepted + 1;
0164 %         metroX = metroNewX;
0165 %         metroVal = metroNewVal;
0166 %     end
0167     
0168     % update history (also used to evaluate mean and cov after phase 2)
0169     if metroState <= 3
0170         historyMatrix(:, mod(nMetroIter-1, nHistory)+1) = metroX;
0171     end
0172     
0173     % adapt covariance matrix
0174     if metroState == 2 || metroState == 3
0175         nAdaptCounter = nAdaptCounter + 1;
0176         p1 = (nAdaptCounter - 1) / nAdaptCounter;
0177         p2 = 1/nAdaptCounter;
0178         % update mean
0179         metroPreMean = metroMean;
0180         metroMean = p1*metroMean + p2*metroX;
0181         % update cov
0182         metroCov = p1 * metroCov + covFactor*p2*(...
0183             nAdaptCounter * (metroPreMean*(metroPreMean')) - ...
0184             (nAdaptCounter + 1)*(metroMean*(metroMean')) + ...
0185             metroX*metroX' + covMargin);
0186         % update cholesky factorization for random sampling
0187         try
0188             metroCholCov = chol(metroCov);
0189         catch
0190             error('elk:distribution:numericalError', ['Adaptive part of ' ...
0191                 'Metropolis algorithm failed. Usually, your guess for the ' ...
0192                 'initial point or the initial covariance is bad. Change ' ...
0193                 'them, or if you are already close, try increasing the ' ...
0194                 'number of non-adaptive cycles.']);
0195         end
0196     end
0197     
0198     % add sample
0199     if metroState == 4 && iStepElapsedForSampling == nSampleDelay
0200         sampleMatrix(:, iSample + (1:nBatch)) = metroX;
0201         sampleValueVector(iSample + (1:nBatch)) = metroVal;
0202         iSample = iSample + nBatch;
0203         if verbose,disp(['Metropolis: sample ' num2str(iSample) ' generated.']);end
0204         % at the end, ensure we take exactly the requested number of
0205         % samples
0206         nBatch = min(nBatch, nSample - iSample);
0207         metroX = metroX(:, 1:nBatch);
0208         metroVal = metroVal(1:nBatch);
0209         iStepElapsedForSampling = 1;
0210     elseif metroState == 4
0211         iStepElapsedForSampling = iStepElapsedForSampling + 1;
0212     end
0213     
0214     % switch through metro states // events
0215     if metroState == 1 && nMetroIter == nNonAdaptive
0216         if verbose,disp(['Metropolis: state 1 finished after ' ...
0217                 num2str(nMetroIter) ' steps.']);end
0218         cumulativeTime(1) = toc;
0219         % initialize covariance matrix
0220         metroMean = mean(historyMatrix(:, 1:nMetroIter), 2);
0221         % the following factor is provided by the paper:
0222         covFactor = 2.4^2 / nDim;
0223         metroCov = covFactor * cov(historyMatrix(:, 1:nMetroIter)');
0224         
0225         % display result
0226         if verbose > 1
0227             disp('So far, the estimated mean is:')
0228             disp(metroMean')
0229             disp('.. the estimated covariance is:')
0230             disp(metroCov)
0231             disp('.. the determinant caclucates to:')
0232             disp(det(metroCov))
0233             disp('..the acceptance rate is:')
0234             disp(nAccepted/nMetroIter)
0235         end
0236         
0237         metroCholCov = chol(metroCov);
0238         % the following margin is also suggested by the paper:
0239         covMargin = 1e-6 * diag(diag(metroCov));
0240         % the covariance uses now
0241         nAdaptCounter = nMetroIter;
0242         
0243         % state change
0244         metroState = 2;
0245     end
0246     if metroState == 2 && nMetroIter == nNonAdaptive + nNoHistory
0247         if verbose,disp('Metropolis: state 2 finished');end
0248         cumulativeTime(2) = toc;
0249         % state change
0250         nMetroIter = 0;
0251         metroState = 3;
0252     end
0253     if metroState == 3 && nMetroIter == nHistory
0254         if verbose,disp('Metropolis: state 3 finished');end
0255         cumulativeTime(3) = toc;
0256         % evaluate history
0257         metroAcMatrix = computeAcMatrix(historyMatrix, nLag);
0258         % the autocorrelation must have reached negative values for every
0259         %   coordinate, we will need the step in which this condition
0260         %   becomes true:
0261         lastZeroVector = nan(1, nDim);
0262         for iDim = 1:nDim
0263             thisLagTime = find(metroAcMatrix(iDim, :) < 0, 1, 'first');
0264             if isempty(thisLagTime)
0265                 lastZeroVector(iDim) = nLag;           
0266             else
0267                 lastZeroVector(iDim) = thisLagTime;
0268             end
0269         end
0270         
0271         % check if everything is sane
0272         if mean(lastZeroVector) < 0.9*nLag
0273             % set parameters for next step
0274             nStepsLastZero = mean(lastZeroVector);
0275             nSampleDelay = round(nStepsLastZero * nSampleDelayFraction);
0276             iSample = 0;
0277         else
0278             if verbose > 1
0279             disp('For the following error, the estimated mean is:')
0280             disp(metroMean')
0281             disp('And the estimated covariance is:')
0282             disp(metroCov)
0283             end
0284             error('elk:distribution:adaptiveMetropolis', ...
0285                 ['The adaptive Metropolis algorithm did not converge ' ...
0286                  'properly. Look, if metroInit and metroCov are ' ...
0287                  'appropriately chosen.']);
0288         end
0289         
0290         % change to batch mode
0291         nBatch = min(nSampleBatch, nSample);
0292         metroX = metroX * ones(1, nBatch);
0293         metroVal = metroVal * ones(1, nBatch);
0294         
0295         % state change
0296         nMetroIter = 0;
0297         nAccepted = 0;
0298         iStepElapsedForSampling = 0;
0299         metroState = 4;
0300         if verbose,disp(['Metropolis: generating samples every ' ...
0301                 num2str(nSampleDelay) ' step.']);end
0302         
0303     end
0304     if metroState == 4 && iSample >= nSample
0305         if verbose,disp('Metropolis: state 4 finished');end
0306         cumulativeTime(4) = toc;
0307         metroState = 0;
0308     end
0309 
0310 end
0311 % position matrix is straight forward
0312 positionMatrix = sampleMatrix;
0313 
0314 % for the probability values, we require the normalizing factor which makes
0315 %   the input density distribution to a probability distribution.
0316 if ~isempty(metroIntegral)
0317     y = metroIntegral;
0318     yErr = 0;
0319 else
0320     [y yErr] = integrateNormal(fHandle, metroMean, metroCov/covFactor, ...
0321         relTol, nIntegralBatch, verbose);
0322 end
0323 probabilityVector = sampleValueVector / y;
0324 cumulativeTime(5) = toc;
0325 
0326 %% assign some additional output
0327 res.acMatrix = metroAcMatrix;
0328 res.acceptanceRate = nAccepted / nMetroIter;
0329 res.rejectionRate = 1 - res.acceptanceRate;
0330 res.sampleDelay = nSampleDelay;
0331 res.integralValue = y;
0332 res.integralRelErr = yErr;
0333 res.mean = metroMean;
0334 res.cov = metroCov;
0335 res.cumulativeTime = cumulativeTime;
0336 
0337 end
0338 
0339 function acMatrix = computeAcMatrix(dataMatrix, nLag)
0340     acMatrix = dataMatrix(:, 1:nLag);
0341     meanVector = mean(dataMatrix, 2);
0342 %     meanVector = dataMatrix(:, 1);
0343 %     figure(101);cla;hold on
0344     
0345     for iDim = 1:size(dataMatrix, 1)
0346         thisDataVector = dataMatrix(iDim, :) - meanVector(iDim);
0347         
0348         for iK = 1:nLag
0349             acMatrix(iDim, iK) = thisDataVector(1:(end-(iK-1))) * ...
0350                 thisDataVector(iK:end)';
0351         end
0352         acMatrix(iDim, :) = acMatrix(iDim, :) / acMatrix(iDim, 1);
0353         
0354         %! debug code
0355 %         plot(acMatrix(iDim, :))
0356     end
0357 %     size(dataMatrix)
0358     
0359     %! Debug:
0360 %     figure(102),cla,plot(dataMatrix(1,:), dataMatrix(2,:))
0361     %/
0362 end
0363 
0364 function [y yErr] = integrateNormal(fHandle, parMean, parCov, relTol, nBatch, verbose)
0365 
0366 intStatus = 1;
0367 nNew = nBatch;
0368 valueVector = [];
0369 probVector = [];
0370 
0371 if verbose,disp(['Metropolis: phase 5 started.']);end
0372 relTol = relTol
0373 while intStatus
0374     % sample new points
0375     disD = obtainDistributionDiscrete('normal', 'nSample', nNew, ...
0376         'mean', parMean, 'cov', parCov);
0377     
0378     % extract probability
0379     probVector = [probVector disD.probabilityVector];
0380     
0381     % add value for samples
0382     valueVector = [valueVector fHandle(disD.positionMatrix)];    
0383     
0384     % integral
0385     nSample = length(valueVector);
0386     y = 1/nSample * sum(valueVector ./ probVector);
0387     
0388     % error
0389     yErr = sqrt(1/nSample/(nSample-1) * sum((...
0390                 valueVector ./ probVector - y).^2));
0391     yErr = yErr / y;
0392     
0393     % decide if finished and estimate how many samples should be sampled in
0394     % the next step
0395     if yErr < relTol
0396         intStatus = 0;
0397     else
0398         % Example: if we have taken 1000 samples and obtain an error of
0399         %   0.04 but require an error of 0.01, we need 4 times more
0400         %   accuracy. This means we need 4^2=16 times the number of
0401         %   samples (15 times more). To ensure, we obtain the required
0402         %   error we demand 0.9 times the original relative tolerance
0403         nNew = round(nSample * (yErr/(0.9*relTol))^2 - nSample);
0404         nNew = min(nNew, nBatch);
0405         if verbose,disp(['Metropolis: integration error now at ' ...
0406                 num2str(yErr) ', using ' num2str(nNew) ...
0407                 ' new sample points']);
0408         end
0409     end
0410 end
0411 
0412 
0413 end
sampleMetropolis

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE