iData: added PCA method

McStasMcXtrace · Sep 26, 2013 · 16c56f4 · 16c56f4
1 parent 8e7529c
commit 16c56f4
Show file tree

Hide file tree

Showing 8 changed files with 391 additions and 204 deletions.
diff --git a/Docs/Math.html b/Docs/Math.html
@@ -486,6 +486,18 @@ <h3><a class="mozTocH3" name="mozTocId566959"></a>Unary operators</h3>
   <td style="vertical-align: top;"><a href="http://en.wikipedia.org/wiki/Image_segmentation">Data segmentation</a> (<a href="http://en.wikipedia.org/wiki/K-means_clustering">k-means</a>)<br>
   </td>
 </tr><tr>
+  <td style="vertical-align: top;"><a href="#mozTocId77311"><span style="font-weight: bold;">pca</span></a><br>
+  </td>
+  <td style="vertical-align: top;">Principal component coordinates<br>
+  </td>
+  <td style="vertical-align: top;">0<br>
+  </td>
+  <td style="vertical-align: top;"><br>
+  </td>
+  <td style="vertical-align: top;"><a href="http://en.wikipedia.org/wiki/Principal_component_analysis">Principal component analysis</a><br>
+  </td>
+</tr>
+<tr>
   <td style="vertical-align: top;"><a href="#mozTocId447291"><span style="font-weight: bold;">resize</span></a><br>
   </td>
   <td style="vertical-align: top;">Resized signal<br>
@@ -930,7 +942,10 @@ <h4><a class="mozTocH4" name="mozTocId121035"></a>Convolution/correlation</h4>
 computes the <a href="http://en.wikipedia.org/wiki/Cross-correlation">cross correlation</a> of two signals. The auto-correlation is simply
 <span style="font-style: italic;">xcorr(a)</span>.<br>
 <br>
-Last, the <span style="font-weight: bold;">corrcoef</span> method returns the <a href="http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient">Pearson product-moment correlation coefficient</a>, which measures the similarity between two data sets. It returns a measure of the similarity between data sets (1=equal, 0=non correlated, -1=anti-correlated).<br>
+Last, the <span style="font-weight: bold;">corrcoef</span> method returns the <a href="http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient">Pearson product-moment correlation coefficient</a>,
+ which measures the similarity between two data sets. It returns a 
+measure of the similarity between data sets (1=equal, 0=non correlated, 
+-1=anti-correlated).<br>
 <br><span style="color: rgb(255, 0, 0);">
 Warning: </span>The accuracy of the <span style="font-style: italic;">conv</span> and <span style="font-style: italic;">xcorr</span>
 operators depends on the axis sampling. A coarse axis sampling (that is
@@ -1053,6 +1068,11 @@ <h4><a class="mozTocH4" name="mozTocId77311"></a>Data segmentation (partitioning
 
 
 The wavelet transform <span style="font-weight: bold;">cwt</span> may also be used to perform a peak analysis, separating sharp features from broad ones (see <a href="#mozTocId663052">above</a>).<br>
+<br>
+The <a href="http://en.wikipedia.org/wiki/Principal_component_analysis">principal component analysis</a>
+ (PCA) methodology consists in finding similarities between data sets. 
+Groups of 'close' data sets can then be defined. The corresponding iData
+ method is <span style="font-weight: bold;">pca</span> .<br>
 <h3><a class="mozTocH3" name="mozTocId439767"></a><span style="font-weight: bold;">Projection, integration and sum</span></h3>
 <a href="images/iData_sum_camproj.png"><img alt="iData: sum and projection" title="iData: sum and projection" src="images/iData_sum_camproj.png" style="border: 0px solid ; width: 200px; height: 408px;" align="right"></a>There
 are

diff --git a/Docs/Methods.html b/Docs/Methods.html
diff --git a/Objects/@iData/cwt.m b/Objects/@iData/cwt.m
@@ -31,6 +31,7 @@
 %            cross wavelet transform and wavelet coherence to geophysical time 
 %            series, Nonlin. Processes Geophys., 11, 561–566, doi:10.5194/npg-11-561-2004
 % http://noc.ac.uk/using-science/crosswavelet-wavelet-coherence
+% http://en.wikipedia.org/wiki/Continuous_wavelet_transform
 %
 % Version: $Revision: 1035 $
 % See also iData, iData/fft, iData/xcorr, iData/conv

diff --git a/Objects/@iData/kmeans.m b/Objects/@iData/kmeans.m
@@ -4,16 +4,20 @@
 %   @iData/kmeans function to partition the object X into k classes.
 %
 %   b = kmeans(a,k) partitions the points in the iData object X into k clusters.
-%   The resulting object Signal contains numbers from 1 to 'k' which are indices
-%   of segments/partitions.
-%   When no cluster can be found, the result is empty.
+%     The resulting object Signal contains numbers from 1 to 'k' which are indices
+%     of segments/partitions.
+%     When no cluster can be found, the result is empty.
+%   b = kmeans(a) assumes k=2 partitions
+%   [b,c] = kmeans(a,k) also returns the centroid of the clusters/partitions/segments.
 %
 % input:  X: object or array (iData)
 %         k: number of partitions wanted (integer, default is 2)
 % output: b: object or array with partition indices (iData)
 %         c: centroid locations of clusters
 % ex:     b=kmeans(a);
 %
+% See: http://en.wikipedia.org/wiki/K-means_clustering
+%
 % Version: $Revision: 1035 $
 % See also iData, iData/uminus, iData/abs, iData/real, iData/imag, iData/uplus
 

diff --git a/Objects/@iData/pca.m b/Objects/@iData/pca.m
@@ -0,0 +1,128 @@
+function b = pca(a, varargin)
+% [b] = pca(X, k) : Principal component analysis of iData object(s)
+%
+%   @iData/pca function to perform a Principal component analysis of object(s)
+%
+%   b = pca(a,k) computes the principal component analysis of an object in a k-D
+%     space representation. This corresponds to a classification of the objects
+%     rows, searching for similarities/correlations. The resulting principal 
+%     component coefficients object contains the same number of rows as 'a', and
+%     k columns for coordinates.
+%     Rows (rank 1) of X correspond to observations and further ranks correspond 
+%     to variables.
+%   b = pca(a) assumes k=2 (2D space classifier).
+%   b = pca([a1 a2 ...], k) performs the PCA along all objects specified in the 
+%     array, and return the principal component coefficients. The resulting object
+%     contains as many rows as the number of objects specified, and k columns.
+%   b = pca(a, key, value, ...)
+%     specifies the PCA configuration as key=value pairs:
+%       'NumComponents',k
+%         Number of components requested, specified as the comma-separated pair
+%         consisting of 'NumComponents' and a scalar integer k satisfying 
+%         0 < k <= p, where p is the number of original variables in X. When 
+%         specified, pca returns the first k PCA coefficients.
+%       'Centered', True(default) or False
+%         Center the variables by subtracting the mean values
+%       'VariableWeights', false or 'variance'
+%         'variance' to normalize variables to their variance (default)
+%
+% input:  X: object or array (iData)
+%         k: number of components wanted (integer, default is 2)
+% output: b: principal component object (iData)
+% ex:     b=pca(a); plot(b); text(b{1},b{2}, get(a,'Title'));
+%
+% See: http://en.wikipedia.org/wiki/Principal_component_analysis
+%
+% Version: $Revision: 1035 $
+% See also iData, iData/kmeans, iData/cwt, iData/corrcoef
+
+% handle input iData arrays
+if numel(a) > 1
+  % first align all objects (union)
+  a = union(a);
+  labls = get(a, 'Title');
+  % then add all array signals as 1D vectors, one per row
+  for index=1:numel(a)
+    s = subsref(a(index),struct('type','.','subs','Signal'));
+    a(index) = set(a(index), 'Signal', s(:)');
+  end
+  s = [];
+  a = cat(2, a);
+
+  % then call pca
+  b = pca(a, varargin{:});
+  return
+end
+
+% default config
+k         = 2;
+algorithm = 'svd';
+centered  = 1;
+scale     = 'variance';
+
+% parse input key/values
+for index=1:numel(varargin)
+  key = varargin{index};
+  if ischar(key) && index < numel(varargin)
+    value = varargin{index+1};
+    index = index + 1;
+  else
+    value = [];
+  end
+  if index==1 && isscalar(key)
+    k = key;
+  else
+    switch lower(key)
+    case 'numcomponents'
+      k = value;
+    case 'centered'
+      centered  = value;
+    case 'variableweights'
+      scale     = value;
+    end
+  end
+end
+
+% get data set signal
+S = subsref(a,struct('type','.','subs','Signal'));
+
+[n m] = size(S);
+
+if centered, 
+  S=S - repmat(mean(S),[n 1]);
+end
+if strcmp(scale, 'variance')
+  S=S./ repmat(std(S),[n 1]);
+end
+
+% compute PCA
+[V D] = eig(cov(S)); 
+D = diag(D);
+[D, index] = sort(D,'descend');
+% permute coefficients from highest to lowest weight
+V = V(:,index);
+% restrict to dimensionality 'k'
+if k > size(V,2), k=size(V,2); end
+if k > 0
+  V = V(:, 1:k);
+end
+coeff  = V;
+latent = D;
+score  = S*coeff;
+
+% assemble output object
+b = copyobj(a);
+b.Data.Observations = 1:size(V,1);
+b = setalias(b, 'Observations', 'Data.Observations');
+b = setalias(b, 'Coefficients', coeff, 'Principal component coefficients');
+b = setalias(b, 'Scores', score, 'Principal component scores');
+b = setalias(b, 'Variances', latent, 'Principal component variances');
+b = set(b, 'Signal', 'Observations', 'Error', 0);
+for index=1:k
+  b.Data.([ 'PCA' num2str(index)]) = V(:,index);
+  b = setalias(b, [ 'PCA' num2str(index)], ['Data.PCA' num2str(index)] );
+  b = setaxis( b, index, [ 'PCA' num2str(index)]);
+end
+
+b = iData_private_history(b, mfilename, a, varargin{:});
+
diff --git a/Objects/@iData/smooth.m b/Objects/@iData/smooth.m
@@ -1,4 +1,4 @@
-function a = smooth(a, varargin)
+function b = smooth(a, varargin)
 % s = smooth(a) : smooth iData objects
 %
 %   @iData/smooth function to smooth iData objects
@@ -58,11 +58,9 @@
 
 % handle input iData arrays
 if numel(a) > 1
+  b = zeros(iData, size(a));
   for index=1:numel(a)
-    a(index) = feval(mfilename, a(index), varargin{:});
-  end
-  if nargout == 0 & length(inputname(1))
-    assignin('caller',inputname(1),a);
+    b(index) = feval(mfilename, a(index), varargin{:});
   end
   return
 end
@@ -83,6 +81,7 @@
   end
 end
 
+b = copyobj(a);
 if strcmp(method, 'sgolay')
   % use Savitzky-Golay
 
@@ -98,18 +97,14 @@
 
     if index > 1, s  = permute(s, [ index 1 ]); end
   end
-  a = set(a, 'Signal', s);
+  b = set(b, 'Signal', s);
 else
   % use discrete cosine transform filter
-  a = set(a, 'Signal', smoothn(subsref(a,struct('type','.','subs','Signal'), varargin{:})));
+  b = set(b, 'Signal', smoothn(subsref(a,struct('type','.','subs','Signal'), varargin{:})));
 end
 
 
-a = iData_private_history(a, mfilename, a, varargin{:});
-
-if nargout == 0 & length(inputname(1))
-  assignin('caller',inputname(1),a);
-end
+b = iData_private_history(b, mfilename, a, varargin{:});
 
 % ------------------------------------------------------------------------------
 function [ny,c] = smoothsg1d(yd,N,M)

diff --git a/Objects/@iData/subsref.m b/Objects/@iData/subsref.m
@@ -80,6 +80,7 @@
 
       iData_private_warning('enter',mfilename);
 
+      b_isvector = isvector(b);
       ds=iData_getAliasValue(b,'Signal'); 
       d=ds(s.subs{:});                          % b(indices)
       b=set(b,'Signal', d);  b=setalias(b,'Signal', d);
@@ -100,19 +101,24 @@
       clear dm
 
       % must also affect axis
-      for index=1:ndims(b)
+      % for event data sets, affect all axes
+      if b_isvector, sz = b_isvector; else sz = ndims(b); end
+
+      for index=1:sz
         if index <= length(b.Alias.Axis)
           x = getaxis(b,index);
           ax= b.Alias.Axis{index};   % definition of Axis
           nd = size(x); nd=nd(nd>1);
           if length(size(x)) == length(size(a)) && ...
                  all(size(x) == size(a))  && all(length(nd) == length(s.subs)) % meshgrid type axes
             b = setaxis(b, index, ax, x(s.subs{:}));
+          elseif b_isvector && length(s.subs) == 1 % event data sets
+            b = setaxis(b, index, ax, x(s.subs{1}));
           elseif max(s.subs{index}) <= numel(x) % vector type axes
             b = setaxis(b, index, ax, x(s.subs{index}));
           else
             iData_private_warning(mfilename,[ 'The Axis ' num2str(size(index)) ' [' ...
-    num2str(size(x)) ' can not be resize as a [' num2str(size(s.subs{index})) ...
+    num2str(size(x)) ' can not be resized as a [' num2str(size(s.subs{index})) ...
     '] vector in iData object ' b.Tag ' "' b.Title '".\n\tTo use the default Error=sqrt(Signal) assign s.Error=[].' ]);
           end
         end

diff --git a/Tests/Unit/Objects/iData/test_iData_pca.m b/Tests/Unit/Objects/iData/test_iData_pca.m
@@ -0,0 +1,26 @@
+function result=test_iData_pca
+
+  A = [269.8 38.9 50.5
+272.4 39.5 50.0
+270.0 38.9 50.5
+272.0 39.3 50.2
+269.8 38.9 50.5
+269.8 38.9 50.5
+268.2 38.6 50.2
+268.2 38.6 50.8
+267.0 38.2 51.1
+267.8 38.4 51.0
+273.6 39.6 50.0
+271.2 39.1 50.4
+269.8 38.9 50.5
+270.0 38.9 50.5
+270.0 38.9 50.5
+];
+
+  a=iData(A);
+  b = pca(a);
+  if all(abs(sum(b.Coefficients) - [ -0.6142    1.6051 ])  < 1e-4 )
+    result = 1;
+  else 
+    result = 0; end
+