Facemorph
Class PCA

java.lang.Object
  extended by Facemorph.PCA

public class PCA
extends java.lang.Object

Principal Component Analysis data object


Constructor Summary
PCA()
          Constructs an empty PCA object
 
Method Summary
static void addToAverage(Template average1, PCA pca1, Template currentAverage, int n, int start, BigMat data)
          Incrementally updates the data matrix and currentAverage by adding the PCA data to them
 double[] analyse(double[] shifts)
          Finds the PCA components for the given point shifts
 double[] analyse(double[] shifts, double[] average)
          Finds the PCA components for the given point shifts
 double[] analyse(double[] shifts, double[] average, int count)
          Finds the PCA components for the given point shifts
 double[] analyse(double[] shifts, int maxComp)
          Finds the PCA components for the given point shifts
 double[] analyse(Template tem, Template avTem)
          Analyse a template to find the PCA weights
 double[] analyse(Template tem, Template avTem, int normalisation, int[] normalisationPoints)
          Analyse a template to find the PCA weights
 Template build(java.util.ArrayList<Template> templates, int normalisation, int[] normalisationPoints)
          Builds a PCA from the set of Templates given
 int build(java.util.ArrayList<Template> templates, Template avTem, int normalisation, int[] normalisationPoints)
          Builds a PCA from the set of Templates given
 int build(BigMat data)
          Builds a PCA from the data in a matrix form
 int build(BigMat data, double[] weights)
          Builds a PCA from a set of weighted samples
 java.util.ArrayList<Template> build(java.util.Vector<java.util.ArrayList<Template>> templateSets)
          Builds a PCA from a set of template sets.
 int buildPCAandAverage(BigMat data, double[] average)
          Builds a PCA from the data in a matrix form
 java.util.ArrayList<Template> buildTemplateSet(java.lang.String fileName)
          Builds a PCA from a set of template sets.
 java.util.ArrayList<double[]> buildVectors(java.util.Vector<java.util.ArrayList<double[]>> sets)
          Builds a PCA from a set of vector sets.
 Template combine(PCA pca1, Template average1, PCA pca2, Template average2)
          Combine two PCA models into a single one.
 Template combine2(PCA pca1, Template average1, PCA pca2, Template average2)
          Combine two PCA models into a single one.
 int componentsExplainedBy(double var)
          Returns the minimum number of components required to explain a certain amount of variance
 float[] findPCAFit(float[] shifts)
          Finds the PCA components for the given point shifts
 BigMat getComponents()
          Get the PCA components as a matrix
 BigMat getComponents(double[] average, boolean normalise)
          Gets the components as a matrix, plus 4 initial rows that hold linear approximations to rigid + scale parameters
 BigMat getComponents2(double[] average)
          Gets the components as a BigMat plus 4 initial cols for rigid params
 BigMat getComponents3D(double[] average, boolean normalise)
          Gets the components as a matrix, plus 7 initial rows that hold linear approximations to rigid + scale parameters for 3D
 int getCount()
          Gets the number of components in this PCA
 double getD(int i)
          Gets the ith variance
 Template[] getIntersection(Template average1, Template average2, PCA pca, double alpha, double beta)
          Find the least squares intersection between the two PCA models
 PCA getReduced(int max)
          Gets a copy of this PCA with fewer components
 double getSD(int i)
           
 int getSize()
          Gets the size of samples in this PCA
 java.util.Vector<java.util.ArrayList<Template>> getTemplateSet(java.lang.String fileName)
          Reads sets of templates from file into a Vector of ArrayList of Templates
 java.util.Vector<java.util.ArrayList<double[]>> getVectorSet(java.lang.String fileName)
          Reads a set of vector sets from a file.
static void main(java.lang.String[] args)
          Main method used for testing
 java.util.Vector<java.util.ArrayList<Template>> makeMatchingSets(java.util.Vector<java.util.ArrayList<Template>> templateSets, java.util.ArrayList<Template> averages, double varianceToExplain)
          Creates a set of matching sets from a set of Template sets.
 java.util.Vector<java.util.ArrayList<Template>> makeMatchingTemplateSets(java.lang.String fileName, double pcVar)
          Creates a set of matching sets from a set of Template sets.
 java.util.Vector<java.util.ArrayList<double[]>> makeMatchingVectorSets(java.lang.String fileName, double pcVar)
          Creates a set of matching sets from a set of double[] sets.
 java.util.Vector<java.util.ArrayList<double[]>> makeMatchingVectorSets(java.util.Vector<java.util.ArrayList<double[]>> templateSets, java.util.ArrayList<double[]> averages, double varianceToExplain)
          Creates a set of matching vector sets from a set of vector sets.
 Template normaliseEyes(Template average, Template average2)
          Rotates this PCA so that it aligns with the new average
 Template project(Template tem, Template av)
          Projects the given Template into the closest approximation in the PCA space
 Template project(Template tem, Template av, boolean cap, double capval)
          Projects the given Template into the closest approximation in the PCA space
 Template project(Template tem, Template av, int left, int right, boolean cap, double capval)
          Projects the given Template into the closest approximation in the PCA space
static void quickSort(double[] vals, int[] order, int start, int len)
          Does a quick sort on the values given
 boolean readBinary(java.io.InputStream is, double maxvar)
          Reads a PCA from a binary format (as produced by Psychomorph)
 boolean readBinary(java.lang.String file, double maxvar)
          Reads a PCA from a binary format (as produced by Psychomorph)
static double readDouble(java.io.StreamTokenizer st)
          Reads a double precision value from a text file
 void readText(java.io.InputStream in)
          Read a text format PCA file from an InputStream
 void readText(java.io.Reader r)
          Read a text format PCA file from a Reader
 void readText(java.util.Scanner sc)
          Read a text format PCA file from a Scanner
 void readText(java.io.StreamTokenizer st)
          Read a text format PCA file from a StreamTokenizer
 boolean readText(java.lang.String file)
          Reads a PCA file in a text format
 boolean readText(java.net.URL url)
          Read a text format PCA file from a URL
 double[] reconstruct(double[] parameters)
          Create a vector from the component weights
 double[] reconstruct(double[] parameters, double[] average)
          Create a vector from the component weights
 double[] reconstruct(double[] parameters, int maxComp)
          Create a vector from the component weights
 float[] reconstruct(float[] parameters)
          Create a vector from the component weights
 float[] reconstruct(float[] parameters, int maxComp)
          Create a vector from the component weights
 Template replaceMean(Template newMean, Template oldMean, int left, int right, boolean closest)
          Replaces the mean of this PCA with a new mean aligned to the old using 2 point normalisation
 void setCount(float maxvar)
          Set the number of components for this PCA
 void setCount(int c)
          Set the number of components for this PCA
 void solve(BigMat data)
          Builds this PCA from the data given
 void writeBinary(java.io.OutputStream out)
          Writes a binary format PCA to the OutputStream
 boolean writeBinary(java.lang.String file)
          Writes a PCA to file in a binary format
 boolean writeText(java.lang.String filename)
          Writes this PCA in a text format
 void writeText(java.io.Writer fw)
          Writes this PCA data to a writer in a text format
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

PCA

public PCA()
Constructs an empty PCA object

Method Detail

getReduced

public PCA getReduced(int max)
Gets a copy of this PCA with fewer components

Parameters:
max -
Returns:
returns a copy of the PCA with at most max components

readDouble

public static double readDouble(java.io.StreamTokenizer st)
                         throws java.io.IOException
Reads a double precision value from a text file

Parameters:
st - The stream tokenizer to read from
Returns:
returns the double value parsed
Throws:
java.io.IOException - could be thrown by the streamTokenizer

setCount

public void setCount(int c)
Set the number of components for this PCA

Parameters:
c -

setCount

public void setCount(float maxvar)
Set the number of components for this PCA

Parameters:
maxvar - the maximum variance this PCA should express

getComponents

public BigMat getComponents()
Get the PCA components as a matrix

Returns:
returns the PCA components in a count x size matrix

getComponents

public BigMat getComponents(double[] average,
                            boolean normalise)
Gets the components as a matrix, plus 4 initial rows that hold linear approximations to rigid + scale parameters

Parameters:
average - the average
normalise - indicates that the data should be normalised / orthogonalised
Returns:
a BigMat containing the PCA and rigid+scale components

getComponents3D

public BigMat getComponents3D(double[] average,
                              boolean normalise)
Gets the components as a matrix, plus 7 initial rows that hold linear approximations to rigid + scale parameters for 3D

Parameters:
average - the average
normalise - indicates that the data should be normalised / orthogonalised
Returns:
a BigMat containing the PCA and rigid+scale components

getComponents2

public BigMat getComponents2(double[] average)
Gets the components as a BigMat plus 4 initial cols for rigid params

Parameters:
average - the average of this PCA
Returns:
returns the components as a matrix

getCount

public int getCount()
Gets the number of components in this PCA

Returns:
returns the number of components

getSize

public int getSize()
Gets the size of samples in this PCA

Returns:
returns the vector length

getD

public double getD(int i)
Gets the ith variance

Parameters:
i - the index of the variance to get
Returns:
returns the ith element of the variance

getSD

public double getSD(int i)

componentsExplainedBy

public int componentsExplainedBy(double var)
Returns the minimum number of components required to explain a certain amount of variance

Parameters:
var - the amount of variance to explain
Returns:
the number of components required

readText

public boolean readText(java.lang.String file)
Reads a PCA file in a text format

Parameters:
file - The file to read from
Returns:
returns true if read OK

readText

public boolean readText(java.net.URL url)
Read a text format PCA file from a URL

Parameters:
url - The URL to read from
Returns:
returns true if the URL was read OK

writeText

public boolean writeText(java.lang.String filename)
Writes this PCA in a text format

Parameters:
filename - the name of the file to write to
Returns:
returns true if written OK

writeText

public void writeText(java.io.Writer fw)
               throws java.io.IOException
Writes this PCA data to a writer in a text format

Parameters:
fw - the writer to write to
Throws:
java.io.IOException

readText

public void readText(java.io.InputStream in)
              throws java.io.IOException
Read a text format PCA file from an InputStream

Parameters:
in - The InputStream to read from
Throws:
java.io.IOException

readText

public void readText(java.io.Reader r)
              throws java.io.IOException
Read a text format PCA file from a Reader

Parameters:
r - The Reader to read from
Throws:
java.io.IOException

readText

public void readText(java.io.StreamTokenizer st)
              throws java.io.IOException
Read a text format PCA file from a StreamTokenizer

Parameters:
st - The StreamTokenizer to read from
Throws:
java.io.IOException

readText

public void readText(java.util.Scanner sc)
              throws java.io.IOException
Read a text format PCA file from a Scanner

Parameters:
sc - The Scanner to read from
Throws:
java.io.IOException

readBinary

public boolean readBinary(java.lang.String file,
                          double maxvar)
                   throws java.io.FileNotFoundException,
                          java.io.IOException
Reads a PCA from a binary format (as produced by Psychomorph)

Parameters:
file - the name of the file to read from
maxvar - the maximum fraction of variance to explain/read
Returns:
returns true if the PCA has read OK
Throws:
java.io.FileNotFoundException
java.io.IOException

readBinary

public boolean readBinary(java.io.InputStream is,
                          double maxvar)
                   throws java.io.IOException
Reads a PCA from a binary format (as produced by Psychomorph)

Parameters:
is - the InputStream to read from
maxvar - the maximum fraction of variance to explain/read
Returns:
returns true if the PCA has read OK
Throws:
java.io.IOException

writeBinary

public boolean writeBinary(java.lang.String file)
Writes a PCA to file in a binary format

Parameters:
file - the file to write to
Returns:
returns true if written to file OK

writeBinary

public void writeBinary(java.io.OutputStream out)
                 throws java.io.IOException
Writes a binary format PCA to the OutputStream

Parameters:
out - the OutputStream
Throws:
java.io.IOException

analyse

public double[] analyse(Template tem,
                        Template avTem)
Analyse a template to find the PCA weights

Parameters:
tem - the Template to analyse
avTem - the average Template to subtract before analysis
Returns:
returns the principal component weights

analyse

public double[] analyse(Template tem,
                        Template avTem,
                        int normalisation,
                        int[] normalisationPoints)
Analyse a template to find the PCA weights

Parameters:
tem - the Template to analyse
avTem - the average Template to subtract before analysis
normalisation - one of ASM.TWO_POINT_NORMALISATION, ASM.RIGID_BODY_NORMALISATION or ASM.THREE_POINT_NORMALISATION
normalisationPoints - the indices of points to use in the normalisation (if any)
Returns:
returns the principal component weights

analyse

public double[] analyse(double[] shifts)
Finds the PCA components for the given point shifts

Parameters:
shifts - The shifts from the mean
Returns:
The best fit principal component weights

analyse

public double[] analyse(double[] shifts,
                        double[] average)
Finds the PCA components for the given point shifts

Parameters:
shifts - The shifts from the mean
average - the average, this will be subtracted before analysis
Returns:
The best fit principal component weights

analyse

public double[] analyse(double[] shifts,
                        double[] average,
                        int count)
Finds the PCA components for the given point shifts

Parameters:
shifts - The shifts from the mean
average - the average, this will be subtracted before analysis
count - number of pca parameters to use
Returns:
The best fit principal component weights

project

public Template project(Template tem,
                        Template av,
                        boolean cap,
                        double capval)
Projects the given Template into the closest approximation in the PCA space

Parameters:
tem - the Template to approximate
av - the average Template
cap - flag indicating if large parameter values should be capped
capval - the multiple of the standard deviation to cap each parameter at
Returns:
returns a PCA approximation of this Template

project

public Template project(Template tem,
                        Template av)
Projects the given Template into the closest approximation in the PCA space

Parameters:
tem - the Template to approximate
av - the average Template
Returns:
returns a PCA approximation of this Template

project

public Template project(Template tem,
                        Template av,
                        int left,
                        int right,
                        boolean cap,
                        double capval)
Projects the given Template into the closest approximation in the PCA space

Parameters:
tem - the Template to approximate
av - the average Template
left - the index of the first normalisation point
right - the index of the second normalisation point
cap - flag indicating if large parameter values should be capped
capval - the multiple of the standard deviation to cap each parameter at
Returns:
returns a PCA approximation of this Template

analyse

public double[] analyse(double[] shifts,
                        int maxComp)
Finds the PCA components for the given point shifts

Parameters:
shifts - The shifts from the mean
maxComp - The maximum number of components to use
Returns:
The best fit principal component weights

findPCAFit

public float[] findPCAFit(float[] shifts)
Finds the PCA components for the given point shifts

Parameters:
shifts - The shifts from the mean
Returns:
The best fit principal component weights

reconstruct

public double[] reconstruct(double[] parameters)
Create a vector from the component weights

Parameters:
parameters - The weights of the PCs
Returns:
The vector corresponding to these PCs

reconstruct

public double[] reconstruct(double[] parameters,
                            double[] average)
Create a vector from the component weights

Parameters:
parameters - The weights of the PCs
average - The mean of the PCA distribution
Returns:
The vector corresponding to these PCs

reconstruct

public float[] reconstruct(float[] parameters)
Create a vector from the component weights

Parameters:
parameters - The weights of the PCs
Returns:
The vector corresponding to these PCs

reconstruct

public double[] reconstruct(double[] parameters,
                            int maxComp)
Create a vector from the component weights

Parameters:
parameters - The weights of the PCs
maxComp - The maximum number of components to use
Returns:
The vector corresponding to these PCs

reconstruct

public float[] reconstruct(float[] parameters,
                           int maxComp)
Create a vector from the component weights

Parameters:
parameters - The weights of the PCs
maxComp - The maximum number of components to use
Returns:
The vector corresponding to these PCs

build

public Template build(java.util.ArrayList<Template> templates,
                      int normalisation,
                      int[] normalisationPoints)
Builds a PCA from the set of Templates given

Parameters:
templates - a list of templates to build the PCA from
normalisation - a flag indicating which kind of normalisation to use (2 point, 3 point or best fit)
normalisationPoints - the list of normalisation point indexes to use
Returns:
returns the number of templates actually used (may be fewer than the total if there are problems)

build

public int build(java.util.ArrayList<Template> templates,
                 Template avTem,
                 int normalisation,
                 int[] normalisationPoints)
Builds a PCA from the set of Templates given

Parameters:
templates - a list of templates to build the PCA from
avTem - the average template, should be the average of the set
normalisation - a flag indicating which kind of normalisation to use (2 point, 3 point or best fit)
normalisationPoints - the list of normalisation point indexes to use
Returns:
returns the number of templates actually used (may be fewer than the total if there are problems)

build

public java.util.ArrayList<Template> build(java.util.Vector<java.util.ArrayList<Template>> templateSets)
Builds a PCA from a set of template sets. All templates are first aligned to the global average. Then the within set mean is subtracted from each template, and the PCA is then built from the entire set. The weightings are normalised so that each set contributes equally, independent of the number of examples.

Parameters:
templateSets - a list of Template lists
Returns:
returns an ArrayList containing the within set averages and the global average

makeMatchingSets

public java.util.Vector<java.util.ArrayList<Template>> makeMatchingSets(java.util.Vector<java.util.ArrayList<Template>> templateSets,
                                                                        java.util.ArrayList<Template> averages,
                                                                        double varianceToExplain)
Creates a set of matching sets from a set of Template sets. The main axes of within set variation across the entire set are found. Then the nearest matches within each set are found, and scaled by the within set variance of that component.

Parameters:
templateSets - the set of original template sets
averages - the average of each template set
varianceToExplain - the fraction of variance to explain in the output components
Returns:
the set of matching vectors for each set

makeMatchingTemplateSets

public java.util.Vector<java.util.ArrayList<Template>> makeMatchingTemplateSets(java.lang.String fileName,
                                                                                double pcVar)
Creates a set of matching sets from a set of Template sets. The main axes of within set variation across the entire set are found. Then the nearest matches within each set are found, and scaled by the within set variance of that component.

Parameters:
fileName - the name of the text file listing the templates to use
pcVar - the fraction of variance to explain
Returns:
returns the set of templates for each set that should match

makeMatchingVectorSets

public java.util.Vector<java.util.ArrayList<double[]>> makeMatchingVectorSets(java.lang.String fileName,
                                                                              double pcVar)
Creates a set of matching sets from a set of double[] sets. The main axes of within set variation across the entire set are found. Then the nearest matches within each set are found, and scaled by the within set variance of that component.

Parameters:
fileName - the name of the text file listing the values to use
pcVar - the fraction of variance to explain
Returns:
returns the set of shifts for each set that should match

buildVectors

public java.util.ArrayList<double[]> buildVectors(java.util.Vector<java.util.ArrayList<double[]>> sets)
Builds a PCA from a set of vector sets. The within set mean is subtracted from each vector, and the PCA is then built from the entire set. The weightings are normalised so that each set contributes equally, independent of the number of examples.

Parameters:
sets - the sets of double values to build from
Returns:
returns the means of each set and the global mean at the end.

makeMatchingVectorSets

public java.util.Vector<java.util.ArrayList<double[]>> makeMatchingVectorSets(java.util.Vector<java.util.ArrayList<double[]>> templateSets,
                                                                              java.util.ArrayList<double[]> averages,
                                                                              double varianceToExplain)
Creates a set of matching vector sets from a set of vector sets. The main axes of within set variation across the entire set are found. Then the nearest matches within each set are found, and scaled by the within set variance of that component.

Parameters:
templateSets - the set of vectors to match
averages - the array of within set and global averages
varianceToExplain - the variance to explain in this model
Returns:
the set of vectors that span a matching subspace of each vector set

buildTemplateSet

public java.util.ArrayList<Template> buildTemplateSet(java.lang.String fileName)
Builds a PCA from a set of template sets. All templates are first aligned to the global average. Then the within set mean is subtracted from each template, and the PCA is then built from the entire set. The weightings are normalised so that each set contributes equally, independent of the number of examples.

Parameters:
fileName - the name of a text file listing the templates to use
Returns:
returns an ArrayList containing the within set averages and the global average

getTemplateSet

public java.util.Vector<java.util.ArrayList<Template>> getTemplateSet(java.lang.String fileName)
Reads sets of templates from file into a Vector of ArrayList of Templates

Parameters:
fileName - the name of a text file containing a list of text file names, each of which lists images and templates for a particular subject
Returns:
returns a list of lists of Templates

getVectorSet

public java.util.Vector<java.util.ArrayList<double[]>> getVectorSet(java.lang.String fileName)
Reads a set of vector sets from a file. The first row is treated as headings. Each subsequent row contains one vector preceded by a string indicating which group it belongs to. Whenever the group label changes a new group is added.

Parameters:
fileName - the name of the file containing the vector sets.
Returns:
the list of lists of double[] vectors.

build

public int build(BigMat data)
Builds a PCA from the data in a matrix form

Parameters:
data - The BigMat containing the data samples as rows
Returns:
returns the number of samples in the PCA

buildPCAandAverage

public int buildPCAandAverage(BigMat data,
                              double[] average)
Builds a PCA from the data in a matrix form

Parameters:
data - The BigMat containing the data samples as rows
average - used to return the average of the sample
Returns:
returns the number of samples in the PCA

build

public int build(BigMat data,
                 double[] weights)
Builds a PCA from a set of weighted samples

Parameters:
data - the matrix of data samples as rows
weights - the weighting of each row
Returns:
returns the number of samples used

quickSort

public static void quickSort(double[] vals,
                             int[] order,
                             int start,
                             int len)
Does a quick sort on the values given

Parameters:
vals - the values to sort
order - records the reordering so that the same sorting can be applied to the associated vectors; should initially be in order from 0.
start - the start of the portion of the array to sort
len - the length of the portion of the array to sort

getIntersection

public Template[] getIntersection(Template average1,
                                  Template average2,
                                  PCA pca,
                                  double alpha,
                                  double beta)
Find the least squares intersection between the two PCA models

Parameters:
average1 - the mean of this PCA
average2 - the mean of pca
pca - the other PCA
alpha - a regularisation term for this (not currently used)
beta - a regularisation term for pca (not currently used)
Returns:
returns an array of 3 Templates. The first is the nearest point in this to the space spanned by pca. The second is the nearest point in pca to the space spanned by this. The third is the mean of the first and second.

replaceMean

public Template replaceMean(Template newMean,
                            Template oldMean,
                            int left,
                            int right,
                            boolean closest)
Replaces the mean of this PCA with a new mean aligned to the old using 2 point normalisation

Parameters:
newMean - the new average
oldMean - the old average
left - the left normalisation point
right - the right normalisation point
closest - boolean indicating if the new mean should be the old mean projected into the PCA space with newMean as its centre.
Returns:
returns the aligned mean or the projected old mean.

normaliseEyes

public Template normaliseEyes(Template average,
                              Template average2)
Rotates this PCA so that it aligns with the new average

Parameters:
average - the current centre
average2 - the new centre
Returns:
returns the new normalised average

addToAverage

public static void addToAverage(Template average1,
                                PCA pca1,
                                Template currentAverage,
                                int n,
                                int start,
                                BigMat data)
Incrementally updates the data matrix and currentAverage by adding the PCA data to them

Parameters:
average1 - the average of the PCA data
pca1 - the PCA data to add
currentAverage - the current global average
n - the number of templates and PCAs previously added
start - the index to start filling in the data matrix rows from
data - the place to put the scaled principal components from pca1

solve

public void solve(BigMat data)
Builds this PCA from the data given

Parameters:
data - the data to build from, samples as rows

combine

public Template combine(PCA pca1,
                        Template average1,
                        PCA pca2,
                        Template average2)
Combine two PCA models into a single one.

Parameters:
pca1 - The first PCA
average1 - The average of the first PCA
pca2 - The second PCA
average2 - The average of the second PCA
Returns:
Returns the average of the combined PCAs

combine2

public Template combine2(PCA pca1,
                         Template average1,
                         PCA pca2,
                         Template average2)
Combine two PCA models into a single one.

Parameters:
pca1 - The first PCA
average1 - The average of the first PCA
pca2 - The second PCA
average2 - The average of the second PCA
Returns:
Returns the average of the combined PCAs

main

public static void main(java.lang.String[] args)
Main method used for testing

Parameters:
args -