Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algorithms
Crypt_Decrypt algorithms
Mathematics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embedded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Article/Document
PaceRegression.java
Package: Weka-3-2.rar [view]
Upload User: rhdiban
Upload Date: 2013-08-09
Package Size: 15085k
Code Size: 19k
Category:
Windows Develop
Development Platform:
Java
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
- /*
- * PaceRegression.java
- * Copyright (C) 2002 Yong Wang
- */
- package weka.classifiers.functions.pace;
- import weka.classifiers.Classifier;
- import weka.classifiers.Evaluation;
- import java.io.*;
- import java.util.*;
- import weka.core.*;
- /**
- * Class for building pace regression linear models and using them for
- * prediction. <p>
- *
- * Under regularity conditions, pace regression is provably optimal when
- * the number of coefficients tends to infinity. It consists of a group of
- * estimators that are either overall optimal or optimal under certain
- * conditions. <p>
- *
- * The current work of the pace regression theory, and therefore also this
- * implementation, do not handle: <p>
- *
- * - missing values <br>
- * - non-binary nominal attributes <br>
- * - the case that n - k is small where n is number of instances and k is
- * number of coefficients (the threshold used in this implementation is 20)
- * <p>
- *
- * Valid options are:<p>
- *
- * -D <br>
- * Produce debugging output. <p>
- * -E estimator <br>
- * The estimator can be one of the following: <br>
- * <ul>
- * <li>eb -- Empirical Bayes estimator for normal mixture (default) <br>
- * <li>nested -- Optimal nested model selector for normal mixture <br>
- * <li>subset -- Optimal subset selector for normal mixture <br>
- * <li>pace2 -- PACE2 for Chi-square mixture <br>
- * <li>pace4 -- PACE4 for Chi-square mixture<br>
- * <li>pace6 -- PACE6 for Chi-square mixture <br>
- * <li>ols -- Ordinary least squares estimator <br>
- * <li>aic -- AIC estimator <br>
- * <li>bic -- BIC estimator <br>
- * <li>ric -- RIC estimator <br>
- * <li>olsc -- Ordinary least squares subset selector with a threshold <br>
- * </ul>
- * -S &lt;threshold value&gt; <br>
- * Threshold for the olsc estimator<p>
- *
- * <p>
- * REFERENCES <p>
- *
- * Wang, Y. (2000). "A new approach to fitting linear models in high
- * dimensional spaces." PhD Thesis. Department of Computer Science,
- * University of Waikato, New Zealand. <p>
- *
- * Wang, Y. and Witten, I. H. (2002). "Modeling for optimal probability
- * prediction." Proceedings of ICML'2002. Sydney. <p>
- *
- * @author Yong Wang (yongwang@cs.waikato.ac.nz)
- * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
- * @version $Revision: 1.4 $ */
- public class PaceRegression extends Classifier implements OptionHandler,
- WeightedInstancesHandler {
- /** The model used */
- Instances m_Model = null;
- /** Array for storing coefficients of linear regression. */
- private double[] m_Coefficients;
- /** The index of the class attribute */
- private int m_ClassIndex;
- /** True if debug output will be printed */
- private boolean m_Debug;
- private static final int olsEstimator = 0;
- private static final int ebEstimator = 1;
- private static final int nestedEstimator = 2;
- private static final int subsetEstimator = 3;
- private static final int pace2Estimator = 4;
- private static final int pace4Estimator = 5;
- private static final int pace6Estimator = 6;
- private static final int olscEstimator = 7;
- private static final int aicEstimator = 8;
- private static final int bicEstimator = 9;
- private static final int ricEstimator = 10;
- public static final Tag [] TAGS_ESTIMATOR = {
- new Tag(olsEstimator, "Ordinary least squares"),
- new Tag(ebEstimator, "Empirical Bayes"),
- new Tag(nestedEstimator, "Nested model selector"),
- new Tag(subsetEstimator, "Subset selector"),
- new Tag(pace2Estimator, "PACE2"),
- new Tag(pace4Estimator, "PACE4"),
- new Tag(pace6Estimator, "PACE6"),
- new Tag(olscEstimator, "Ordinary least squares selection"),
- new Tag(aicEstimator, "AIC"),
- new Tag(bicEstimator, "BIC"),
- new Tag(ricEstimator, "RIC")
- };
- private int paceEstimator = ebEstimator;
- private double olscThreshold = 2; // AIC
- /**
- * Builds a pace regression model for the given data.
- *
- * @param data the training data to be used for generating the
- * linear regression function
- * @exception Exception if the classifier could not be built successfully
- */
- public void buildClassifier(Instances data) throws Exception {
- // Checks on data model and instances
- try {
- if (!data.classAttribute().isNumeric()) {
- throw new UnsupportedClassTypeException("Class attribute has to be numeric"+
- " for pace regression!");
- }
- } catch (UnassignedClassException e) {
- System.err.println(data);
- System.err.println(data.classIndex());
- }
- if (data.numInstances() == 0) {
- throw new Exception("No instances in training file!");
- }
- if (data.checkForStringAttributes()) {
- throw new UnsupportedAttributeTypeException("Can't handle string attributes!");
- }
- if (checkForNonBinary(data)) {
- throw new UnsupportedAttributeTypeException("Can only deal with numeric and binary attributes!");
- }
- // check for missing data and throw exception if some are found
- if (checkForMissing(data)) {
- throw new NoSupportForMissingValuesException("Can't handle missing values!");
- }
- // n - k should be >= 20
- if (data.numInstances() - data.numAttributes() < 20) {
- throw new IllegalArgumentException("Not enough instances. Ratio of number of instances (n) to number of "
- + "attributes (k) is too small (n - k < 20).");
- }
- /*
- * initialize the following
- */
- m_Model = new Instances(data, 0);
- m_ClassIndex = data.classIndex();
- double[][] transformedDataMatrix =
- getTransformedDataMatrix(data, m_ClassIndex);
- double[] classValueVector = data.attributeToDoubleArray(m_ClassIndex);
- m_Coefficients = null;
- /*
- * Perform pace regression
- */
- m_Coefficients = pace(transformedDataMatrix, classValueVector);
- }
- /**
- * pace regression
- *
- * @param matrix_X matrix with observations
- * @param vector_Y vektor with class values
- * @return vector with coefficients
- * @exception Exception if pace regression cannot be done successfully
- */
- private double [] pace(double[][] matrix_X, double [] vector_Y) {
- PaceMatrix X = new PaceMatrix( matrix_X );
- PaceMatrix Y = new PaceMatrix( vector_Y, vector_Y.length );
- IntVector pvt = IntVector.seq(0, X.getColumnDimension()-1);
- int n = X.getRowDimension();
- int kr = X.getColumnDimension();
- X.lsqrSelection( Y, pvt, 1 );
- X.positiveDiagonal( Y, pvt );
- int k = pvt.size();
- PaceMatrix sol = (PaceMatrix) Y.clone();
- X.rsolve( sol, pvt, pvt.size() );
- DoubleVector betaHat = sol.getColumn(0).unpivoting(pvt, kr);
- DoubleVector r = Y.getColumn( pvt.size(), n-1, 0);
- double sde = Math.sqrt(r.sum2() / r.size());
- DoubleVector aHat = Y.getColumn( 0, pvt.size()-1, 0).times( 1./sde );
- DoubleVector aTilde = null;
- switch( paceEstimator) {
- case ebEstimator:
- case nestedEstimator:
- case subsetEstimator:
- NormalMixture d = new NormalMixture();
- d.fit( aHat, MixtureDistribution.NNMMethod );
- if( paceEstimator == ebEstimator )
- aTilde = d.empiricalBayesEstimate( aHat );
- else if( paceEstimator == ebEstimator )
- aTilde = d.subsetEstimate( aHat );
- else aTilde = d.nestedEstimate( aHat );
- break;
- case pace2Estimator:
- case pace4Estimator:
- case pace6Estimator:
- DoubleVector AHat = aHat.square();
- ChisqMixture dc = new ChisqMixture();
- dc.fit( AHat, MixtureDistribution.NNMMethod );
- DoubleVector ATilde;
- if( paceEstimator == pace6Estimator )
- ATilde = dc.pace6( AHat );
- else if( paceEstimator == pace2Estimator )
- ATilde = dc.pace2( AHat );
- else ATilde = dc.pace4( AHat );
- aTilde = ATilde.sqrt().times( aHat.sign() );
- break;
- case olsEstimator:
- aTilde = aHat.copy();
- break;
- case aicEstimator:
- case bicEstimator:
- case ricEstimator:
- case olscEstimator:
- if(paceEstimator == aicEstimator) olscThreshold = 2;
- else if(paceEstimator == bicEstimator) olscThreshold = Math.log( n );
- else if(paceEstimator == ricEstimator) olscThreshold = 2*Math.log( kr );
- aTilde = aHat.copy();
- for( int i = 0; i < aTilde.size(); i++ )
- if( Math.abs(aTilde.get(i)) < Math.sqrt(olscThreshold) )
- aTilde.set(i, 0);
- }
- PaceMatrix YTilde = new PaceMatrix((new PaceMatrix(aTilde)).times( sde ));
- X.rsolve( YTilde, pvt, pvt.size() );
- DoubleVector betaTilde = YTilde.getColumn(0).unpivoting( pvt, kr );
- return betaTilde.getArrayCopy();
- }
- /**
- * Checks if instances have a missing value.
- * @param data the data set
- * @return true if missing value is present in data set
- */
- public boolean checkForMissing(Instances data) {
- for (int i = 0; i < data.numInstances(); i++) {
- Instance inst = data.instance(i);
- for (int j = 0; j < data.numAttributes(); j++) {
- if (inst.isMissing(j)) {
- return true;
- }
- }
- }
- return false;
- }
- /**
- * Checks if an instance has a missing value.
- * @param instance the instance
- * @return true if missing value is present
- */
- public boolean checkForMissing(Instance instance, Instances model) {
- for (int j = 0; j < instance.numAttributes(); j++) {
- if (j != model.classIndex()) {
- if (instance.isMissing(j)) {
- return true;
- }
- }
- }
- return false;
- }
- /**
- * Checks if any of the nominal attributes is non-binary.
- * @param data the data set
- * @return true if non binary attribute is present
- */
- public boolean checkForNonBinary(Instances data) {
- for (int i = 0; i < data.numAttributes(); i++) {
- if (data.attribute(i).isNominal()) {
- if (data.attribute(i).numValues() != 2)
- return true;
- }
- }
- return false;
- }
- /**
- * Transforms dataset into a two-dimensional array.
- *
- * @param data dataset
- * @param classIndex index of the class attribute
- */
- private double [][] getTransformedDataMatrix(Instances data,
- int classIndex) {
- int numInstances = data.numInstances();
- int numAttributes = data.numAttributes();
- int middle = classIndex;
- if (middle < 0) {
- middle = numAttributes;
- }
- double[][] result = new double[numInstances]
- [numAttributes];
- for (int i = 0; i < numInstances; i++) {
- Instance inst = data.instance(i);
- result[i][0] = 1.0;
- // the class value (lies on index middle) is left out
- for (int j = 0; j < middle; j++) {
- result[i][j + 1] = inst.value(j);
- }
- for (int j = middle + 1; j < numAttributes; j++) {
- result[i][j] = inst.value(j);
- }
- }
- return result;
- }
- /**
- * Classifies the given instance using the linear regression function.
- *
- * @param instance the test instance
- * @return the classification
- * @exception Exception if classification can't be done successfully
- */
- public double classifyInstance(Instance instance) throws Exception {
- if (m_Coefficients == null) {
- throw new Exception("Pace Regression: No model built yet.");
- }
- // check for missing data and throw exception if some are found
- if (checkForMissing(instance, m_Model)) {
- throw new NoSupportForMissingValuesException("Can't handle missing values!");
- }
- // Calculate the dependent variable from the regression model
- return regressionPrediction(instance,
- m_Coefficients);
- }
- /**
- * Outputs the linear regression model as a string.
- */
- public String toString() {
- if (m_Coefficients == null) {
- return "Pace Regression: No model built yet.";
- }
- // try {
- StringBuffer text = new StringBuffer();
- text.append("nPace Regression Modelnn");
- text.append(m_Model.classAttribute().name()+" =nn");
- int index = 0;
- text.append(Utils.doubleToString(m_Coefficients[0],
- 12, 4) );
- for (int i = 1; i < m_Coefficients.length; i++) {
- // jump over the class attribute
- if (index == m_ClassIndex) index++;
- if (m_Coefficients[i] != 0.0) {
- // output a coefficient if unequal zero
- text.append(" +n");
- text.append(Utils.doubleToString(m_Coefficients[i], 12, 4)
- + " * ");
- text.append(m_Model.attribute(index).name());
- }
- index ++;
- }
- return text.toString();
- }
- /**
- * Returns an enumeration describing the available options.
- *
- * @return an enumeration of all the available options.
- */
- public Enumeration listOptions() {
- Vector newVector = new Vector(2);
- newVector.addElement(new Option("tProduce debugging output.n"
- + "t(default no debugging output)",
- "D", 0, "-D"));
- newVector.addElement(new Option("tThe estimator can be one of the following:n" +
- "ttebtEmpirical Bayes(default)n" +
- "ttnestedtOptimal nested modeln" +
- "ttsubsettOptimal subsetn" +
- "ttpace2tPACE2n" +
- "ttpace4tPACE4n" +
- "ttpace6tPACE6nn" +
- "ttolstOrdinary least squaresn" +
- "ttaictAICn" +
- "ttbictBICn" +
- "ttrictRICn" +
- "ttolsctOLSC",
- "E", 0, "-E <estimator>"));
- newVector.addElement(new Option("tThreshold value for the OLSC estimator",
- "S", 0, "-S <threshold value>"));
- return newVector.elements();
- }
- /**
- * Parses a given list of options. <p>
- * @param options the list of options as an array of strings
- * @exception Exception if an option is not supported
- */
- public void setOptions(String[] options) throws Exception {
- setDebug(Utils.getFlag('D', options));
- String estimator = Utils.getOption('E', options);
- if ( estimator.equals("ols") ) paceEstimator = olsEstimator;
- else if ( estimator.equals("olsc") ) paceEstimator = olscEstimator;
- else if( estimator.equals("eb") || estimator.equals("") )
- paceEstimator = ebEstimator;
- else if ( estimator.equals("nested") ) paceEstimator = nestedEstimator;
- else if ( estimator.equals("subset") ) paceEstimator = subsetEstimator;
- else if ( estimator.equals("pace2") ) paceEstimator = pace2Estimator;
- else if ( estimator.equals("pace4") ) paceEstimator = pace4Estimator;
- else if ( estimator.equals("pace6") ) paceEstimator = pace6Estimator;
- else if ( estimator.equals("aic") ) paceEstimator = aicEstimator;
- else if ( estimator.equals("bic") ) paceEstimator = bicEstimator;
- else if ( estimator.equals("ric") ) paceEstimator = ricEstimator;
- else throw new WekaException("unknown estimator " + estimator +
- " for -E option" );
- String string = Utils.getOption('S', options);
- if( ! string.equals("") ) olscThreshold = Double.parseDouble( string );
- }
- /**
- * Returns the coefficients for this linear model.
- */
- public double[] coefficients() {
- double[] coefficients = new double[m_Coefficients.length];
- for (int i = 0; i < coefficients.length; i++) {
- coefficients[i] = m_Coefficients[i];
- }
- return coefficients;
- }
- /**
- * Gets the current settings of the classifier.
- *
- * @return an array of strings suitable for passing to setOptions
- */
- public String [] getOptions() {
- String [] options = new String [6];
- int current = 0;
- if (getDebug()) {
- options[current++] = "-D";
- }
- options[current++] = "-E";
- switch (paceEstimator) {
- case olsEstimator: options[current++] = "ols";
- break;
- case olscEstimator: options[current++] = "olsc";
- options[current++] = "-S";
- options[current++] = "" + olscThreshold;
- break;
- case ebEstimator: options[current++] = "eb";
- break;
- case nestedEstimator: options[current++] = "nested";
- break;
- case subsetEstimator: options[current++] = "subset";
- break;
- case pace2Estimator: options[current++] = "pace2";
- break;
- case pace4Estimator: options[current++] = "pace4";
- break;
- case pace6Estimator: options[current++] = "pace6";
- break;
- case aicEstimator: options[current++] = "aic";
- break;
- case bicEstimator: options[current++] = "bic";
- break;
- case ricEstimator: options[current++] = "ric";
- break;
- }
- while (current < options.length) {
- options[current++] = "";
- }
- return options;
- }
- /**
- * Get the number of coefficients used in the model
- *
- * @return the number of coefficients
- */
- public int numParameters()
- {
- return m_Coefficients.length-1;
- }
- /**
- * Controls whether debugging output will be printed
- *
- * @param debug true if debugging output should be printed
- */
- public void setDebug(boolean debug) {
- m_Debug = debug;
- }
- /**
- * Controls whether debugging output will be printed
- *
- * @param debug true if debugging output should be printed
- */
- public boolean getDebug() {
- return m_Debug;
- }
- /**
- * Gets the estimator
- *
- * @return the estimator
- */
- public SelectedTag getEstimator() {
- return new SelectedTag(paceEstimator, TAGS_ESTIMATOR);
- }
- /**
- * Sets the estimator.
- *
- * @param estimator the new estimator
- */
- public void setEstimator(SelectedTag estimator) {
- if (estimator.getTags() == TAGS_ESTIMATOR) {
- paceEstimator = estimator.getSelectedTag().getID();
- }
- }
- /**
- * Set threshold for the olsc estimator
- *
- * @param threshold the threshold for the olsc estimator
- */
- public void setThreshold(double newThreshold) {
- olscThreshold = newThreshold;
- }
- /**
- * Gets the threshold for olsc estimator
- *
- * @return the threshold
- */
- public double getThreshold() {
- return olscThreshold;
- }
- /**
- * Calculate the dependent value for a given instance for a
- * given regression model.
- *
- * @param transformedInstance the input instance
- * @param selectedAttributes an array of flags indicating which
- * attributes are included in the regression model
- * @param coefficients an array of coefficients for the regression
- * model
- * @return the regression value for the instance.
- * @exception Exception if the class attribute of the input instance
- * is not assigned
- */
- private double regressionPrediction(Instance transformedInstance,
- double [] coefficients)
- throws Exception {
- int column = 0;
- double result = coefficients[column];
- for (int j = 0; j < transformedInstance.numAttributes(); j++) {
- if (m_ClassIndex != j) {
- column++;
- result += coefficients[column] * transformedInstance.value(j);
- }
- }
- return result;
- }
- /**
- * Generates a linear regression function predictor.
- *
- * @param String the options
- */
- public static void main(String argv[]) {
- Classifier scheme;
- try {
- scheme = new PaceRegression();
- System.out.println(Evaluation.evaluateModel(scheme, argv));
- } catch (Exception e) {
- e.printStackTrace();
- // System.out.println(e.getMessage());
- }
- }
- }