Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algrithms
Crypt_Decrypt algrithms
Mathimatics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embeded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Artical/Document
CfsSubsetEval.java
Package: Weka-3-2.rar [view]
Upload User: rhdiban
Upload Date: 2013-08-09
Package Size: 15085k
Code Size: 25k
Category:
Windows Develop
Development Platform:
Java
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- /*
- * CfsSubsetEval.java
- * Copyright (C) 1999 Mark Hall
- *
- */
- package weka.attributeSelection;
- import java.io.*;
- import java.util.*;
- import weka.core.*;
- import weka.classifiers.*;
- import weka.filters.supervised.attribute.Discretize;
- import weka.filters.Filter;
- /**
- * CFS attribute subset evaluator.
- * For more information see: <p>
- *
- * Hall, M. A. (1998). Correlation-based Feature Subset Selection for Machine
- * Learning. Thesis submitted in partial fulfilment of the requirements of the
- * degree of Doctor of Philosophy at the University of Waikato. <p>
- *
- * Valid options are:
- *
- * -M <br>
- * Treat missing values as a seperate value. <p>
- *
- * -L <br>
- * Include locally predictive attributes. <p>
- *
- * @author Mark Hall (mhall@cs.waikato.ac.nz)
- * @version $Revision: 1.17 $
- */
- public class CfsSubsetEval
- extends SubsetEvaluator
- implements OptionHandler
- {
- /** The training instances */
- private Instances m_trainInstances;
- /** Discretise attributes when class in nominal */
- private Discretize m_disTransform;
- /** The class index */
- private int m_classIndex;
- /** Is the class numeric */
- private boolean m_isNumeric;
- /** Number of attributes in the training data */
- private int m_numAttribs;
- /** Number of instances in the training data */
- private int m_numInstances;
- /** Treat missing values as seperate values */
- private boolean m_missingSeperate;
- /** Include locally predicitive attributes */
- private boolean m_locallyPredictive;
- /** Holds the matrix of attribute correlations */
- private Matrix m_corr_matrix;
- /** Standard deviations of attributes (when using pearsons correlation) */
- private double[] m_std_devs;
- /** Threshold for admitting locally predictive features */
- private double m_c_Threshold;
- /**
- * Returns a string describing this attribute evaluator
- * @return a description of the evaluator suitable for
- * displaying in the explorer/experimenter gui
- */
- public String globalInfo() {
- return "CfsSubsetEval :nnEvaluates the worth of a subset of attributes "
- +"by considering the individual predictive ability of each feature "
- +"along with the degree of redundancy between them.nn"
- +"Subsets of features that are highly correlated with the class "
- +"while having low intercorrelation are preferred.n";
- }
- /**
- * Constructor
- */
- public CfsSubsetEval () {
- resetOptions();
- }
- /**
- * Returns an enumeration describing the available options.
- * @return an enumeration of all the available options.
- *
- **/
- public Enumeration listOptions () {
- Vector newVector = new Vector(3);
- newVector.addElement(new Option("tTreat missing values as a seperate"
- + "ntvalue.", "M", 0, "-M"));
- newVector.addElement(new Option("tInclude locally predictive attributes"
- + ".", "L", 0, "-L"));
- return newVector.elements();
- }
- /**
- * Parses and sets a given list of options. <p>
- *
- * Valid options are:
- *
- * -M <br>
- * Treat missing values as a seperate value. <p>
- *
- * -L <br>
- * Include locally predictive attributes. <p>
- *
- * @param options the list of options as an array of strings
- * @exception Exception if an option is not supported
- *
- **/
- public void setOptions (String[] options)
- throws Exception
- {
- String optionString;
- resetOptions();
- setMissingSeperate(Utils.getFlag('M', options));
- setLocallyPredictive(Utils.getFlag('L', options));
- }
- /**
- * Returns the tip text for this property
- * @return tip text for this property suitable for
- * displaying in the explorer/experimenter gui
- */
- public String locallyPredictiveTipText() {
- return "Identify locally predictive attributes. Iteratively adds "
- +"attributes with the highest correlation with the class as long "
- +"as there is not already an attribute in the subset that has a "
- +"higher correlation with the attribute in question";
- }
- /**
- * Include locally predictive attributes
- *
- * @param b true or false
- */
- public void setLocallyPredictive (boolean b) {
- m_locallyPredictive = b;
- }
- /**
- * Return true if including locally predictive attributes
- *
- * @return true if locally predictive attributes are to be used
- */
- public boolean getLocallyPredictive () {
- return m_locallyPredictive;
- }
- /**
- * Returns the tip text for this property
- * @return tip text for this property suitable for
- * displaying in the explorer/experimenter gui
- */
- public String missingSeperateTipText() {
- return "Treat missing as a separate value. Otherwise, counts for missing "
- +"values are distributed across other values in proportion to their "
- +"frequency.";
- }
- /**
- * Treat missing as a seperate value
- *
- * @param b true or false
- */
- public void setMissingSeperate (boolean b) {
- m_missingSeperate = b;
- }
- /**
- * Return true is missing is treated as a seperate value
- *
- * @return true if missing is to be treated as a seperate value
- */
- public boolean getMissingSeperate () {
- return m_missingSeperate;
- }
- /**
- * Gets the current settings of CfsSubsetEval
- *
- * @return an array of strings suitable for passing to setOptions()
- */
- public String[] getOptions () {
- String[] options = new String[2];
- int current = 0;
- if (getMissingSeperate()) {
- options[current++] = "-M";
- }
- if (getLocallyPredictive()) {
- options[current++] = "-L";
- }
- while (current < options.length) {
- options[current++] = "";
- }
- return options;
- }
- /**
- * Generates a attribute evaluator. Has to initialize all fields of the
- * evaluator that are not being set via options.
- *
- * CFS also discretises attributes (if necessary) and initializes
- * the correlation matrix.
- *
- * @param data set of instances serving as training data
- * @exception Exception if the evaluator has not been
- * generated successfully
- */
- public void buildEvaluator (Instances data)
- throws Exception
- {
- if (data.checkForStringAttributes()) {
- throw new UnsupportedAttributeTypeException("Can't handle string attributes!");
- }
- m_trainInstances = data;
- m_trainInstances.deleteWithMissingClass();
- m_classIndex = m_trainInstances.classIndex();
- m_numAttribs = m_trainInstances.numAttributes();
- m_numInstances = m_trainInstances.numInstances();
- m_isNumeric = m_trainInstances.attribute(m_classIndex).isNumeric();
- if (!m_isNumeric) {
- m_disTransform = new Discretize();
- m_disTransform.setUseBetterEncoding(true);
- m_disTransform.setInputFormat(m_trainInstances);
- m_trainInstances = Filter.useFilter(m_trainInstances, m_disTransform);
- }
- m_std_devs = new double[m_numAttribs];
- m_corr_matrix = new Matrix(m_numAttribs, m_numAttribs);
- for (int i = 0; i < m_corr_matrix.numRows(); i++) {
- m_corr_matrix.setElement(i, i, 1.0);
- m_std_devs[i] = 1.0;
- }
- for (int i = 0; i < m_numAttribs; i++) {
- for (int j = i + 1; j < m_numAttribs; j++) {
- m_corr_matrix.setElement(i, j, -999);
- m_corr_matrix.setElement(j, i, -999);
- }
- }
- }
- /**
- * evaluates a subset of attributes
- *
- * @param subset a bitset representing the attribute subset to be
- * evaluated
- * @exception Exception if the subset could not be evaluated
- */
- public double evaluateSubset (BitSet subset)
- throws Exception
- {
- double num = 0.0;
- double denom = 0.0;
- double corr;
- // do numerator
- for (int i = 0; i < m_numAttribs; i++) {
- if (i != m_classIndex) {
- if (subset.get(i)) {
- if (m_corr_matrix.getElement(i, m_classIndex) == -999) {
- corr = correlate(i, m_classIndex);
- m_corr_matrix.setElement(i, m_classIndex, corr);
- m_corr_matrix.setElement(m_classIndex, i, corr);
- num += (m_std_devs[i] * corr);
- }
- else {num += (m_std_devs[i] *
- m_corr_matrix.getElement(i, m_classIndex));
- }
- }
- }
- }
- // do denominator
- for (int i = 0; i < m_numAttribs; i++) {
- if (i != m_classIndex) {
- if (subset.get(i)) {
- denom += (1.0 * m_std_devs[i] * m_std_devs[i]);
- for (int j = i + 1; j < m_numAttribs; j++) {if (subset.get(j)) {
- if (m_corr_matrix.getElement(i, j) == -999) {
- corr = correlate(i, j);
- m_corr_matrix.setElement(i, j, corr);
- m_corr_matrix.setElement(j, i, corr);
- denom += (2.0 * m_std_devs[i] * m_std_devs[j] * corr);
- }
- else {denom += (2.0 * m_std_devs[i] * m_std_devs[j] *
- m_corr_matrix.getElement(i, j));
- }
- }
- }
- }
- }
- }
- if (denom < 0.0) {
- denom *= -1.0;
- }
- if (denom == 0.0) {
- return (0.0);
- }
- double merit = (num/Math.sqrt(denom));
- if (merit < 0.0) {
- merit *= -1.0;
- }
- return merit;
- }
- private double correlate (int att1, int att2) {
- if (!m_isNumeric) {
- return symmUncertCorr(att1, att2);
- }
- boolean att1_is_num = (m_trainInstances.attribute(att1).isNumeric());
- boolean att2_is_num = (m_trainInstances.attribute(att2).isNumeric());
- if (att1_is_num && att2_is_num) {
- return num_num(att1, att2);
- }
- else {if (att2_is_num) {
- return num_nom2(att1, att2);
- }
- else {if (att1_is_num) {
- return num_nom2(att2, att1);
- }
- }
- }
- return nom_nom(att1, att2);
- }
- private double symmUncertCorr (int att1, int att2) {
- int i, j, k, ii, jj;
- int nnj, nni, ni, nj;
- double sum = 0.0;
- double sumi[], sumj[];
- double counts[][];
- Instance inst;
- double corr_measure;
- boolean flag = false;
- double temp = 0.0;
- if (att1 == m_classIndex || att2 == m_classIndex) {
- flag = true;
- }
- ni = m_trainInstances.attribute(att1).numValues() + 1;
- nj = m_trainInstances.attribute(att2).numValues() + 1;
- counts = new double[ni][nj];
- sumi = new double[ni];
- sumj = new double[nj];
- for (i = 0; i < ni; i++) {
- sumi[i] = 0.0;
- for (j = 0; j < nj; j++) {
- sumj[j] = 0.0;
- counts[i][j] = 0.0;
- }
- }
- // Fill the contingency table
- for (i = 0; i < m_numInstances; i++) {
- inst = m_trainInstances.instance(i);
- if (inst.isMissing(att1)) {
- ii = ni - 1;
- }
- else {
- ii = (int)inst.value(att1);
- }
- if (inst.isMissing(att2)) {
- jj = nj - 1;
- }
- else {
- jj = (int)inst.value(att2);
- }
- counts[ii][jj]++;
- }
- // get the row totals
- for (i = 0; i < ni; i++) {
- sumi[i] = 0.0;
- for (j = 0; j < nj; j++) {
- sumi[i] += counts[i][j];
- sum += counts[i][j];
- }
- }
- // get the column totals
- for (j = 0; j < nj; j++) {
- sumj[j] = 0.0;
- for (i = 0; i < ni; i++) {
- sumj[j] += counts[i][j];
- }
- }
- // distribute missing counts
- if (!m_missingSeperate &&
- (sumi[ni-1] < m_numInstances) &&
- (sumj[nj-1] < m_numInstances)) {
- double[] i_copy = new double[sumi.length];
- double[] j_copy = new double[sumj.length];
- double[][] counts_copy = new double[sumi.length][sumj.length];
- for (i = 0; i < ni; i++) {
- System.arraycopy(counts[i], 0, counts_copy[i], 0, sumj.length);
- }
- System.arraycopy(sumi, 0, i_copy, 0, sumi.length);
- System.arraycopy(sumj, 0, j_copy, 0, sumj.length);
- double total_missing =
- (sumi[ni - 1] + sumj[nj - 1] - counts[ni - 1][nj - 1]);
- // do the missing i's
- if (sumi[ni - 1] > 0.0) {
- for (j = 0; j < nj - 1; j++) {
- if (counts[ni - 1][j] > 0.0) {
- for (i = 0; i < ni - 1; i++) {
- temp = ((i_copy[i]/(sum - i_copy[ni - 1]))*counts[ni - 1][j]);
- counts[i][j] += temp;
- sumi[i] += temp;
- }
- counts[ni - 1][j] = 0.0;
- }
- }
- }
- sumi[ni - 1] = 0.0;
- // do the missing j's
- if (sumj[nj - 1] > 0.0) {
- for (i = 0; i < ni - 1; i++) {
- if (counts[i][nj - 1] > 0.0) {
- for (j = 0; j < nj - 1; j++) {
- temp = ((j_copy[j]/(sum - j_copy[nj - 1]))*counts[i][nj - 1]);
- counts[i][j] += temp;
- sumj[j] += temp;
- }
- counts[i][nj - 1] = 0.0;
- }
- }
- }
- sumj[nj - 1] = 0.0;
- // do the both missing
- if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) {
- for (i = 0; i < ni - 1; i++) {
- for (j = 0; j < nj - 1; j++) {
- temp = (counts_copy[i][j]/(sum - total_missing)) *
- counts_copy[ni - 1][nj - 1];
- counts[i][j] += temp;
- sumi[i] += temp;
- sumj[j] += temp;
- }
- }
- counts[ni - 1][nj - 1] = 0.0;
- }
- }
- // corr_measure = Correlate.symm_uncert(counts,sumi,sumj,sum,ni,nj,flag);
- corr_measure = ContingencyTables.symmetricalUncertainty(counts);
- // corr_measure = ContingencyTables.gainRatio(counts);
- if (Utils.eq(corr_measure, 0.0)) {
- if (flag == true) {
- return (0.0);
- }
- else {
- return (1.0);
- }
- }
- else {
- return (corr_measure);
- }
- }
- private double num_num (int att1, int att2) {
- int i;
- Instance inst;
- double r, diff1, diff2, num = 0.0, sx = 0.0, sy = 0.0;
- double mx = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
- double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
- for (i = 0; i < m_numInstances; i++) {
- inst = m_trainInstances.instance(i);
- diff1 = (inst.isMissing(att1))? 0.0 : (inst.value(att1) - mx);
- diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
- num += (diff1*diff2);
- sx += (diff1*diff1);
- sy += (diff2*diff2);
- }
- if (sx != 0.0) {
- if (m_std_devs[att1] == 1.0) {
- m_std_devs[att1] = Math.sqrt((sx/m_numInstances));
- }
- }
- if (sy != 0.0) {
- if (m_std_devs[att2] == 1.0) {
- m_std_devs[att2] = Math.sqrt((sy/m_numInstances));
- }
- }
- if ((sx*sy) > 0.0) {
- r = (num/(Math.sqrt(sx*sy)));
- return ((r < 0.0)? -r : r);
- }
- else {
- if (att1 != m_classIndex && att2 != m_classIndex) {
- return 1.0;
- }
- else {
- return 0.0;
- }
- }
- }
- private double num_nom2 (int att1, int att2) {
- int i, ii, k;
- double temp;
- Instance inst;
- int mx = (int)m_trainInstances.
- meanOrMode(m_trainInstances.attribute(att1));
- double my = m_trainInstances.
- meanOrMode(m_trainInstances.attribute(att2));
- double stdv_num = 0.0;
- double diff1, diff2;
- double r = 0.0, rr, max_corr = 0.0;
- int nx = (!m_missingSeperate)
- ? m_trainInstances.attribute(att1).numValues()
- : m_trainInstances.attribute(att1).numValues() + 1;
- double[] prior_nom = new double[nx];
- double[] stdvs_nom = new double[nx];
- double[] covs = new double[nx];
- for (i = 0; i < nx; i++) {
- stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
- }
- // calculate frequencies (and means) of the values of the nominal
- // attribute
- for (i = 0; i < m_numInstances; i++) {
- inst = m_trainInstances.instance(i);
- if (inst.isMissing(att1)) {
- if (!m_missingSeperate) {
- ii = mx;
- }
- else {
- ii = nx - 1;
- }
- }
- else {
- ii = (int)inst.value(att1);
- }
- // increment freq for nominal
- prior_nom[ii]++;
- }
- for (k = 0; k < m_numInstances; k++) {
- inst = m_trainInstances.instance(k);
- // std dev of numeric attribute
- diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
- stdv_num += (diff2*diff2);
- //
- for (i = 0; i < nx; i++) {
- if (inst.isMissing(att1)) {
- if (!m_missingSeperate) {
- temp = (i == mx)? 1.0 : 0.0;
- }
- else {
- temp = (i == (nx - 1))? 1.0 : 0.0;
- }
- }
- else {
- temp = (i == inst.value(att1))? 1.0 : 0.0;
- }
- diff1 = (temp - (prior_nom[i]/m_numInstances));
- stdvs_nom[i] += (diff1*diff1);
- covs[i] += (diff1*diff2);
- }
- }
- // calculate weighted correlation
- for (i = 0, temp = 0.0; i < nx; i++) {
- // calculate the weighted variance of the nominal
- temp += ((prior_nom[i]/m_numInstances)*(stdvs_nom[i]/m_numInstances));
- if ((stdvs_nom[i]*stdv_num) > 0.0) {
- //System.out.println("Stdv :"+stdvs_nom[i]);
- rr = (covs[i]/(Math.sqrt(stdvs_nom[i]*stdv_num)));
- if (rr < 0.0) {
- rr = -rr;
- }
- r += ((prior_nom[i]/m_numInstances)*rr);
- }
- /* if there is zero variance for the numeric att at a specific
- level of the catergorical att then if neither is the class then
- make this correlation at this level maximally bad i.e. 1.0.
- If either is the class then maximally bad correlation is 0.0 */
- else {if (att1 != m_classIndex && att2 != m_classIndex) {
- r += ((prior_nom[i]/m_numInstances)*1.0);
- }
- }
- }
- // set the standard deviations for these attributes if necessary
- // if ((att1 != classIndex) && (att2 != classIndex)) // =============
- if (temp != 0.0) {
- if (m_std_devs[att1] == 1.0) {
- m_std_devs[att1] = Math.sqrt(temp);
- }
- }
- if (stdv_num != 0.0) {
- if (m_std_devs[att2] == 1.0) {
- m_std_devs[att2] = Math.sqrt((stdv_num/m_numInstances));
- }
- }
- if (r == 0.0) {
- if (att1 != m_classIndex && att2 != m_classIndex) {
- r = 1.0;
- }
- }
- return r;
- }
- private double nom_nom (int att1, int att2) {
- int i, j, ii, jj, z;
- double temp1, temp2;
- Instance inst;
- int mx = (int)m_trainInstances.
- meanOrMode(m_trainInstances.attribute(att1));
- int my = (int)m_trainInstances.
- meanOrMode(m_trainInstances.attribute(att2));
- double diff1, diff2;
- double r = 0.0, rr, max_corr = 0.0;
- int nx = (!m_missingSeperate)
- ? m_trainInstances.attribute(att1).numValues()
- : m_trainInstances.attribute(att1).numValues() + 1;
- int ny = (!m_missingSeperate)
- ? m_trainInstances.attribute(att2).numValues()
- : m_trainInstances.attribute(att2).numValues() + 1;
- double[][] prior_nom = new double[nx][ny];
- double[] sumx = new double[nx];
- double[] sumy = new double[ny];
- double[] stdvsx = new double[nx];
- double[] stdvsy = new double[ny];
- double[][] covs = new double[nx][ny];
- for (i = 0; i < nx; i++) {
- sumx[i] = stdvsx[i] = 0.0;
- }
- for (j = 0; j < ny; j++) {
- sumy[j] = stdvsy[j] = 0.0;
- }
- for (i = 0; i < nx; i++) {
- for (j = 0; j < ny; j++) {
- covs[i][j] = prior_nom[i][j] = 0.0;
- }
- }
- // calculate frequencies (and means) of the values of the nominal
- // attribute
- for (i = 0; i < m_numInstances; i++) {
- inst = m_trainInstances.instance(i);
- if (inst.isMissing(att1)) {
- if (!m_missingSeperate) {
- ii = mx;
- }
- else {
- ii = nx - 1;
- }
- }
- else {
- ii = (int)inst.value(att1);
- }
- if (inst.isMissing(att2)) {
- if (!m_missingSeperate) {
- jj = my;
- }
- else {
- jj = ny - 1;
- }
- }
- else {
- jj = (int)inst.value(att2);
- }
- // increment freq for nominal
- prior_nom[ii][jj]++;
- sumx[ii]++;
- sumy[jj]++;
- }
- for (z = 0; z < m_numInstances; z++) {
- inst = m_trainInstances.instance(z);
- for (j = 0; j < ny; j++) {
- if (inst.isMissing(att2)) {
- if (!m_missingSeperate) {
- temp2 = (j == my)? 1.0 : 0.0;
- }
- else {
- temp2 = (j == (ny - 1))? 1.0 : 0.0;
- }
- }
- else {
- temp2 = (j == inst.value(att2))? 1.0 : 0.0;
- }
- diff2 = (temp2 - (sumy[j]/m_numInstances));
- stdvsy[j] += (diff2*diff2);
- }
- //
- for (i = 0; i < nx; i++) {
- if (inst.isMissing(att1)) {
- if (!m_missingSeperate) {
- temp1 = (i == mx)? 1.0 : 0.0;
- }
- else {
- temp1 = (i == (nx - 1))? 1.0 : 0.0;
- }
- }
- else {
- temp1 = (i == inst.value(att1))? 1.0 : 0.0;
- }
- diff1 = (temp1 - (sumx[i]/m_numInstances));
- stdvsx[i] += (diff1*diff1);
- for (j = 0; j < ny; j++) {
- if (inst.isMissing(att2)) {
- if (!m_missingSeperate) {
- temp2 = (j == my)? 1.0 : 0.0;
- }
- else {
- temp2 = (j == (ny - 1))? 1.0 : 0.0;
- }
- }
- else {
- temp2 = (j == inst.value(att2))? 1.0 : 0.0;
- }
- diff2 = (temp2 - (sumy[j]/m_numInstances));
- covs[i][j] += (diff1*diff2);
- }
- }
- }
- // calculate weighted correlation
- for (i = 0; i < nx; i++) {
- for (j = 0; j < ny; j++) {
- if ((stdvsx[i]*stdvsy[j]) > 0.0) {
- //System.out.println("Stdv :"+stdvs_nom[i]);
- rr = (covs[i][j]/(Math.sqrt(stdvsx[i]*stdvsy[j])));
- if (rr < 0.0) {
- rr = -rr;
- }
- r += ((prior_nom[i][j]/m_numInstances)*rr);
- }
- // if there is zero variance for either of the categorical atts then if
- // neither is the class then make this
- // correlation at this level maximally bad i.e. 1.0. If either is
- // the class then maximally bad correlation is 0.0
- else {if (att1 != m_classIndex && att2 != m_classIndex) {
- r += ((prior_nom[i][j]/m_numInstances)*1.0);
- }
- }
- }
- }
- // calculate weighted standard deviations for these attributes
- // (if necessary)
- for (i = 0, temp1 = 0.0; i < nx; i++) {
- temp1 += ((sumx[i]/m_numInstances)*(stdvsx[i]/m_numInstances));
- }
- if (temp1 != 0.0) {
- if (m_std_devs[att1] == 1.0) {
- m_std_devs[att1] = Math.sqrt(temp1);
- }
- }
- for (j = 0, temp2 = 0.0; j < ny; j++) {
- temp2 += ((sumy[j]/m_numInstances)*(stdvsy[j]/m_numInstances));
- }
- if (temp2 != 0.0) {
- if (m_std_devs[att2] == 1.0) {
- m_std_devs[att2] = Math.sqrt(temp2);
- }
- }
- if (r == 0.0) {
- if (att1 != m_classIndex && att2 != m_classIndex) {
- r = 1.0;
- }
- }
- return r;
- }
- /**
- * returns a string describing CFS
- *
- * @return the description as a string
- */
- public String toString () {
- StringBuffer text = new StringBuffer();
- if (m_trainInstances == null) {
- text.append("CFS subset evaluator has not been built yetn");
- }
- else {
- text.append("tCFS Subset Evaluatorn");
- if (m_missingSeperate) {
- text.append("tTreating missing values as a seperate valuen");
- }
- if (m_locallyPredictive) {
- text.append("tIncluding locally predictive attributesn");
- }
- }
- return text.toString();
- }
- private void addLocallyPredictive (BitSet best_group) {
- int i, j;
- boolean done = false;
- boolean ok = true;
- double temp_best = -1.0;
- double corr;
- j = 0;
- BitSet temp_group = (BitSet)best_group.clone();
- while (!done) {
- temp_best = -1.0;
- // find best not already in group
- for (i = 0; i < m_numAttribs; i++) {
- if ((!temp_group.get(i)) && (i != m_classIndex)) {
- if (m_corr_matrix.getElement(i, m_classIndex) == -999) {
- corr = correlate(i, m_classIndex);
- m_corr_matrix.setElement(i, m_classIndex, corr);
- m_corr_matrix.setElement(m_classIndex, i, corr);
- }
- if (m_corr_matrix.getElement(i, m_classIndex) > temp_best) {
- temp_best = m_corr_matrix.getElement(i, m_classIndex);
- j = i;
- }
- }
- }
- if (temp_best == -1.0) {
- done = true;
- }
- else {
- ok = true;
- temp_group.set(j);
- // check the best against correlations with others already
- // in group
- for (i = 0; i < m_numAttribs; i++) {
- if (best_group.get(i)) {
- if (m_corr_matrix.getElement(i, j) == -999) {
- corr = correlate(i, j);
- m_corr_matrix.setElement(i, j, corr);
- m_corr_matrix.setElement(j, i, corr);
- }
- if (m_corr_matrix.getElement(i, j) > temp_best - m_c_Threshold) {
- ok = false;
- break;
- }
- }
- }
- // if ok then add to best_group
- if (ok) {
- best_group.set(j);
- }
- }
- }
- }
- /**
- * Calls locallyPredictive in order to include locally predictive
- * attributes (if requested).
- *
- * @param attributeSet the set of attributes found by the search
- * @return a possibly ranked list of postprocessed attributes
- * @exception Exception if postprocessing fails for some reason
- */
- public int[] postProcess (int[] attributeSet)
- throws Exception
- {
- int j = 0;
- if (!m_locallyPredictive) {
- // m_trainInstances = new Instances(m_trainInstances,0);
- return attributeSet;
- }
- BitSet bestGroup = new BitSet(m_numAttribs);
- for (int i = 0; i < attributeSet.length; i++) {
- bestGroup.set(attributeSet[i]);
- }
- addLocallyPredictive(bestGroup);
- // count how many are set
- for (int i = 0; i < m_numAttribs; i++) {
- if (bestGroup.get(i)) {
- j++;
- }
- }
- int[] newSet = new int[j];
- j = 0;
- for (int i = 0; i < m_numAttribs; i++) {
- if (bestGroup.get(i)) {
- newSet[j++] = i;
- }
- }
- // m_trainInstances = new Instances(m_trainInstances,0);
- return newSet;
- }
- protected void resetOptions () {
- m_trainInstances = null;
- m_missingSeperate = false;
- m_locallyPredictive = false;
- m_c_Threshold = 0.0;
- }
- /**
- * Main method for testing this class.
- *
- * @param args the options
- */
- public static void main (String[] args) {
- try {
- System.out.println(AttributeSelection.
- SelectAttributes(new CfsSubsetEval(), args));
- }
- catch (Exception e) {
- e.printStackTrace();
- System.out.println(e.getMessage());
- }
- }
- }