Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algorithms
Crypt_Decrypt algorithms
Mathematics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embedded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Article/Document
PairedTTester.java
Package: Weka-3-2.rar [view]
Upload User: rhdiban
Upload Date: 2013-08-09
Package Size: 15085k
Code Size: 45k
Category:
Windows Develop
Development Platform:
Java
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- /*
- * PairedTTester.java
- * Copyright (C) 1999 Len Trigg
- *
- */
- package weka.experiment;
- import weka.core.Instances;
- import weka.core.Instance;
- import weka.core.Range;
- import weka.core.Attribute;
- import weka.core.Utils;
- import weka.core.FastVector;
- import weka.core.Statistics;
- import weka.core.OptionHandler;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.util.Date;
- import java.text.SimpleDateFormat;
- import java.util.Enumeration;
- import java.util.Vector;
- import weka.core.Option;
- /**
- * Calculates T-Test statistics on data stored in a set of instances.<p>
- *
- * Valid options from the command-line are:<p>
- *
- * -D num,num2... <br>
- * The column numbers that uniquely specify a dataset.
- * (default last) <p>
- *
- * -R num <br>
- * The column number containing the run number.
- * (default last) <p>
- *
- * -S num <br>
- * The significance level for T-Tests.
- * (default 0.05) <p>
- *
- * -G num,num2... <br>
- * The column numbers that uniquely specify one result generator (eg:
- * scheme name plus options).
- * (default last) <p>
- *
- * @author Len Trigg (trigg@cs.waikato.ac.nz)
- * @version $Revision: 1.14 $
- */
- public class PairedTTester implements OptionHandler {
- /** The set of instances we will analyse */
- protected Instances m_Instances;
- /**
- * The index of the column containing the run number. Derived from
- * m_RunColumnSet by prepareData() each time the data is partitioned.
- */
- protected int m_RunColumn = 0;
- /** The option setting for the run number column (-1 means last) */
- protected int m_RunColumnSet = -1;
- /** The significance level for comparisons */
- protected double m_SignificanceLevel = 0.05;
- /**
- * The range of columns that specify a unique "dataset"
- * (eg: scheme plus configuration)
- */
- protected Range m_DatasetKeyColumnsRange = new Range();
- /**
- * An array containing the indexes of just the selected dataset key
- * columns; filled from m_DatasetKeyColumnsRange by prepareData().
- */
- protected int [] m_DatasetKeyColumns;
- /** The list of dataset specifiers */
- protected DatasetSpecifiers m_DatasetSpecifiers =
- new DatasetSpecifiers();
- /**
- * The range of columns that specify a unique result set
- * (eg: scheme plus configuration)
- */
- protected Range m_ResultsetKeyColumnsRange = new Range();
- /**
- * An array containing the indexes of just the selected resultset key
- * columns; filled from m_ResultsetKeyColumnsRange by prepareData().
- */
- protected int [] m_ResultsetKeyColumns;
- /**
- * Stores one Resultset object per distinct resultset key; each Resultset
- * holds the instances belonging to it, grouped by dataset.
- */
- protected FastVector m_Resultsets = new FastVector();
- /**
- * Indicates whether the instances have been partitioned. Set to true at
- * the end of prepareData(); accessors call prepareData() while it is false.
- */
- protected boolean m_ResultsetsValid;
- /** Indicates whether standard deviations should be displayed */
- protected boolean m_ShowStdDevs = false;
- /** Produce tables in latex format */
- protected boolean m_latexOutput = false;
- /* A list of unique "dataset" specifiers that have been observed */
- private class DatasetSpecifiers {
- FastVector m_Specifiers = new FastVector();
- /**
- * Removes all specifiers.
- */
- protected void removeAllSpecifiers() {
- m_Specifiers.removeAllElements();
- }
- /**
- * Add an instance to the list of specifiers (if necessary)
- */
- protected void add(Instance inst) {
- for (int i = 0; i < m_Specifiers.size(); i++) {
- Instance specifier = (Instance)m_Specifiers.elementAt(i);
- boolean found = true;
- for (int j = 0; j < m_DatasetKeyColumns.length; j++) {
- if (inst.value(m_DatasetKeyColumns[j]) !=
- specifier.value(m_DatasetKeyColumns[j])) {
- found = false;
- }
- }
- if (found) {
- return;
- }
- }
- m_Specifiers.addElement(inst);
- }
- /**
- * Get the template at the given position.
- */
- protected Instance specifier(int i) {
- return (Instance)m_Specifiers.elementAt(i);
- }
- /**
- * Gets the number of specifiers.
- */
- protected int numSpecifiers() {
- return m_Specifiers.size();
- }
- }
- /* Utility class to store the instances pertaining to a dataset */
- private class Dataset {
- Instance m_Template;
- FastVector m_Dataset;
- public Dataset(Instance template) {
- m_Template = template;
- m_Dataset = new FastVector();
- add(template);
- }
- /**
- * Returns true if the two instances match on those attributes that have
- * been designated key columns (eg: scheme name and scheme options)
- *
- * @param first the first instance
- * @param second the second instance
- * @return true if first and second match on the currently set key columns
- */
- protected boolean matchesTemplate(Instance first) {
- for (int i = 0; i < m_DatasetKeyColumns.length; i++) {
- if (first.value(m_DatasetKeyColumns[i]) !=
- m_Template.value(m_DatasetKeyColumns[i])) {
- return false;
- }
- }
- return true;
- }
- /**
- * Adds the given instance to the dataset
- */
- protected void add(Instance inst) {
- m_Dataset.addElement(inst);
- }
- /**
- * Returns a vector containing the instances in the dataset
- */
- protected FastVector contents() {
- return m_Dataset;
- }
- /**
- * Sorts the instances in the dataset by the run number.
- *
- * @param runColumn a value of type 'int'
- */
- public void sort(int runColumn) {
- double [] runNums = new double [m_Dataset.size()];
- for (int j = 0; j < runNums.length; j++) {
- runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn);
- }
- int [] index = Utils.sort(runNums);
- FastVector newDataset = new FastVector(runNums.length);
- for (int j = 0; j < index.length; j++) {
- newDataset.addElement(m_Dataset.elementAt(index[j]));
- }
- m_Dataset = newDataset;
- }
- }
- /* Utility class to store the instances in a resultset */
- private class Resultset {
- Instance m_Template;
- FastVector m_Datasets;
- public Resultset(Instance template) {
- m_Template = template;
- m_Datasets = new FastVector();
- add(template);
- }
- /**
- * Returns true if the two instances match on those attributes that have
- * been designated key columns (eg: scheme name and scheme options)
- *
- * @param first the first instance
- * @param second the second instance
- * @return true if first and second match on the currently set key columns
- */
- protected boolean matchesTemplate(Instance first) {
- for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {
- if (first.value(m_ResultsetKeyColumns[i]) !=
- m_Template.value(m_ResultsetKeyColumns[i])) {
- return false;
- }
- }
- return true;
- }
- /**
- * Returns a string descriptive of the resultset key column values
- * for this resultset
- *
- * @return a value of type 'String'
- */
- protected String templateString() {
- String result = "";
- String tempResult = "";
- for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {
- tempResult = m_Template.toString(m_ResultsetKeyColumns[i]) + ' ';
- // compact the string
- tempResult = Utils.removeSubstring(tempResult, "weka.classifiers.");
- tempResult = Utils.removeSubstring(tempResult, "weka.filters.");
- tempResult = Utils.removeSubstring(tempResult, "weka.attributeSelection.");
- result += tempResult;
- }
- return result.trim();
- }
- /**
- * Returns a vector containing all instances belonging to one dataset.
- *
- * @param index a template instance
- * @return a value of type 'FastVector'
- */
- public FastVector dataset(Instance inst) {
- for (int i = 0; i < m_Datasets.size(); i++) {
- if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(inst)) {
- return ((Dataset)m_Datasets.elementAt(i)).contents();
- }
- }
- return null;
- }
- /**
- * Adds an instance to this resultset
- *
- * @param newInst a value of type 'Instance'
- */
- public void add(Instance newInst) {
- for (int i = 0; i < m_Datasets.size(); i++) {
- if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(newInst)) {
- ((Dataset)m_Datasets.elementAt(i)).add(newInst);
- return;
- }
- }
- Dataset newDataset = new Dataset(newInst);
- m_Datasets.addElement(newDataset);
- }
- /**
- * Sorts the instances in each dataset by the run number.
- *
- * @param runColumn a value of type 'int'
- */
- public void sort(int runColumn) {
- for (int i = 0; i < m_Datasets.size(); i++) {
- ((Dataset)m_Datasets.elementAt(i)).sort(runColumn);
- }
- }
- } // Resultset
- /**
- * Returns a string descriptive of the key column values for
- * the "datasets
- *
- * @param template the template
- * @return a value of type 'String'
- */
- private String templateString(Instance template) {
- String result = "";
- for (int i = 0; i < m_DatasetKeyColumns.length; i++) {
- result += template.toString(m_DatasetKeyColumns[i]) + ' ';
- }
- if (result.startsWith("weka.classifiers.")) {
- result = result.substring("weka.classifiers.".length());
- }
- return result.trim();
- }
- /**
- * Set whether latex is output
- * @param l true if tables are to be produced in Latex format
- */
- public void setProduceLatex(boolean l) {
- m_latexOutput = l;
- }
- /**
- * Get whether latex is output
- * @return true if Latex is to be output
- */
- public boolean getProduceLatex() {
- return m_latexOutput;
- }
- /**
- * Set whether standard deviations are displayed or not.
- * @param s true if standard deviations are to be displayed
- */
- public void setShowStdDevs(boolean s) {
- m_ShowStdDevs = s;
- }
- /**
- * Returns true if standard deviations have been requested.
- * @return true if standard deviations are to be displayed.
- */
- public boolean getShowStdDevs() {
- return m_ShowStdDevs;
- }
- /**
- * Separates the instances into resultsets and by dataset/run.
- *
- * @exception Exception if the TTest parameters have not been set.
- */
- protected void prepareData() throws Exception {
- if (m_Instances == null) {
- throw new Exception("No instances have been set");
- }
- if (m_RunColumnSet == -1) {
- m_RunColumn = m_Instances.numAttributes() - 1;
- } else {
- m_RunColumn = m_RunColumnSet;
- }
- if (m_ResultsetKeyColumnsRange == null) {
- throw new Exception("No result specifier columns have been set");
- }
- m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
- m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection();
- if (m_DatasetKeyColumnsRange == null) {
- throw new Exception("No dataset specifier columns have been set");
- }
- m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
- m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection();
- // Split the data up into result sets
- m_Resultsets.removeAllElements();
- m_DatasetSpecifiers.removeAllSpecifiers();
- for (int i = 0; i < m_Instances.numInstances(); i++) {
- Instance current = m_Instances.instance(i);
- if (current.isMissing(m_RunColumn)) {
- throw new Exception("Instance has missing value in run "
- + "column!n" + current);
- }
- for (int j = 0; j < m_ResultsetKeyColumns.length; j++) {
- if (current.isMissing(m_ResultsetKeyColumns[j])) {
- throw new Exception("Instance has missing value in resultset key "
- + "column " + (m_ResultsetKeyColumns[j] + 1)
- + "!n" + current);
- }
- }
- for (int j = 0; j < m_DatasetKeyColumns.length; j++) {
- if (current.isMissing(m_DatasetKeyColumns[j])) {
- throw new Exception("Instance has missing value in dataset key "
- + "column " + (m_DatasetKeyColumns[j] + 1)
- + "!n" + current);
- }
- }
- boolean found = false;
- for (int j = 0; j < m_Resultsets.size(); j++) {
- Resultset resultset = (Resultset) m_Resultsets.elementAt(j);
- if (resultset.matchesTemplate(current)) {
- resultset.add(current);
- found = true;
- break;
- }
- }
- if (!found) {
- Resultset resultset = new Resultset(current);
- m_Resultsets.addElement(resultset);
- }
- m_DatasetSpecifiers.add(current);
- }
- // Tell each resultset to sort on the run column
- for (int j = 0; j < m_Resultsets.size(); j++) {
- Resultset resultset = (Resultset) m_Resultsets.elementAt(j);
- resultset.sort(m_RunColumn);
- }
- m_ResultsetsValid = true;
- }
- /**
- * Gets the number of datasets in the resultsets
- *
- * @return the number of datasets in the resultsets
- */
- public int getNumDatasets() {
- if (!m_ResultsetsValid) {
- try {
- prepareData();
- } catch (Exception ex) {
- ex.printStackTrace();
- return 0;
- }
- }
- return m_DatasetSpecifiers.numSpecifiers();
- }
- /**
- * Gets the number of resultsets in the data.
- *
- * @return the number of resultsets in the data
- */
- public int getNumResultsets() {
- if (!m_ResultsetsValid) {
- try {
- prepareData();
- } catch (Exception ex) {
- ex.printStackTrace();
- return 0;
- }
- }
- return m_Resultsets.size();
- }
- /**
- * Gets a string descriptive of the specified resultset.
- *
- * @param index the index of the resultset
- * @return a descriptive string for the resultset
- */
- public String getResultsetName(int index) {
- if (!m_ResultsetsValid) {
- try {
- prepareData();
- } catch (Exception ex) {
- ex.printStackTrace();
- return null;
- }
- }
- return ((Resultset) m_Resultsets.elementAt(index)).templateString();
- }
- /**
- * Computes a paired t-test comparison for a specified dataset between
- * two resultsets.
- *
- * @param datasetSpecifier the dataset specifier
- * @param resultset1Index the index of the first resultset
- * @param resultset2Index the index of the second resultset
- * @param comparisonColumn the column containing values to compare
- * @return the results of the paired comparison
- * @exception Exception if an error occurs
- */
- public PairedStats calculateStatistics(Instance datasetSpecifier,
- int resultset1Index,
- int resultset2Index,
- int comparisonColumn) throws Exception {
- if (m_Instances.attribute(comparisonColumn).type()
- != Attribute.NUMERIC) {
- throw new Exception("Comparison column " + (comparisonColumn + 1)
- + " ("
- + m_Instances.attribute(comparisonColumn).name()
- + ") is not numeric");
- }
- if (!m_ResultsetsValid) {
- prepareData();
- }
- Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index);
- Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index);
- FastVector dataset1 = resultset1.dataset(datasetSpecifier);
- FastVector dataset2 = resultset2.dataset(datasetSpecifier);
- String datasetName = templateString(datasetSpecifier);
- if (dataset1 == null) {
- throw new Exception("No results for dataset=" + datasetName
- + " for resultset=" + resultset1.templateString());
- } else if (dataset2 == null) {
- throw new Exception("No results for dataset=" + datasetName
- + " for resultset=" + resultset2.templateString());
- } else if (dataset1.size() != dataset2.size()) {
- throw new Exception("Results for dataset=" + datasetName
- + " differ in size for resultset="
- + resultset1.templateString()
- + " and resultset="
- + resultset2.templateString()
- );
- }
- PairedStats pairedStats = new PairedStats(m_SignificanceLevel);
- for (int k = 0; k < dataset1.size(); k ++) {
- Instance current1 = (Instance) dataset1.elementAt(k);
- Instance current2 = (Instance) dataset2.elementAt(k);
- if (current1.isMissing(comparisonColumn)) {
- throw new Exception("Instance has missing value in comparison "
- + "column!n" + current1);
- }
- if (current2.isMissing(comparisonColumn)) {
- throw new Exception("Instance has missing value in comparison "
- + "column!n" + current2);
- }
- if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) {
- System.err.println("Run numbers do not match!n"
- + current1 + current2);
- }
- double value1 = current1.value(comparisonColumn);
- double value2 = current2.value(comparisonColumn);
- pairedStats.add(value1, value2);
- }
- pairedStats.calculateDerived();
- return pairedStats;
- }
- /**
- * Creates a key that maps resultset numbers to their descriptions.
- *
- * @return a value of type 'String'
- */
- public String resultsetKey() {
- if (!m_ResultsetsValid) {
- try {
- prepareData();
- } catch (Exception ex) {
- ex.printStackTrace();
- return ex.getMessage();
- }
- }
- String result = "";
- for (int j = 0; j < getNumResultsets(); j++) {
- result += "(" + (j + 1) + ") " + getResultsetName(j) + 'n';
- }
- return result + 'n';
- }
- /**
- * Creates a "header" string describing the current resultsets.
- *
- * @param comparisonColumn a value of type 'int'
- * @return a value of type 'String'
- */
- public String header(int comparisonColumn) {
- if (!m_ResultsetsValid) {
- try {
- prepareData();
- } catch (Exception ex) {
- ex.printStackTrace();
- return ex.getMessage();
- }
- }
- return "Analysing: "
- + m_Instances.attribute(comparisonColumn).name() + 'n'
- + "Datasets: " + getNumDatasets() + 'n'
- + "Resultsets: " + getNumResultsets() + 'n'
- + "Confidence: " + getSignificanceLevel() + " (two tailed)n"
- + "Date: " + (new SimpleDateFormat()).format(new Date()) + "nn";
- }
- /**
- * Carries out a comparison between all resultsets, counting the number
- * of datsets where one resultset outperforms the other.
- *
- * @param comparisonColumn the index of the comparison column
- * @return a 2d array where element [i][j] is the number of times resultset
- * j performed significantly better than resultset i.
- * @exception Exception if an error occurs
- */
- public int [][] multiResultsetWins(int comparisonColumn)
- throws Exception {
- int numResultsets = getNumResultsets();
- int [][] win = new int [numResultsets][numResultsets];
- for (int i = 0; i < numResultsets; i++) {
- for (int j = i + 1; j < numResultsets; j++) {
- System.err.print("Comparing (" + (i + 1) + ") with ("
- + (j + 1) + ")r");
- System.err.flush();
- for (int k = 0; k < getNumDatasets(); k++) {
- try {
- PairedStats pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j,
- comparisonColumn);
- if (pairedStats.differencesSignificance < 0) {
- win[i][j]++;
- } else if (pairedStats.differencesSignificance > 0) {
- win[j][i]++;
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- System.err.println(ex.getMessage());
- }
- }
- }
- }
- return win;
- }
- /**
- * Carries out a comparison between all resultsets, counting the number
- * of datsets where one resultset outperforms the other. The results
- * are summarized in a table.
- *
- * @param comparisonColumn the index of the comparison column
- * @return the results in a string
- * @exception Exception if an error occurs
- */
- public String multiResultsetSummary(int comparisonColumn)
- throws Exception {
- int [][] win = multiResultsetWins(comparisonColumn);
- int numResultsets = getNumResultsets();
- int resultsetLength = 1 + Math.max((int)(Math.log(numResultsets)
- / Math.log(10)),
- (int)(Math.log(getNumDatasets()) /
- Math.log(10)));
- String result = "";
- String titles = "";
- if (m_latexOutput) {
- result += "\begin{table}[thb]n\caption{\label{labelname}"
- +"Table Caption}n";
- result += "\footnotesizen";
- result += "{\centering \begin{tabular}{l";
- }
- for (int i = 0; i < numResultsets; i++) {
- if (m_latexOutput) {
- titles += " &";
- result += "c";
- }
- titles += ' ' + Utils.padLeft("" + (char)((int)'a' + i % 26),
- resultsetLength);
- }
- if (m_latexOutput) {
- result += "}}\\n\hlinen";
- result += titles + " \\n\hlinen";
- } else {
- result += titles + " (No. of datasets where [col] >> [row])n";
- }
- for (int i = 0; i < numResultsets; i++) {
- for (int j = 0; j < numResultsets; j++) {
- if (m_latexOutput && j == 0) {
- result += (char)((int)'a' + i % 26);
- }
- if (j == i) {
- if (m_latexOutput) {
- result += " & - ";
- } else {
- result += ' ' + Utils.padLeft("-", resultsetLength);
- }
- } else {
- if (m_latexOutput) {
- result += "& " + win[i][j] + ' ';
- } else {
- result += ' ' + Utils.padLeft("" + win[i][j], resultsetLength);
- }
- }
- }
- if (!m_latexOutput) {
- result += " | " + (char)((int)'a' + i % 26)
- + " = " + getResultsetName(i) + 'n';
- } else {
- result += "\\n";
- }
- }
- if (m_latexOutput) {
- result += "\hlinen\end{tabular} \footnotesize \par}n\end{table}";
- }
- return result;
- }
- public String multiResultsetRanking(int comparisonColumn)
- throws Exception {
- int [][] win = multiResultsetWins(comparisonColumn);
- int numResultsets = getNumResultsets();
- int [] wins = new int [numResultsets];
- int [] losses = new int [numResultsets];
- int [] diff = new int [numResultsets];
- for (int i = 0; i < win.length; i++) {
- for (int j = 0; j < win[i].length; j++) {
- wins[j] += win[i][j];
- diff[j] += win[i][j];
- losses[i] += win[i][j];
- diff[i] -= win[i][j];
- }
- }
- int biggest = Math.max(wins[Utils.maxIndex(wins)],
- losses[Utils.maxIndex(losses)]);
- int width = Math.max(2 + (int)(Math.log(biggest) / Math.log(10)),
- ">-<".length());
- String result;
- if (m_latexOutput) {
- result = "\begin{table}[thb]n\caption{\label{labelname}Table Caption"
- +"}n\footnotesizen{\centering \begin{tabular}{rlll}\\n\hlinen";
- result += "Resultset & Wins$-$ & Wins & Losses \\n& Losses & & "
- +"\\n\hlinen";
- } else {
- result = Utils.padLeft(">-<", width) + ' '
- + Utils.padLeft(">", width) + ' '
- + Utils.padLeft("<", width) + " Resultsetn";
- }
- int [] ranking = Utils.sort(diff);
- for (int i = numResultsets - 1; i >= 0; i--) {
- int curr = ranking[i];
- if (m_latexOutput) {
- result += "(" + (curr+1) + ") & "
- + Utils.padLeft("" + diff[curr], width)
- +" & " + Utils.padLeft("" + wins[curr], width)
- +" & " + Utils.padLeft("" + losses[curr], width)
- +"\\n";
- } else {
- result += Utils.padLeft("" + diff[curr], width) + ' '
- + Utils.padLeft("" + wins[curr], width) + ' '
- + Utils.padLeft("" + losses[curr], width) + ' '
- + getResultsetName(curr) + 'n';
- }
- }
- if (m_latexOutput) {
- result += "\hlinen\end{tabular} \footnotesize \par}n\end{table}";
- }
- return result;
- }
- /**
- * Generates a comparison table in latex table format
- *
- * @param baseResultset the index of the base resultset
- * @param comparisonColumn the index of the column to compare over
- * @param maxWidthMean width for the mean
- * @param maxWidthStdDev width for the standard deviation
- * @return the comparison table string
- */
- private String multiResultsetFullLatex(int baseResultset,
- int comparisonColumn,
- int maxWidthMean,
- int maxWidthStdDev) {
- StringBuffer result = new StringBuffer(1000);
- int numcols = getNumResultsets() * 2;
- if (m_ShowStdDevs) {
- numcols += getNumResultsets();
- }
- result.append("\begin{table}[thb]n\caption{\label{labelname}"
- +"Table Caption}n");
- if (!m_ShowStdDevs) {
- result.append("\footnotesizen");
- } else {
- result.append("\scriptsizen");
- }
- // output the column alignment characters
- // one for the dataset name and one for the comparison column
- if (!m_ShowStdDevs) {
- result.append("{\centering \begin{tabular}{ll");
- } else {
- // dataset, mean, std dev
- result.append("{\centering \begin{tabular}{lr@{\hspace{0cm}}l");
- }
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- if (!m_ShowStdDevs) {
- result.append("l@{\hspace{0.1cm}}l");
- } else {
- result.append("r@{\hspace{0cm}}l@{\hspace{0cm}}r");
- }
- }
- }
- result.append("}n\\n\hlinen");
- if (!m_ShowStdDevs) {
- result.append("Data Set & ("+(baseResultset+1)+")");
- } else {
- result.append("Data Set & \multicolumn{2}{c}{("+(baseResultset+1)+")}");
- }
- // now do the column names (numbers)
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- if (!m_ShowStdDevs) {
- result.append("& (" + (j + 1) + ") & ");
- } else {
- result.append("& \multicolumn{3}{c}{(" + (j + 1) + ")} ");
- }
- }
- }
- result.append("\\n\hlinen");
- int datasetLength = 25;
- int resultsetLength = maxWidthMean + 7;
- if (m_ShowStdDevs) {
- resultsetLength += (maxWidthStdDev + 5);
- }
- for (int i = 0; i < getNumDatasets(); i++) {
- // Print the name of the dataset
- String datasetName =
- templateString(m_DatasetSpecifiers.specifier(i)).replace('_','-');
- try {
- PairedStats pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(i),
- baseResultset, baseResultset,
- comparisonColumn);
- datasetName = Utils.padRight(datasetName, datasetLength);
- result.append(datasetName);
- if (!m_ShowStdDevs) {
- result.append("& "+Utils.doubleToString(pairedStats.xStats.mean,
- resultsetLength - 2, 2));
- } else {
- result.append("& "+Utils.doubleToString(pairedStats.xStats.mean,
- (maxWidthMean+5), 2)+"$\pm$");
- if (Double.isNaN(pairedStats.xStats.stdDev)) {
- result.append("&"+Utils.doubleToString(0.0,
- (maxWidthStdDev+3),2)+" ");
- } else {
- result.append("&"+Utils.doubleToString(pairedStats.xStats.stdDev,
- (maxWidthStdDev+3),2)+" ");
- }
- }
- // Iterate over the resultsets
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- try {
- pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(i),
- baseResultset, j, comparisonColumn);
- String sigString = "";
- if (pairedStats.differencesSignificance < 0) {
- sigString = "$\circ$";
- } else if (pairedStats.differencesSignificance > 0) {
- sigString = "$\bullet$";
- }
- if (!m_ShowStdDevs) {
- result.append(" & "+Utils.doubleToString(pairedStats.yStats.mean,
- resultsetLength - 2,
- 2)).append(" & "+sigString);
- } else {
- result.append(" & "
- +Utils.doubleToString(pairedStats.yStats.mean,
- (maxWidthMean+5),
- 2)+"$\pm$");
- if (Double.isNaN(pairedStats.yStats.stdDev)) {
- result.append("&"+Utils.doubleToString(0.0,
- (maxWidthStdDev+3),2)+" ");
- } else {
- result.append("&"+Utils.doubleToString(pairedStats.
- yStats.stdDev, (maxWidthStdDev+3),2)+" ");
- }
- result.append(" & ").append(sigString);
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- result.append(Utils.padLeft("", resultsetLength + 1));
- }
- }
- }
- result.append("\\n");
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- result.append("\hlinen\multicolumn{"+numcols+"}{c}{$\circ$, $\bullet$"
- +" statistically significant improvement or degradation}"
- +"\\n\end{tabular} ");
- if (!m_ShowStdDevs) {
- result.append("\footnotesize ");
- } else {
- result.append("\scriptsize ");
- }
- result.append("\par}n\end{table}"
- +"n");
- System.out.println(result.toString()+"nn");
- return result.toString();
- }
- /**
- * Generates a comparison table in latex table format
- *
- * @param baseResultset the index of the base resultset
- * @param comparisonColumn the index of the column to compare over
- * @param maxWidthMean width for the mean
- * @param maxWidthStdDev width for the standard deviation
- * @return the comparison table string
- */
- private String multiResultsetFullPlainText(int baseResultset,
- int comparisonColumn,
- int maxWidthMean,
- int maxWidthStdDev) {
- StringBuffer result = new StringBuffer(1000);
- int datasetLength = 25;
- // int resultsetLength = 9;
- // int resultsetLength = 16;
- int resultsetLength = maxWidthMean + 7;
- if (m_ShowStdDevs) {
- resultsetLength += (maxWidthStdDev + 5);
- }
- // Set up the titles
- StringBuffer titles = new StringBuffer(Utils.padRight("Dataset",
- datasetLength));
- titles.append(' ');
- StringBuffer label
- = new StringBuffer(Utils.padLeft("(" + (baseResultset + 1)
- + ") "
- + getResultsetName(baseResultset),
- resultsetLength + 3));
- titles.append(label);
- StringBuffer separator = new StringBuffer(Utils.padRight("",
- datasetLength));
- while (separator.length() < titles.length()) {
- separator.append('-');
- }
- separator.append("---");
- titles.append(" | ");
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- label = new StringBuffer(Utils.padLeft("(" + (j + 1) + ") "
- + getResultsetName(j), resultsetLength));
- titles.append(label).append(' ');
- for (int i = 0; i < label.length(); i++) {
- separator.append('-');
- }
- separator.append('-');
- }
- }
- result.append(titles).append('n').append(separator).append('n');
- // Iterate over datasets
- int [] win = new int [getNumResultsets()];
- int [] loss = new int [getNumResultsets()];
- int [] tie = new int [getNumResultsets()];
- StringBuffer skipped = new StringBuffer("");
- for (int i = 0; i < getNumDatasets(); i++) {
- // Print the name of the dataset
- String datasetName =
- templateString(m_DatasetSpecifiers.specifier(i));
- try {
- PairedStats pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(i),
- baseResultset, baseResultset,
- comparisonColumn);
- datasetName = Utils.padRight(datasetName, datasetLength);
- result.append(datasetName);
- result.append(Utils.padLeft('('
- + Utils.doubleToString(pairedStats.count,
- 0)
- + ')', 5)).append(' ');
- if (!m_ShowStdDevs) {
- result.append(Utils.doubleToString(pairedStats.xStats.mean,
- resultsetLength - 2, 2)).
- append(" | ");
- } else {
- result.append(Utils.doubleToString(pairedStats.xStats.mean,
- (maxWidthMean+5), 2));
- if (Double.isInfinite(pairedStats.xStats.stdDev)) {
- result.append('(' + Utils.padRight("Inf", maxWidthStdDev + 3)
- +')').append(" | ");
- } else {
- result.append('('+Utils.doubleToString(pairedStats.xStats.stdDev,
- (maxWidthStdDev+3),2)
- +')').append(" | ");
- }
- }
- // Iterate over the resultsets
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- try {
- pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(i),
- baseResultset, j, comparisonColumn);
- char sigChar = ' ';
- if (pairedStats.differencesSignificance < 0) {
- sigChar = 'v';
- win[j]++;
- } else if (pairedStats.differencesSignificance > 0) {
- sigChar = '*';
- loss[j]++;
- } else {
- tie[j]++;
- }
- if (!m_ShowStdDevs) {
- result.append(Utils.doubleToString(pairedStats.yStats.mean,
- resultsetLength - 2,
- 2)).append(' ')
- .append(sigChar).append(' ');
- } else {
- result.append(Utils.doubleToString(pairedStats.yStats.mean,
- (maxWidthMean+5),
- 2));
- if (Double.isInfinite(pairedStats.yStats.stdDev)) {
- result.append('('
- + Utils.padRight("Inf", maxWidthStdDev + 3)
- +')');
- } else {
- result.append('('+Utils.doubleToString(pairedStats.
- yStats.stdDev,
- (maxWidthStdDev+3),
- 2)+')');
- }
- result.append(' ').append(sigChar).append(' ');
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- result.append(Utils.padLeft("", resultsetLength + 1));
- }
- }
- }
- result.append('n');
- } catch (Exception ex) {
- ex.printStackTrace();
- skipped.append(datasetName).append(' ');
- }
- }
- result.append(separator).append('n');
- result.append(Utils.padLeft("(v/ /*)", datasetLength + 4 +
- resultsetLength)).append(" | ");
- for (int j = 0; j < getNumResultsets(); j++) {
- if (j != baseResultset) {
- result.append(Utils.padLeft("(" + win[j] + '/' + tie[j]
- + '/' + loss[j] + ')',
- resultsetLength)).append(' ');
- }
- }
- result.append('n');
- if (!skipped.equals("")) {
- result.append("Skipped: ").append(skipped).append('n');
- }
- return result.toString();
- }
- /**
- * Creates a comparison table where a base resultset is compared to the
- * other resultsets. Results are presented for every dataset.
- *
- * @param baseResultset the index of the base resultset
- * @param comparisonColumn the index of the column to compare over
- * @return the comparison table string
- * @exception Exception if an error occurs
- */
- public String multiResultsetFull(int baseResultset,
- int comparisonColumn) throws Exception {
- int maxWidthMean = 2;
- int maxWidthStdDev = 2;
- // determine max field width
- for (int i = 0; i < getNumDatasets(); i++) {
- for (int j = 0; j < getNumResultsets(); j++) {
- try {
- PairedStats pairedStats =
- calculateStatistics(m_DatasetSpecifiers.specifier(i),
- baseResultset, j, comparisonColumn);
- if (!Double.isInfinite(pairedStats.yStats.mean) &&
- !Double.isNaN(pairedStats.yStats.mean)) {
- double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) /
- Math.log(10))+1);
- if (width > maxWidthMean) {
- maxWidthMean = (int)width;
- }
- }
- if (m_ShowStdDevs &&
- !Double.isInfinite(pairedStats.yStats.stdDev) &&
- !Double.isNaN(pairedStats.yStats.stdDev)) {
- double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) /
- Math.log(10))+1);
- if (width > maxWidthStdDev) {
- maxWidthStdDev = (int)width;
- }
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- }
- StringBuffer result = new StringBuffer(1000);
- if (m_latexOutput) {
- result = new StringBuffer(multiResultsetFullLatex(baseResultset,
- comparisonColumn,
- maxWidthMean,
- maxWidthStdDev));
- } else {
- result = new StringBuffer(multiResultsetFullPlainText(baseResultset,
- comparisonColumn,
- maxWidthMean,
- maxWidthStdDev));
- }
- // append a key so that we can tell the difference between long
- // scheme+option names
- result.append("nKey:nn");
- for (int j = 0; j < getNumResultsets(); j++) {
- result.append("("+(j+1)+") ");
- result.append(getResultsetName(j)+"n");
- }
- return result.toString();
- }
- /**
- * Lists options understood by this object.
- *
- * @return an enumeration of Options.
- */
- public Enumeration listOptions() {
- Vector newVector = new Vector(5);
- newVector.addElement(new Option(
- "tSpecify list of columns that specify a uniquen"
- + "tdataset.n"
- + "tFirst and last are valid indexes. (default none)",
- "D", 1, "-D <index,index2-index4,...>"));
- newVector.addElement(new Option(
- "tSet the index of the column containing the run number",
- "R", 1, "-R <index>"));
- newVector.addElement(new Option(
- "tSpecify list of columns that specify a uniquen"
- + "t'result generator' (eg: classifier name and options).n"
- + "tFirst and last are valid indexes. (default none)",
- "G", 1, "-G <index1,index2-index4,...>"));
- newVector.addElement(new Option(
- "tSet the significance level for comparisons (default 0.05)",
- "S", 1, "-S <significance level>"));
- newVector.addElement(new Option(
- "tShow standard deviations",
- "V", 0, "-V"));
- newVector.addElement(new Option(
- "tProduce table comparisons in Latex table format",
- "L", 0, "-L"));
- return newVector.elements();
- }
- /**
- * Parses a given list of options. Valid options are:<p>
- *
- * -D num,num2... <br>
- * The column numbers that uniquely specify a dataset.
- * (default last) <p>
- *
- * -R num <br>
- * The column number containing the run number.
- * (default last) <p>
- *
- * -S num <br>
- * The significance level for T-Tests.
- * (default 0.05) <p>
- *
- * -R num,num2... <br>
- * The column numbers that uniquely specify one result generator (eg:
- * scheme name plus options).
- * (default last) <p>
- *
- * -V <br>
- * Show standard deviations <p>
- *
- * -L <br>
- * Produce comparison tables in Latex table format <p>
- *
- * @param options an array containing options to set.
- * @exception Exception if invalid options are given
- */
- public void setOptions(String[] options) throws Exception {
- setShowStdDevs(Utils.getFlag('V', options));
- setProduceLatex(Utils.getFlag('L', options));
- String datasetList = Utils.getOption('D', options);
- Range datasetRange = new Range();
- if (datasetList.length() != 0) {
- datasetRange.setRanges(datasetList);
- }
- setDatasetKeyColumns(datasetRange);
- String indexStr = Utils.getOption('R', options);
- if (indexStr.length() != 0) {
- if (indexStr.equals("first")) {
- setRunColumn(0);
- } else if (indexStr.equals("last")) {
- setRunColumn(-1);
- } else {
- setRunColumn(Integer.parseInt(indexStr) - 1);
- }
- } else {
- setRunColumn(-1);
- }
- String sigStr = Utils.getOption('S', options);
- if (sigStr.length() != 0) {
- setSignificanceLevel((new Double(sigStr)).doubleValue());
- } else {
- setSignificanceLevel(0.05);
- }
- String resultsetList = Utils.getOption('G', options);
- Range generatorRange = new Range();
- if (resultsetList.length() != 0) {
- generatorRange.setRanges(resultsetList);
- }
- setResultsetKeyColumns(generatorRange);
- }
- /**
- * Gets current settings of the PairedTTester.
- *
- * @return an array of strings containing current options.
- */
- public String[] getOptions() {
- String [] options = new String [10];
- int current = 0;
- if (!getResultsetKeyColumns().getRanges().equals("")) {
- options[current++] = "-G";
- options[current++] = getResultsetKeyColumns().getRanges();
- }
- if (!getDatasetKeyColumns().getRanges().equals("")) {
- options[current++] = "-D";
- options[current++] = getDatasetKeyColumns().getRanges();
- }
- options[current++] = "-R";
- options[current++] = "" + (getRunColumn() + 1);
- options[current++] = "-S";
- options[current++] = "" + getSignificanceLevel();
- if (getShowStdDevs()) {
- options[current++] = "-V";
- }
- if (getProduceLatex()) {
- options[current++] = "-L";
- }
- while (current < options.length) {
- options[current++] = "";
- }
- return options;
- }
- /**
- * Get the value of ResultsetKeyColumns.
- *
- * @return Value of ResultsetKeyColumns.
- */
- public Range getResultsetKeyColumns() {
- return m_ResultsetKeyColumnsRange;
- }
- /**
- * Set the value of ResultsetKeyColumns.
- *
- * @param newResultsetKeyColumns Value to assign to ResultsetKeyColumns.
- */
- public void setResultsetKeyColumns(Range newResultsetKeyColumns) {
- m_ResultsetKeyColumnsRange = newResultsetKeyColumns;
- m_ResultsetsValid = false;
- }
- /**
- * Get the value of SignificanceLevel.
- *
- * @return Value of SignificanceLevel.
- */
- public double getSignificanceLevel() {
- return m_SignificanceLevel;
- }
- /**
- * Set the value of SignificanceLevel.
- *
- * @param newSignificanceLevel Value to assign to SignificanceLevel.
- */
- public void setSignificanceLevel(double newSignificanceLevel) {
- m_SignificanceLevel = newSignificanceLevel;
- }
- /**
- * Get the value of DatasetKeyColumns.
- *
- * @return Value of DatasetKeyColumns.
- */
- public Range getDatasetKeyColumns() {
- return m_DatasetKeyColumnsRange;
- }
- /**
- * Set the value of DatasetKeyColumns.
- *
- * @param newDatasetKeyColumns Value to assign to DatasetKeyColumns.
- */
- public void setDatasetKeyColumns(Range newDatasetKeyColumns) {
- m_DatasetKeyColumnsRange = newDatasetKeyColumns;
- m_ResultsetsValid = false;
- }
- /**
- * Get the value of RunColumn.
- *
- * @return Value of RunColumn.
- */
- public int getRunColumn() {
- return m_RunColumnSet;
- }
- /**
- * Set the value of RunColumn.
- *
- * @param newRunColumn Value to assign to RunColumn.
- */
- public void setRunColumn(int newRunColumn) {
- m_RunColumnSet = newRunColumn;
- }
- /**
- * Get the value of Instances.
- *
- * @return Value of Instances.
- */
- public Instances getInstances() {
- return m_Instances;
- }
- /**
- * Set the value of Instances.
- *
- * @param newInstances Value to assign to Instances.
- */
- public void setInstances(Instances newInstances) {
- m_Instances = newInstances;
- m_ResultsetsValid = false;
- }
- /**
- * Test the class from the command line.
- *
- * @param args contains options for the instance ttests
- */
- public static void main(String args[]) {
- try {
- PairedTTester tt = new PairedTTester();
- String datasetName = Utils.getOption('t', args);
- String compareColStr = Utils.getOption('c', args);
- String baseColStr = Utils.getOption('b', args);
- boolean summaryOnly = Utils.getFlag('s', args);
- boolean rankingOnly = Utils.getFlag('r', args);
- try {
- if ((datasetName.length() == 0)
- || (compareColStr.length() == 0)) {
- throw new Exception("-t and -c options are required");
- }
- tt.setOptions(args);
- Utils.checkForRemainingOptions(args);
- } catch (Exception ex) {
- String result = "";
- Enumeration enum = tt.listOptions();
- while (enum.hasMoreElements()) {
- Option option = (Option) enum.nextElement();
- result += option.synopsis() + 'n'
- + option.description() + 'n';
- }
- throw new Exception(
- "Usage:nn"
- + "-t <file>n"
- + "tSet the dataset containing data to evaluaten"
- + "-b <index>n"
- + "tSet the resultset to base comparisons against (optional)n"
- + "-c <index>n"
- + "tSet the column to perform a comparison onn"
- + "-sn"
- + "tSummarize wins over all resultset pairsnn"
- + "-rn"
- + "tGenerate a resultset rankingnn"
- + result);
- }
- Instances data = new Instances(new BufferedReader(
- new FileReader(datasetName)));
- tt.setInstances(data);
- // tt.prepareData();
- int compareCol = Integer.parseInt(compareColStr) - 1;
- System.out.println(tt.header(compareCol));
- if (rankingOnly) {
- System.out.println(tt.multiResultsetRanking(compareCol));
- } else if (summaryOnly) {
- System.out.println(tt.multiResultsetSummary(compareCol));
- } else {
- System.out.println(tt.resultsetKey());
- if (baseColStr.length() == 0) {
- for (int i = 0; i < tt.getNumResultsets(); i++) {
- System.out.println(tt.multiResultsetFull(i, compareCol));
- }
- } else {
- int baseCol = Integer.parseInt(baseColStr) - 1;
- System.out.println(tt.multiResultsetFull(baseCol, compareCol));
- }
- }
- } catch(Exception e) {
- e.printStackTrace();
- System.err.println(e.getMessage());
- }
- }
- }