/**
* Copyright (c) 2004-2006 Regents of the University of California.
* See "license-prefuse.txt" for licensing terms.
*/
package prefuse.util;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.data.column.ColumnMetadata;
import prefuse.data.tuple.TupleSet;
import prefuse.util.collections.DefaultLiteralComparator;
/**
* Functions for processing an iterator of tuples, including the creation
* of arrays of particular tuple data values and summary
* statistics (min, max, median, mean, standard deviation).
*
* @author jeffrey heer
*/
public class DataLib {
/**
 * Collect the value of a single data field from every tuple produced by
 * an iterator. Values are stored in iteration order.
 * @param tuples an iterator over tuples; each element is cast to
 * {@link Tuple}
 * @param field the column / data field name
 * @return an array containing the field value of each tuple visited
 */
public static Object[] toArray(Iterator tuples, String field) {
    // start with room for 100 values and grow on demand
    Object[] values = new Object[100];
    int count = 0;
    while ( tuples.hasNext() ) {
        if ( count == values.length ) {
            // buffer is full: enlarge it to 1.5x its current length
            values = ArrayLib.resize(values, 3*values.length/2);
        }
        Tuple tuple = (Tuple)tuples.next();
        values[count] = tuple.get(field);
        ++count;
    }
    // hand back only the portion that was actually filled
    return ArrayLib.trim(values, count);
}
/**
 * Collect the value of a single data field, read as a double, from every
 * tuple produced by an iterator. Values are stored in iteration order.
 * The field must support double access (for a table column,
 * {@link Table#canGetDouble(String)} must return true), otherwise an
 * exception will be thrown.
 * @param tuples an iterator over tuples; each element is cast to
 * {@link Tuple}
 * @param field the column / data field name
 * @return an array of doubles containing the field value of each tuple
 * visited
 */
public static double[] toDoubleArray(Iterator tuples, String field) {
    // start with room for 100 values and grow on demand
    double[] values = new double[100];
    int count = 0;
    while ( tuples.hasNext() ) {
        if ( count == values.length ) {
            // buffer is full: enlarge it to 1.5x its current length
            values = ArrayLib.resize(values, 3*values.length/2);
        }
        Tuple tuple = (Tuple)tuples.next();
        values[count] = tuple.getDouble(field);
        ++count;
    }
    // hand back only the portion that was actually filled
    return ArrayLib.trim(values, count);
}
// ------------------------------------------------------------------------
/**
 * Get a sorted array of the distinct values of a data field across a
 * tuple iteration, ordered by the shared
 * {@link DefaultLiteralComparator} instance.
 * @param tuples an iterator over tuples
 * @param field the column / data field name
 * @return an array containing the sorted field values
 */
public static Object[] ordinalArray(Iterator tuples, String field) {
    Comparator literalOrder = DefaultLiteralComparator.getInstance();
    return ordinalArray(tuples, field, literalOrder);
}
/**
 * Get a sorted array of the distinct values of a data field across a
 * tuple iteration. Repeated values (as judged by the hash set used to
 * gather them) appear only once in the result.
 * @param tuples an iterator over tuples; each element is cast to
 * {@link Tuple}
 * @param field the column / data field name
 * @param cmp a comparator for sorting the field values
 * @return an array containing the distinct field values, sorted by
 * <code>cmp</code>
 */
public static Object[] ordinalArray(Iterator tuples, String field,
                                    Comparator cmp)
{
    // gather the values into a set so duplicates collapse
    HashSet distinct = new HashSet();
    for ( Iterator it = tuples; it.hasNext(); ) {
        Tuple tuple = (Tuple)it.next();
        distinct.add(tuple.get(field));
    }

    // order the surviving values with the supplied comparator
    Object[] sorted = distinct.toArray();
    Arrays.sort(sorted, cmp);
    return sorted;
}
/**
 * Get a sorted array of the values of a data field for a tuple set,
 * using the shared {@link DefaultLiteralComparator} instance as the
 * comparator.
 * @param tuples a TupleSet
 * @param field the column / data field name
 * @return an array containing the sorted field values
 */
public static Object[] ordinalArray(TupleSet tuples, String field) {
    Comparator literalOrder = DefaultLiteralComparator.getInstance();
    return ordinalArray(tuples, field, literalOrder);
}
/**
 * Get a sorted array of the values of a data field for a tuple set.
 * When the tuple set is a {@link Table}, the array is obtained from the
 * table's column metadata and <code>cmp</code> is not consulted;
 * otherwise the tuples are iterated and their values sorted with
 * <code>cmp</code>.
 * @param tuples a TupleSet
 * @param field the column / data field name
 * @param cmp a comparator for sorting the field values (used only when
 * <code>tuples</code> is not a Table)
 * @return an array containing the sorted field values
 */
public static Object[] ordinalArray(TupleSet tuples, String field,
                                    Comparator cmp)
{
    // generic tuple sets: walk the tuples and sort explicitly
    if ( !(tuples instanceof Table) ) {
        return ordinalArray(tuples.tuples(), field, cmp);
    }

    // tables: defer to the metadata kept for the column
    Table table = (Table)tuples;
    ColumnMetadata metadata = table.getMetadata(field);
    return metadata.getOrdinalArray();
}
// ------------------------------------------------------------------------
/**
 * Get a map from data field values (as Object instances) to their
 * ordinal index in a sorted array, using the shared
 * {@link DefaultLiteralComparator} instance as the comparator.
 * @param tuples an iterator over tuples
 * @param field the column / data field name
 * @return a map mapping field values to their position in a sorted
 * order of values
 */
public static Map ordinalMap(Iterator tuples, String field) {
    Comparator literalOrder = DefaultLiteralComparator.getInstance();
    return ordinalMap(tuples, field, literalOrder);
}
/**
* Get map mapping from column values (as Object instances) to their
* ordinal index in a sorted array.
* @param tuples an iterator over tuples
* @param field the column / data field name
* @param cmp a comparator for sorting the column contents
* @return a map mapping column values to their position in a sorted
* order of values
*/
public static Map ordinalMap(Iterator tuples, String field, Comparator cmp)
{
Object[] o = ordinalArray(tuples, field, cmp);
// map the values to the non-negative numbers
HashMap map = new HashMap();
for ( int i=0; i 0 ) {
t = tmp;
min = obj;
}
}
return t;
}
/**
 * Get the Tuple with the maximum data field value.
 * When the tuple set is a {@link Table}, the row is looked up through
 * the table's column metadata and <code>cmp</code> is not consulted;
 * otherwise the tuples are iterated and compared with <code>cmp</code>.
 * @param tuples a TupleSet
 * @param field the column / data field name
 * @param cmp a comparator for the field values (used only when
 * <code>tuples</code> is not a Table)
 * @return the Tuple with the maximum data field value
 */
public static Tuple max(TupleSet tuples, String field, Comparator cmp) {
    // generic tuple sets: scan the tuples with the comparator
    if ( !(tuples instanceof Table) ) {
        return max(tuples.tuples(), field, cmp);
    }

    // tables: the column metadata reports the row holding the maximum
    Table table = (Table)tuples;
    ColumnMetadata metadata = table.getMetadata(field);
    int maxRow = metadata.getMaximumRow();
    return table.getTuple(maxRow);
}
/**
 * Get the Tuple with the maximum data field value, using the shared
 * {@link DefaultLiteralComparator} instance as the comparator.
 * @param tuples a TupleSet
 * @param field the column / data field name
 * @return the Tuple with the maximum data field value
 */
public static Tuple max(TupleSet tuples, String field) {
    Comparator literalOrder = DefaultLiteralComparator.getInstance();
    return max(tuples, field, literalOrder);
}
// ------------------------------------------------------------------------
/**
 * Get the Tuple with the median data field value, using the shared
 * {@link DefaultLiteralComparator} instance as the comparator.
 * @param tuples an iterator over tuples
 * @param field the column / data field name
 * @return the Tuple with the median data field value
 */
public static Tuple median(Iterator tuples, String field) {
    Comparator literalOrder = DefaultLiteralComparator.getInstance();
    return median(tuples, field, literalOrder);
}
/**
* Get the Tuple with the median data field value.
* @param tuples an iterator over tuples
* @param field the column / data field name
* @param cmp a comparator for sorting the column contents
* @return the Tuple with the median data field value
*/
public static Tuple median(Iterator tuples, String field, Comparator cmp) {
Object[] t = new Tuple[100];
int i=0;
for ( ; tuples.hasNext(); ++i ) {
if ( i >= t.length )
t = ArrayLib.resize(t, 3*t.length/2);
t[i] = (Tuple)tuples.next();
}
ArrayLib.trim(t, i);
Object[] v = new Object[t.length];
int[] idx = new int[t.length];
for ( i=0; i