/**
 * Copyright (c) 2004-2006 Regents of the University of California.
 * See "license-prefuse.txt" for licensing terms.
 */
package prefuse.util;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.data.column.ColumnMetadata;
import prefuse.data.tuple.TupleSet;
import prefuse.util.collections.DefaultLiteralComparator;

/**
 * Functions for processing an iterator of tuples, including the creation
 * of arrays of particular tuple data values and summary
 * statistics (min, max, median, mean, standard deviation).
 *
 * @author jeffrey heer
 */
public class DataLib {

    /**
     * Collect the value of one data field from every tuple produced by
     * the given iterator. Values are stored in the order the iterator
     * yields them; the iterator is fully consumed.
     * @param tuples an iterator over tuples
     * @param field the column / data field name
     * @return an array containing the data values
     */
    public static Object[] toArray(Iterator tuples, String field) {
        Object[] values = new Object[100];
        int count = 0;
        while ( tuples.hasNext() ) {
            // buffer is full: ask for one and a half times the capacity
            if ( count >= values.length ) {
                values = ArrayLib.resize(values, 3*values.length/2);
            }
            Tuple tuple = (Tuple)tuples.next();
            values[count] = tuple.get(field);
            ++count;
        }
        // hand back only the slots that were actually filled
        return ArrayLib.trim(values, count);
    }

    /**
     * Get an array of doubles containing all column values for a given table
     * and field. The {@link Table#canGetDouble(String)} method must return
     * true for the given column name, otherwise an exception will be thrown.
* @param tuples an iterator over tuples * @param field the column / data field name * @return an array of doubles containing the column values */ public static double[] toDoubleArray(Iterator tuples, String field) { double[] array = new double[100]; int i=0; for ( ; tuples.hasNext(); ++i ) { if ( i >= array.length ) array = ArrayLib.resize(array, 3*array.length/2); array[i] = ((Tuple)tuples.next()).getDouble(field); } return ArrayLib.trim(array, i); } // ------------------------------------------------------------------------ /** * Get a sorted array containing all column values for a given tuple * iterator and field. * @param tuples an iterator over tuples * @param field the column / data field name * @return an array containing the column values sorted */ public static Object[] ordinalArray(Iterator tuples, String field) { return DataLib.ordinalArray(tuples, field, DefaultLiteralComparator.getInstance()); } /** * Get a sorted array containing all column values for a given table and * field. * @param tuples an iterator over tuples * @param field the column / data field name * @param cmp a comparator for sorting the column contents * @return an array containing the column values sorted */ public static Object[] ordinalArray(Iterator tuples, String field, Comparator cmp) { // get set of all unique values HashSet set = new HashSet(); while ( tuples.hasNext() ) set.add(((Tuple)tuples.next()).get(field)); // sort the unique values Object[] o = set.toArray(); Arrays.sort(o, cmp); return o; } /** * Get a sorted array containing all column values for a given tuple * iterator and field. * @param tuples a TupleSet * @param field the column / data field name * @return an array containing the column values sorted */ public static Object[] ordinalArray(TupleSet tuples, String field) { return ordinalArray(tuples, field, DefaultLiteralComparator.getInstance()); } /** * Get a sorted array containing all column values for a given table and * field. 
* @param tuples a TupleSet * @param field the column / data field name * @param cmp a comparator for sorting the column contents * @return an array containing the column values sorted */ public static Object[] ordinalArray(TupleSet tuples, String field, Comparator cmp) { if ( tuples instanceof Table ) { ColumnMetadata md = ((Table)tuples).getMetadata(field); return md.getOrdinalArray(); } else { return ordinalArray(tuples.tuples(), field, cmp); } } // ------------------------------------------------------------------------ /** * Get map mapping from column values (as Object instances) to their * ordinal index in a sorted array. * @param tuples an iterator over tuples * @param field the column / data field name * @return a map mapping column values to their position in a sorted * order of values */ public static Map ordinalMap(Iterator tuples, String field) { return ordinalMap(tuples, field, DefaultLiteralComparator.getInstance()); } /** * Get map mapping from column values (as Object instances) to their * ordinal index in a sorted array. * @param tuples an iterator over tuples * @param field the column / data field name * @param cmp a comparator for sorting the column contents * @return a map mapping column values to their position in a sorted * order of values */ public static Map ordinalMap(Iterator tuples, String field, Comparator cmp) { Object[] o = ordinalArray(tuples, field, cmp); // map the values to the non-negative numbers HashMap map = new HashMap(); for ( int i=0; i 0 ) { t = tmp; min = obj; } } return t; } /** * Get the Tuple with the maximum data field value. 
* @param tuples a TupleSet * @param field the column / data field name * @return the Tuple with the maximum data field value */ public static Tuple max(TupleSet tuples, String field, Comparator cmp) { if ( tuples instanceof Table ) { Table table = (Table)tuples; ColumnMetadata md = table.getMetadata(field); return table.getTuple(md.getMaximumRow()); } else { return max(tuples.tuples(), field, cmp); } } /** * Get the Tuple with the maximum data field value. * @param tuples a TupleSet * @param field the column / data field name * @return the Tuple with the maximum data field value */ public static Tuple max(TupleSet tuples, String field) { return max(tuples, field, DefaultLiteralComparator.getInstance()); } // ------------------------------------------------------------------------ /** * Get the Tuple with the median data field value. * @param tuples an iterator over tuples * @param field the column / data field name * @return the Tuple with the median data field value */ public static Tuple median(Iterator tuples, String field) { return median(tuples, field, DefaultLiteralComparator.getInstance()); } /** * Get the Tuple with the median data field value. * @param tuples an iterator over tuples * @param field the column / data field name * @param cmp a comparator for sorting the column contents * @return the Tuple with the median data field value */ public static Tuple median(Iterator tuples, String field, Comparator cmp) { Object[] t = new Tuple[100]; int i=0; for ( ; tuples.hasNext(); ++i ) { if ( i >= t.length ) t = ArrayLib.resize(t, 3*t.length/2); t[i] = (Tuple)tuples.next(); } ArrayLib.trim(t, i); Object[] v = new Object[t.length]; int[] idx = new int[t.length]; for ( i=0; i