/*************
 *
 * sortWords.c
 *
 * September 2002 assignment in Algorithms class
 *
 *   -- original assignment -- 
 * 
 *   Expand the linked list example (see ../listwords/) to include sorting 
 *   the list. Again, find the time it takes to run as a function of 
 *   nWords. What is its behavior on this data? 
 *
 *   Modify the program again so that it stores the data in an array 
 *   rather than a list. Does this make it faster or slower?
 *    What are the advantages or disadvantages? 
 *
 * Jim Mahoney (mahoney@marlboro.edu)
 *
 * -- version history -------------------------------------------------
 *
 * version 2.3
 * added timing for sortList
 * added clonePartialList and analysis routines
 * BUT note that I'm not cleaning up any memory, but just creating
 * more copies of the list to sort.  EVERYTHING is a memory leak right now.
 *
 * version 2.2 
 *  debugged sortList in class
 *
 * version 2.1 Oct 2
 *  working on sortingList - in progress.
 *
 * version 2.0, Sep 30		(version 1.x is ../listwords/listWords.c)
 *  added and tested cloneList, cloneElment
 * 
 ******************************************************/

#include <stdio.h>
#include <string.h>
#include <sys/time.h>

// ===== defines, typedefs and subroutine declarations  ================
// ===== (Might put into a .h file later.) =============================

// maximum number of characters in one word.
#define MaxChars 128

// A doubly linked list element to hold a string and its value.
typedef struct ListElementStruct_ {
  struct ListElementStruct_*	next;
  struct ListElementStruct_*	prev;
  char*				data;
  int				value;
} ListElementStruct, *ListElement;

typedef struct ListStruct_ {
  ListElement			first, last;
  int				nElements;
} ListStruct, *List;

int getStringValue(char* aString);

List makeEmptyList();
ListElement makeElement(char* inputData);
void printElement(ListElement theList);
void printList(List theList);
void appendElementToList(List theList, ListElement newElement);
void appendStringToList(List theList, char* theString);
ListElement nthElement(List theList, int n);
List cloneList(List theList);
List clonePartialList(List theList, int n);
ListElement cloneElement(ListElement theElement);

List readWordsIntoList(int howManyWords);

void findMaxValue(List theList);
ListElement findMaxOfNWords(ListElement element, int nWords);
double timeFindMax(int nWords, List theList);
void analyzeFindMax(int nPoints);

void sortList(List theList);

void testList();
void testGetStringValue();
void trialReadWords();

// ==== subroutine implementations ==============================

// -----------------------------------------------------------
// Create and return a new empty list.
List makeEmptyList() {
  List newList = (List)malloc(sizeof(ListStruct));
  newList->nElements = 0;
  newList->first = NULL;
  newList->last = NULL;
  return(newList);
}

// -----------------------------------------------------------
// Create and return a new list element given an input string.
// Also, calculate and store the value of the string.
ListElement makeElement(char* inputData){
  ListElement newElement = (ListElement)malloc(sizeof(ListElementStruct));
  newElement->next  = NULL;
  newElement->prev  = NULL;
  newElement->data  = (char*)malloc(strlen(inputData));
  newElement->value = getStringValue(inputData);
  strcpy(newElement->data, inputData);
  return(newElement);
}

// -----------------------------------------------------------
// Create and return a shallow copy of a given list element.
// This routine does *not* copy the data, but instead points
// the new element at the same data.  
// The prev and next links are set to NULL.
ListElement cloneElement(ListElement theElement){
  ListElement thisElement = (ListElement)malloc(sizeof(ListElementStruct));
  thisElement->next  = NULL;
  thisElement->prev  = NULL;
  thisElement->data  = theElement->data;
  thisElement->value = theElement->value;
  return(thisElement);
}

// -----------------------------------------------------------
// Create and return a shallow copy of a part of given list.
// The listElement structures are duplicated, 
// but the data that they point to is not.  Only the first n elements
// are copied.
List clonePartialList(List theList, int n){
  List thisList = (List)malloc(sizeof(ListStruct));
  ListElement thisElement = NULL;
  ListElement previousElement = NULL;
  ListElement originalElement = theList->first;
  int i;
  for (i=0;i<n;i++){
    thisElement = cloneElement(originalElement);
    thisElement->prev = previousElement;
    if (previousElement == NULL){
      thisList->first = thisElement;
    }
    else {
      previousElement->next = thisElement;
    }
    originalElement = originalElement->next;
    previousElement = thisElement;
  }
  thisList->nElements = n;
  thisList->last = thisElement;
  thisElement->next = NULL;
  return(thisList);
}

// -----------------------------------------------------------
// Create and return a shallow copy of a given list.
// The listElement structures are duplicated, 
// but the data that they point to is not.
List cloneList(List theList) {
  List thisList = (List)malloc(sizeof(ListStruct));
  ListElement thisElement = NULL;
  ListElement previousElement = NULL;
  ListElement originalElement = theList->first;
  int i;
  for (i=0;i<theList->nElements;i++){
    thisElement = cloneElement(originalElement);
    thisElement->prev = previousElement;
    if (previousElement == NULL){
      thisList->first = thisElement;
    }
    else {
      previousElement->next = thisElement;
    }
    originalElement = originalElement->next;
    previousElement = thisElement;
  }
  thisList->nElements = theList->nElements;
  thisList->last = thisElement;
  return(thisList);
}

// -----------------------------------------------------------
void printElement(ListElement theList){
  printf("self=%p, next=%p, prev=%p. Data is '%s, value=%i\n", 
	 theList, theList->next, theList->prev,
	 theList->data, theList->value);
}

// -----------------------------------------------------------
void printList(List theList){
  ListElement el;
  int i;
  if (theList->nElements == 0){
    printf("   This list is empty.\n");
  }
  else {
    printf("   This list has %i element(s): \n", theList->nElements);
    i=0;
    el = theList->first;
    while (el != NULL && i<100) {  // don't show more than 100 elements
      printf("   %i : ",i);
      printElement(el);
      el = el->next;
      i++;
    }
  }
}

// -----------------------------------------------------------
void appendElementToList(List theList, ListElement newElement){
  theList->nElements++;
  if (theList->last == NULL){
    theList->first = newElement;
    theList->last = newElement;
  }
  else {
    theList->last->next = newElement;
    newElement->prev = theList->last;
    theList->last = newElement;
  }
}

// -----------------------------------------------------------
void appendStringToList(List theList, char* theString){
  ListElement theElement = makeElement(theString);
  appendElementToList(theList, theElement);
}

// -----------------------------------------------------------
void testList(){
  List theList = makeEmptyList();
  List secondList;
  ListElement anElement;

  printf(" ------------------- test list operations ------------ \n");

  printf("  adding 'Hello'\n");
  appendStringToList(theList, "Hello");
  printList(theList);

  printf("  adding 'Goodbye'\n");
  appendStringToList(theList, "Goodbye");

  printf("  adding 'Put123Back'\n");
  appendStringToList(theList, "Put123Back");

  printf("  adding 'Yet another'\n");
  appendStringToList(theList, "Yet another");

  printList(theList);

  anElement = nthElement(theList,2);
  printf("  Element 2 is: \n");
  printElement(anElement);

  secondList = cloneList(theList);
  printf("  Duplicate copy of that list: \n");
  printList(secondList);

  secondList = clonePartialList(theList, 2);
  printf("  Duplicate copy of 1st 2 elements of that list: \n");
  printList(secondList);

}

// -----------------------------------------------------------
// return the "value" of a string from the assignment definition.
int getStringValue(char* theString){
  int totalValue = 0;
  int characterValue=0;
  int asInt=1;
  int i=0;
  while (asInt != 0 && i<MaxChars) {
    asInt = (int)theString[i];
    if (48 <= asInt && asInt <= 57) {
      characterValue = asInt - 48;
    }
    else if (65 <= asInt && asInt <= 90 ) {
      characterValue = asInt - 55;
    }
    else if (97 <= asInt && asInt <= 122 ) {
      characterValue = asInt - 60;
    }
    else {
      characterValue = 0;
    }
//  printf(
//     "   in getStringValue: char %i is '%c', asInt is %i, value is %i\n",
//     i, theString[i], asInt, characterValue);
    i++;
    totalValue += characterValue;
  }
  return(totalValue);
}

//  By printing out some values (uncomment printf in getStringValue() , 
//  I see that the ascii values I care about are are:
//   "0" => 48, "9" => 57  , so subtract 48 from these to get 0..9
//   "A" => 65, "Z" => 90  , so subtract 55 from these to get 10..35
//   "a" => 97, "z" => 122 , so subtract 60 from these to get 37..62
//  I'll ignore the other characters.
void testGetStringValue(){
  char* sampleString = "09.AZ.az.";
  int value;
  printf(" ---------- test getStringValue ---------- \n");
  printf("  sampleString is '%s'\n", sampleString);
  printf("  length is %i\n", strlen(sampleString));
  value = getStringValue(sampleString);
  printf("  total value this word is %i\n", value);
}


// -----------------------------------------------------------
List readWordsIntoList(int howManyWords){
  char* fileName = "/usr/share/lib/dict/words"; // 25143 words, one per line.
  char oneWord[MaxChars];
  int n=0;
  int s;
  FILE* filePtr = fopen(fileName,"r");		// "r" is "read only" mode
  List theList = makeEmptyList();

  printf("# Reading %i words from '%s'.\n", howManyWords, fileName);
  if (filePtr==NULL){ 
    fprintf(stderr, "Error opening file '%s'\n");
    exit(1);					// Non-zero signals an error.
  }
  while (n<howManyWords) {
    fscanf(filePtr, "%s", oneWord);
    if (feof(filePtr)) {			// end of file test.
      return(theList);
    }
    appendStringToList(theList, oneWord);
    n++;
  }
  return(theList);
}

// -----------------------------------------------------------
// Return the element with the highest value starting at given element,
// after looking at nWords elements.
ListElement findMaxOfNWords(ListElement element, int nWords){
  ListElement bestSoFar = element;
  int i;
  for (i=1;i<nWords;i++){  // start at 1 since we have 0 already in hand.
    if (element == NULL) {
      printf(" OOPS: hit end of list in findMaxOfNWords\n");
      return(NULL);  // error return
    }
    element = element->next;			// walk down the list
    if (element->value > bestSoFar->value){
      bestSoFar = element;
    }
  }
  return(bestSoFar);
}

// -----------------------------------------------------------
void findMaxValue(List theList){
  ListElement el = theList->first;
  ListElement bestSoFar;
  if (el==NULL) {
    printf(" Oops: can't do findMaxValue on an empty list.\n");
    return;
  }
  printf(" Looking for largest value in %i element list...\n",
	 theList->nElements);
  bestSoFar = el;
  while (el->next != NULL){
    el = el->next;				// walk down the list
    if (el->value > bestSoFar->value){
      bestSoFar = el;
    }
  }
  printf(" Done.   *** Biggest is %i on word '%s'. ***\n\n", 
	 bestSoFar->value, bestSoFar->data);
}

// -----------------------------------------------------------
// Return the N'th element of the given list, 0<=N<=(size-1)
// For N>=size, return NULL.
ListElement nthElement(List theList, int n){
  ListElement el = theList->first;
  int i;
  if (n >= theList->nElements){
    return(NULL);
  }
  for (i=0;i<n;i++){
    el = el->next;
  }
  return(el);
}

// -----------------------------------------------------------
// Once this piece (and the others) worked, 
// I cut'n'paste the useful bits into readWordsIntoList.
void trialReadWords(){
  char* fileName = "/usr/share/lib/dict/words"; // 25143 words, one per line.
  char oneWord[MaxChars];
  int howManyWords = 20;			// Just as a warm up.
  int n=0;
  int s;
  FILE* filePtr = fopen(fileName,"r");		// "r" is "read only" mode

  printf(" ---------- test read words ---------- \n");

  if (filePtr==NULL){ 
    fprintf(stderr, "Error opening file '%s'\n");
    exit(1);					// Non-zero signals an error.
  }
  while (n<howManyWords) {
    fscanf(filePtr, "%s", oneWord);		
    s = strlen((const char*)oneWord);		// without cast gives warning
    printf(" %6i: '%s', length=%i, value=%i \n",
	   n,oneWord,s, getStringValue(oneWord));
    n++;
  }

}

// -----------------------------------------------------------
// Return the time (in seconds) it takes find the max value of nWords words.
// (This gethrtime() is a Solaris call; haven't looked at timing on linux.)
double timeFindMax(int nWords, List theList){
  hrtime_t start,end;
  const int repititions = 20;
  int i;
  ListElement maxFound;
  start=gethrtime();
  for (i=0;i<repititions;i++){
    maxFound = findMaxOfNWords(theList->first, nWords);
  }
  end=gethrtime();
  return 1.0e-9*(end-start)/repititions;
}

// -----------------------------------------------------------
// Find how long it takes for various values n of nWords,
// using nPoints values of n.  Print out a list of the results.
// Put "#" at the start of the non-numeric lines so that gnuplot
// will ignore them.  (Same in printf() in readWordsIntoList().)
void analyzeFindMax(int nPoints){
  List theList = readWordsIntoList(50000);  // read the whole file.
  double sec;
  int dN, n;
  const int MaxNWords = 25143;  // number of words in the file.
  dN = MaxNWords/nPoints;
  printf("# Time (in sec) to find max of n words,\n");
  printf("# using %i values for n:\n", nPoints);
  for (n=dN;n<MaxNWords;n+=dN){
    // sec = timeFindMax(n, theList);
    printf(" %10i   %-20g \n", n, sec);
  }
}

// -----------------------------------------------------------
// Insertion sort a given list according to the element "value" fields,
// putting smaller values before bigger ones.  This changes the 
// link pointers in the given list elements, and thus destroys 
// order of passed list.
void sortList(List theList) {
  ListElement	sortedFirst, sortedLast, unsorted, nextUnsorted, sorted;
  if (theList->nElements < 2) { return; } // Don't sort a 0 or 1 element list.
  // Pull the first element of the original list out and treat it as the start
  // of a new, sorted list.  (I don't bother with an actual ListStruct,
  // but just hold the first,last elements of this new sorted list.)
  //  (Note that during the sort I don't try to keep all these pointers
  //   entirely consistent.  In particular, unsorted is the first element
  //   of those that are unsorted, so unsorted->prev should really
  //   be NULL - but since all it's pointers will be changed when its moved
  //   into the sorted list anyway, that doesn't matter.
  sortedFirst = theList->first;		  // First in the newly sorted list.
  sortedLast =  sortedFirst;		  // Last in the newly sorted list.
  unsorted = theList->first->next;	  // First of the remaining unsorted.
  sortedFirst->next = NULL;
  // Outer loop over element number 1 to n-1  (element 0 is already sorted)
  while ( unsorted != NULL ) {
    // printf(" OUTER LOOP: unsorted = \n");
    // printElement(unsorted);
    nextUnsorted = unsorted->next;  // Remember this before u's links change.
    sorted = sortedFirst;
    // Inner loop over (at most) sorted list
    while (sorted != NULL) {        
      // printf("  INNER LOOP: sorted = \n");  
      // printElement(sorted);
      if ( unsorted->value < sorted->value ) {   // If u goes before s,
	// printf("   Found unsorted < sorted \n");
	unsorted->next = sorted;
	unsorted->prev = sorted->prev;
	if (sorted == sortedFirst) {    // Is this the start of sorted list?
	  // printf("   Placing at start of sorted list. \n");
	  sortedFirst = unsorted;       // - Yes, so reset ordered list start.
	}                               //
	else {				// - No, so set sorted->prev
	  sorted->prev->next = unsorted;
	}
	sorted->prev   = unsorted;
	break;					// Done this sorted loop.
      }
      sorted = sorted->next;			// Continue walk down s's list.
    }
    if (sorted == NULL) {		// Didn't find a place to put the
      /// printf(" Placing at end of list. \n");
      sortedLast->next = unsorted;	// unsorted one, so put it at the end.
      unsorted->prev   = sortedLast;
      unsorted->next   = NULL;
      sortedLast = unsorted;
    }
    unsorted = nextUnsorted;	  // On to the next unsorted element.
  }
  theList->first = sortedFirst;   // Done.
  theList->last  = sortedLast;    // Reset the start and end of theList.
}

// -----------------------------------------------------------
void testSortList(){
  List theList = makeEmptyList();
  ListElement thisElement;

  thisElement = makeElement("Ten");
  thisElement->value = 10;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Eight");
  thisElement->value = 8;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Nine");
  thisElement->value = 9;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Three");
  thisElement->value = 3;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Two");
  thisElement->value = 2;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Three again");
  thisElement->value = 3;
  appendElementToList(theList, thisElement);

  thisElement = makeElement("Seven");
  thisElement->value = 7;
  appendElementToList(theList, thisElement);

  printf(" Original list order: \n");
  printList(theList);

  sortList(theList);

  printf(" Sorted list order: \n");
  printList(theList);
}

// -----------------------------------------------------------
// Return the time (in seconds) it takes to clone and sort 
// a list of nWords words.
// (This gethrtime() is a Solaris call; haven't looked at timing on linux.)
double timeSortList(int nWords, List theList){
  hrtime_t start,end;
  List copyOfList;
  const int repititions = 2;
  int i;
  start=gethrtime();
  for (i=0;i<repititions;i++){
    copyOfList = clonePartialList(theList,nWords);
    sortList(copyOfList);
  }
  end=gethrtime();
  return 1.0e-9*(end-start)/repititions;
}


// -----------------------------------------------------------
// Find how long it takes to sort various length lists.
// Print out a list of the results.
// Put "#" at the start of the non-numeric lines so that gnuplot
// will ignore them.  (Same in printf() in readWordsIntoList().)
void analyzeSortList(int nPoints){
  List theList = readWordsIntoList(50000);  // read the whole file.
  double sec;
  int dN, n;
  const int MaxNWords = 25143;  // number of words in the file.
  dN = MaxNWords/nPoints;
  printf("# Time (in sec) to copy and sort a list of n words,\n");
  printf("# using %i values for n:\n", nPoints);
  for (n=dN;n<MaxNWords;n+=dN){
    sec = timeSortList(n, theList);
    printf(" %10i   %-20g \n", n, sec);
  }
}



// ==== Fire it up. =================================================

int main(){

  /*** run various test ***/
  // testGetStringValue(); // Calculate values from strings
  // testList();           // Create elements and list,  print and clone 'em.
  // trialReadWords();     // Read in the dictionary.
  // testSortList();          // Insertion sort a short sample list.

  /** Version 1: find max of entire file **/
  //findMaxValue(readWordsIntoList(50000));

  /** Version 1.1: timing of various values for nWords. **/
  //analyzeFindMax(100);

  //List theList = readWordsIntoList(50000);  // read the whole file.
  //int n = 1000;
  //double sec = timeSortList(n, theList);
  //printf(" It takes %g seconds to sort %i points. \n", sec,n );

  analyzeSortList(20);

}


syntax highlighted by Code2HTML, v. 0.9.1