/*************
*
* sortWords.c
*
* September 2002 assignment in Algorithms class
*
* -- original assignment --
*
* Expand the linked list example (see ../listwords/) to include sorting
* the list. Again, find the time it takes to run as a function of
* nWords. What is its behavior on this data?
*
* Modify the program again so that it stores the data in an array
* rather than a list. Does this make it faster or slower?
* What are the advantages or disadvantages?
*
* Jim Mahoney (mahoney@marlboro.edu)
*
* -- version history -------------------------------------------------
*
* version 2.3
* added timing for sortList
* added clonePartialList and analysis routines
* BUT note that I'm not cleaning up any memory, but just creating
* more copies of the list to sort. EVERYTHING is a memory leak right now.
*
* version 2.2
* debugged sortList in class
*
* version 2.1 Oct 2
* working on sortingList - in progress.
*
* version 2.0, Sep 30 (version 1.x is ../listwords/listWords.c)
* added and tested cloneList, cloneElment
*
******************************************************/
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
// ===== defines, typedefs and subroutine declarations ================
// ===== (Might put into a .h file later.) =============================
// maximum number of characters in one word.
#define MaxChars 128
// A doubly linked list element to hold a string and its value.
typedef struct ListElementStruct_ {
struct ListElementStruct_* next;
struct ListElementStruct_* prev;
char* data;
int value;
} ListElementStruct, *ListElement;
typedef struct ListStruct_ {
ListElement first, last;
int nElements;
} ListStruct, *List;
int getStringValue(char* aString);
List makeEmptyList();
ListElement makeElement(char* inputData);
void printElement(ListElement theList);
void printList(List theList);
void appendElementToList(List theList, ListElement newElement);
void appendStringToList(List theList, char* theString);
ListElement nthElement(List theList, int n);
List cloneList(List theList);
List clonePartialList(List theList, int n);
ListElement cloneElement(ListElement theElement);
List readWordsIntoList(int howManyWords);
void findMaxValue(List theList);
ListElement findMaxOfNWords(ListElement element, int nWords);
double timeFindMax(int nWords, List theList);
void analyzeFindMax(int nPoints);
void sortList(List theList);
void testList();
void testGetStringValue();
void trialReadWords();
// ==== subroutine implementations ==============================
// -----------------------------------------------------------
// Create and return a new empty list.
List makeEmptyList() {
List newList = (List)malloc(sizeof(ListStruct));
newList->nElements = 0;
newList->first = NULL;
newList->last = NULL;
return(newList);
}
// -----------------------------------------------------------
// Create and return a new list element given an input string.
// Also, calculate and store the value of the string.
ListElement makeElement(char* inputData){
ListElement newElement = (ListElement)malloc(sizeof(ListElementStruct));
newElement->next = NULL;
newElement->prev = NULL;
newElement->data = (char*)malloc(strlen(inputData));
newElement->value = getStringValue(inputData);
strcpy(newElement->data, inputData);
return(newElement);
}
// -----------------------------------------------------------
// Create and return a shallow copy of a given list element.
// This routine does *not* copy the data, but instead points
// the new element at the same data.
// The prev and next links are set to NULL.
ListElement cloneElement(ListElement theElement){
ListElement thisElement = (ListElement)malloc(sizeof(ListElementStruct));
thisElement->next = NULL;
thisElement->prev = NULL;
thisElement->data = theElement->data;
thisElement->value = theElement->value;
return(thisElement);
}
// -----------------------------------------------------------
// Create and return a shallow copy of a part of given list.
// The listElement structures are duplicated,
// but the data that they point to is not. Only the first n elements
// are copied.
List clonePartialList(List theList, int n){
List thisList = (List)malloc(sizeof(ListStruct));
ListElement thisElement = NULL;
ListElement previousElement = NULL;
ListElement originalElement = theList->first;
int i;
for (i=0;i<n;i++){
thisElement = cloneElement(originalElement);
thisElement->prev = previousElement;
if (previousElement == NULL){
thisList->first = thisElement;
}
else {
previousElement->next = thisElement;
}
originalElement = originalElement->next;
previousElement = thisElement;
}
thisList->nElements = n;
thisList->last = thisElement;
thisElement->next = NULL;
return(thisList);
}
// -----------------------------------------------------------
// Create and return a shallow copy of a given list.
// The listElement structures are duplicated,
// but the data that they point to is not.
List cloneList(List theList) {
List thisList = (List)malloc(sizeof(ListStruct));
ListElement thisElement = NULL;
ListElement previousElement = NULL;
ListElement originalElement = theList->first;
int i;
for (i=0;i<theList->nElements;i++){
thisElement = cloneElement(originalElement);
thisElement->prev = previousElement;
if (previousElement == NULL){
thisList->first = thisElement;
}
else {
previousElement->next = thisElement;
}
originalElement = originalElement->next;
previousElement = thisElement;
}
thisList->nElements = theList->nElements;
thisList->last = thisElement;
return(thisList);
}
// -----------------------------------------------------------
void printElement(ListElement theList){
printf("self=%p, next=%p, prev=%p. Data is '%s, value=%i\n",
theList, theList->next, theList->prev,
theList->data, theList->value);
}
// -----------------------------------------------------------
void printList(List theList){
ListElement el;
int i;
if (theList->nElements == 0){
printf(" This list is empty.\n");
}
else {
printf(" This list has %i element(s): \n", theList->nElements);
i=0;
el = theList->first;
while (el != NULL && i<100) { // don't show more than 100 elements
printf(" %i : ",i);
printElement(el);
el = el->next;
i++;
}
}
}
// -----------------------------------------------------------
void appendElementToList(List theList, ListElement newElement){
theList->nElements++;
if (theList->last == NULL){
theList->first = newElement;
theList->last = newElement;
}
else {
theList->last->next = newElement;
newElement->prev = theList->last;
theList->last = newElement;
}
}
// -----------------------------------------------------------
void appendStringToList(List theList, char* theString){
ListElement theElement = makeElement(theString);
appendElementToList(theList, theElement);
}
// -----------------------------------------------------------
void testList(){
List theList = makeEmptyList();
List secondList;
ListElement anElement;
printf(" ------------------- test list operations ------------ \n");
printf(" adding 'Hello'\n");
appendStringToList(theList, "Hello");
printList(theList);
printf(" adding 'Goodbye'\n");
appendStringToList(theList, "Goodbye");
printf(" adding 'Put123Back'\n");
appendStringToList(theList, "Put123Back");
printf(" adding 'Yet another'\n");
appendStringToList(theList, "Yet another");
printList(theList);
anElement = nthElement(theList,2);
printf(" Element 2 is: \n");
printElement(anElement);
secondList = cloneList(theList);
printf(" Duplicate copy of that list: \n");
printList(secondList);
secondList = clonePartialList(theList, 2);
printf(" Duplicate copy of 1st 2 elements of that list: \n");
printList(secondList);
}
// -----------------------------------------------------------
// return the "value" of a string from the assignment definition.
int getStringValue(char* theString){
int totalValue = 0;
int characterValue=0;
int asInt=1;
int i=0;
while (asInt != 0 && i<MaxChars) {
asInt = (int)theString[i];
if (48 <= asInt && asInt <= 57) {
characterValue = asInt - 48;
}
else if (65 <= asInt && asInt <= 90 ) {
characterValue = asInt - 55;
}
else if (97 <= asInt && asInt <= 122 ) {
characterValue = asInt - 60;
}
else {
characterValue = 0;
}
// printf(
// " in getStringValue: char %i is '%c', asInt is %i, value is %i\n",
// i, theString[i], asInt, characterValue);
i++;
totalValue += characterValue;
}
return(totalValue);
}
// By printing out some values (uncomment printf in getStringValue() ,
// I see that the ascii values I care about are are:
// "0" => 48, "9" => 57 , so subtract 48 from these to get 0..9
// "A" => 65, "Z" => 90 , so subtract 55 from these to get 10..35
// "a" => 97, "z" => 122 , so subtract 60 from these to get 37..62
// I'll ignore the other characters.
void testGetStringValue(){
char* sampleString = "09.AZ.az.";
int value;
printf(" ---------- test getStringValue ---------- \n");
printf(" sampleString is '%s'\n", sampleString);
printf(" length is %i\n", strlen(sampleString));
value = getStringValue(sampleString);
printf(" total value this word is %i\n", value);
}
// -----------------------------------------------------------
List readWordsIntoList(int howManyWords){
char* fileName = "/usr/share/lib/dict/words"; // 25143 words, one per line.
char oneWord[MaxChars];
int n=0;
int s;
FILE* filePtr = fopen(fileName,"r"); // "r" is "read only" mode
List theList = makeEmptyList();
printf("# Reading %i words from '%s'.\n", howManyWords, fileName);
if (filePtr==NULL){
fprintf(stderr, "Error opening file '%s'\n");
exit(1); // Non-zero signals an error.
}
while (n<howManyWords) {
fscanf(filePtr, "%s", oneWord);
if (feof(filePtr)) { // end of file test.
return(theList);
}
appendStringToList(theList, oneWord);
n++;
}
return(theList);
}
// -----------------------------------------------------------
// Return the element with the highest value starting at given element,
// after looking at nWords elements.
ListElement findMaxOfNWords(ListElement element, int nWords){
ListElement bestSoFar = element;
int i;
for (i=1;i<nWords;i++){ // start at 1 since we have 0 already in hand.
if (element == NULL) {
printf(" OOPS: hit end of list in findMaxOfNWords\n");
return(NULL); // error return
}
element = element->next; // walk down the list
if (element->value > bestSoFar->value){
bestSoFar = element;
}
}
return(bestSoFar);
}
// -----------------------------------------------------------
void findMaxValue(List theList){
ListElement el = theList->first;
ListElement bestSoFar;
if (el==NULL) {
printf(" Oops: can't do findMaxValue on an empty list.\n");
return;
}
printf(" Looking for largest value in %i element list...\n",
theList->nElements);
bestSoFar = el;
while (el->next != NULL){
el = el->next; // walk down the list
if (el->value > bestSoFar->value){
bestSoFar = el;
}
}
printf(" Done. *** Biggest is %i on word '%s'. ***\n\n",
bestSoFar->value, bestSoFar->data);
}
// -----------------------------------------------------------
// Return the N'th element of the given list, 0<=N<=(size-1)
// For N>=size, return NULL.
ListElement nthElement(List theList, int n){
ListElement el = theList->first;
int i;
if (n >= theList->nElements){
return(NULL);
}
for (i=0;i<n;i++){
el = el->next;
}
return(el);
}
// -----------------------------------------------------------
// Once this piece (and the others) worked,
// I cut'n'paste the useful bits into readWordsIntoList.
void trialReadWords(){
char* fileName = "/usr/share/lib/dict/words"; // 25143 words, one per line.
char oneWord[MaxChars];
int howManyWords = 20; // Just as a warm up.
int n=0;
int s;
FILE* filePtr = fopen(fileName,"r"); // "r" is "read only" mode
printf(" ---------- test read words ---------- \n");
if (filePtr==NULL){
fprintf(stderr, "Error opening file '%s'\n");
exit(1); // Non-zero signals an error.
}
while (n<howManyWords) {
fscanf(filePtr, "%s", oneWord);
s = strlen((const char*)oneWord); // without cast gives warning
printf(" %6i: '%s', length=%i, value=%i \n",
n,oneWord,s, getStringValue(oneWord));
n++;
}
}
// -----------------------------------------------------------
// Return the time (in seconds) it takes find the max value of nWords words.
// (This gethrtime() is a Solaris call; haven't looked at timing on linux.)
double timeFindMax(int nWords, List theList){
hrtime_t start,end;
const int repititions = 20;
int i;
ListElement maxFound;
start=gethrtime();
for (i=0;i<repititions;i++){
maxFound = findMaxOfNWords(theList->first, nWords);
}
end=gethrtime();
return 1.0e-9*(end-start)/repititions;
}
// -----------------------------------------------------------
// Find how long it takes for various values n of nWords,
// using nPoints values of n. Print out a list of the results.
// Put "#" at the start of the non-numeric lines so that gnuplot
// will ignore them. (Same in printf() in readWordsIntoList().)
void analyzeFindMax(int nPoints){
List theList = readWordsIntoList(50000); // read the whole file.
double sec;
int dN, n;
const int MaxNWords = 25143; // number of words in the file.
dN = MaxNWords/nPoints;
printf("# Time (in sec) to find max of n words,\n");
printf("# using %i values for n:\n", nPoints);
for (n=dN;n<MaxNWords;n+=dN){
// sec = timeFindMax(n, theList);
printf(" %10i %-20g \n", n, sec);
}
}
// -----------------------------------------------------------
// Insertion sort a given list according to the element "value" fields,
// putting smaller values before bigger ones. This changes the
// link pointers in the given list elements, and thus destroys
// order of passed list.
void sortList(List theList) {
ListElement sortedFirst, sortedLast, unsorted, nextUnsorted, sorted;
if (theList->nElements < 2) { return; } // Don't sort a 0 or 1 element list.
// Pull the first element of the original list out and treat it as the start
// of a new, sorted list. (I don't bother with an actual ListStruct,
// but just hold the first,last elements of this new sorted list.)
// (Note that during the sort I don't try to keep all these pointers
// entirely consistent. In particular, unsorted is the first element
// of those that are unsorted, so unsorted->prev should really
// be NULL - but since all it's pointers will be changed when its moved
// into the sorted list anyway, that doesn't matter.
sortedFirst = theList->first; // First in the newly sorted list.
sortedLast = sortedFirst; // Last in the newly sorted list.
unsorted = theList->first->next; // First of the remaining unsorted.
sortedFirst->next = NULL;
// Outer loop over element number 1 to n-1 (element 0 is already sorted)
while ( unsorted != NULL ) {
// printf(" OUTER LOOP: unsorted = \n");
// printElement(unsorted);
nextUnsorted = unsorted->next; // Remember this before u's links change.
sorted = sortedFirst;
// Inner loop over (at most) sorted list
while (sorted != NULL) {
// printf(" INNER LOOP: sorted = \n");
// printElement(sorted);
if ( unsorted->value < sorted->value ) { // If u goes before s,
// printf(" Found unsorted < sorted \n");
unsorted->next = sorted;
unsorted->prev = sorted->prev;
if (sorted == sortedFirst) { // Is this the start of sorted list?
// printf(" Placing at start of sorted list. \n");
sortedFirst = unsorted; // - Yes, so reset ordered list start.
} //
else { // - No, so set sorted->prev
sorted->prev->next = unsorted;
}
sorted->prev = unsorted;
break; // Done this sorted loop.
}
sorted = sorted->next; // Continue walk down s's list.
}
if (sorted == NULL) { // Didn't find a place to put the
/// printf(" Placing at end of list. \n");
sortedLast->next = unsorted; // unsorted one, so put it at the end.
unsorted->prev = sortedLast;
unsorted->next = NULL;
sortedLast = unsorted;
}
unsorted = nextUnsorted; // On to the next unsorted element.
}
theList->first = sortedFirst; // Done.
theList->last = sortedLast; // Reset the start and end of theList.
}
// -----------------------------------------------------------
void testSortList(){
List theList = makeEmptyList();
ListElement thisElement;
thisElement = makeElement("Ten");
thisElement->value = 10;
appendElementToList(theList, thisElement);
thisElement = makeElement("Eight");
thisElement->value = 8;
appendElementToList(theList, thisElement);
thisElement = makeElement("Nine");
thisElement->value = 9;
appendElementToList(theList, thisElement);
thisElement = makeElement("Three");
thisElement->value = 3;
appendElementToList(theList, thisElement);
thisElement = makeElement("Two");
thisElement->value = 2;
appendElementToList(theList, thisElement);
thisElement = makeElement("Three again");
thisElement->value = 3;
appendElementToList(theList, thisElement);
thisElement = makeElement("Seven");
thisElement->value = 7;
appendElementToList(theList, thisElement);
printf(" Original list order: \n");
printList(theList);
sortList(theList);
printf(" Sorted list order: \n");
printList(theList);
}
// -----------------------------------------------------------
// Return the time (in seconds) it takes to clone and sort
// a list of nWords words.
// (This gethrtime() is a Solaris call; haven't looked at timing on linux.)
double timeSortList(int nWords, List theList){
hrtime_t start,end;
List copyOfList;
const int repititions = 2;
int i;
start=gethrtime();
for (i=0;i<repititions;i++){
copyOfList = clonePartialList(theList,nWords);
sortList(copyOfList);
}
end=gethrtime();
return 1.0e-9*(end-start)/repititions;
}
// -----------------------------------------------------------
// Find how long it takes to sort various length lists.
// Print out a list of the results.
// Put "#" at the start of the non-numeric lines so that gnuplot
// will ignore them. (Same in printf() in readWordsIntoList().)
void analyzeSortList(int nPoints){
List theList = readWordsIntoList(50000); // read the whole file.
double sec;
int dN, n;
const int MaxNWords = 25143; // number of words in the file.
dN = MaxNWords/nPoints;
printf("# Time (in sec) to copy and sort a list of n words,\n");
printf("# using %i values for n:\n", nPoints);
for (n=dN;n<MaxNWords;n+=dN){
sec = timeSortList(n, theList);
printf(" %10i %-20g \n", n, sec);
}
}
// ==== Fire it up. =================================================
int main(){
/*** run various test ***/
// testGetStringValue(); // Calculate values from strings
// testList(); // Create elements and list, print and clone 'em.
// trialReadWords(); // Read in the dictionary.
// testSortList(); // Insertion sort a short sample list.
/** Version 1: find max of entire file **/
//findMaxValue(readWordsIntoList(50000));
/** Version 1.1: timing of various values for nWords. **/
//analyzeFindMax(100);
//List theList = readWordsIntoList(50000); // read the whole file.
//int n = 1000;
//double sec = timeSortList(n, theList);
//printf(" It takes %g seconds to sort %i points. \n", sec,n );
analyzeSortList(20);
}
syntax highlighted by Code2HTML, v. 0.9.1