/****************************************************************************
* HTML to Pig Latin HTML Conversion Script
* Written By Josh Rantane
* Last updated: Nov 21, 1995
*   http://voyager.cns.ohiou.edu/~jrantane/cgi/pig/pig.c
*
* modified slightly by Jim Mahoney
*  8/98  - added INADDR and PIG_ADDRESS, on akbar.marlboro.edu
*  10/03 - removed INADDR stuff, on cs.marlboro.edu, put in new address explicitly
*          compiled with "gcc pig.c -o pig.cgi"
* 
****************************************************************************/

/*
 * Changes
 * -------
 * Nov 21 - Make the pig latin better.  Worry about the case of vowels.
 *
 * July 25, 1996:
 *   Did I mention that this thing was the biggest hack from hell?!?!?
 *   1) Add stuff so that people who don't freaking use '"' like they're
 *      supposed to in A HREF/IMG tags still work.  (Damn Bastards :)).
 */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <unistd.h>

#define DEBUG 0

/*
 * is_vowel - tell whether ch is one of the ten ASCII vowel letters.
 *
 * Returns 1 for a, e, i, o, u in either case and 0 for every other
 * character.  'y' is never treated as a vowel.
 */
int is_vowel(char ch)
{
    static const char vowels[] = "aeiouAEIOU";
    const char *probe;

    /* The scan stops at the terminator, so ch == '\0' is never a match. */
    for (probe = vowels; *probe != '\0'; probe++) {
        if (*probe == ch) {
            return 1;
        }
    }
    return 0;
}

       /* 
        * OK, so we just got the '<A HREF'. Now we have to
        * figure out what to do with it.
        */
/*
 * do_href - rewrite the start of an HREF value so the link goes back
 *           through the pig.cgi proxy.
 *
 * Called right after the caller has echoed the letters "href".  Bytes are
 * read one at a time from fd and written to stdout:
 *
 *   - everything up to and including '=' is echoed unchanged;
 *   - an optional opening '"' and any whitespace before the value is echoed;
 *   - "http:" becomes "<proxy>http:", so the target is fetched via pig.cgi;
 *   - "ftp:", "mailto:" and "news:" are passed through (lower-cased);
 *   - a value starting with '/' becomes "<proxy>http://<hostname>/";
 *   - any other value is relative and becomes
 *     "<proxy>http://<hostname>/<pathname>" followed by the characters
 *     already consumed, in their original case.
 *
 * Only the start of the value is handled here.  Once the prefix has been
 * written, one more byte is read and echoed and the function returns; the
 * caller copies the rest of the tag.  That last byte may be the tag's
 * closing '>', which the caller has no way to detect.
 *
 * hostname   host (optionally with ":port") of the page being converted.
 * pathnameo  directory of the page being converted; "/" means the site
 *            root.  It is not modified.
 * fd         connected socket the page is being read from.
 *
 * The function stops quietly if the stream ends or recv() fails.
 */
void do_href(char *hostname, char *pathnameo, int fd)
{
    static const char proxy[] =
        "http://cs.marlboro.edu/term/fall03/perl/pig/pig.cgi?";
    static const char *const schemes[] = { "http", "ftp", "mailto", "news" };
    enum { AT_NAME, AT_VALUE_START, AT_FIRST_CHAR, IN_SCHEME, FINISHED };

    const char *dir = pathnameo;
    const char *scheme = NULL;  /* scheme name currently being matched */
    char seen[8];               /* value characters consumed so far, as read */
    size_t matched = 0;         /* number of characters stored in seen[] */
    size_t k;
    int state = AT_NAME;
    int have;                   /* non-zero while `a` holds a freshly read byte */
    int lower;
    char a = '\0';

#if DEBUG
    printf("\n");
    printf("DEBUGGING INFO: hostname = %s\n", hostname);
    printf("DEBUGGING INFO: pathname = %s\n", pathnameo);
#endif

    /*
     * The rewritten URL already has a '/' after the hostname, so a root
     * directory contributes nothing more.
     */
    if (strcmp(dir, "/") == 0) {
        dir = "";
    }

    have = (recv(fd, &a, 1, 0) == 1);
    while (have && a != '>' && a != '\0' && state != FINISHED) {
        lower = tolower((unsigned char)a);

        switch (state) {
        case AT_NAME:
            /* Still between "href" and its '=': copy through. */
            putchar(a);
            if (a == '=') {
                state = AT_VALUE_START;
            }
            break;

        case AT_VALUE_START:
            if (a == '"') {
                putchar(a);
                state = AT_FIRST_CHAR;
                break;
            }
            if (isspace((unsigned char)a)) {
                putchar(a);
                break;
            }
            /* Unquoted value: this byte is already its first character. */
            state = AT_FIRST_CHAR;
            /* fall through */

        case AT_FIRST_CHAR:
            scheme = NULL;
            for (k = 0; k < sizeof schemes / sizeof schemes[0]; k++) {
                if (lower == schemes[k][0]) {
                    scheme = schemes[k];
                    break;
                }
            }
            if (scheme != NULL) {
                /* Might be a scheme; hold the byte until we know. */
                seen[0] = a;
                matched = 1;
                state = IN_SCHEME;
            } else {
                printf("%shttp://%s", proxy, hostname);
                if (a != '/') {
                    printf("/%s%c", dir, a);   /* relative to current dir */
                } else {
                    putchar(a);                /* absolute path on this host */
                }
                state = FINISHED;
            }
            break;

        case IN_SCHEME:
            if (scheme[matched] != '\0' && lower == scheme[matched]) {
                seen[matched++] = a;           /* still matching the scheme */
                break;
            }
            if (scheme[matched] == '\0' && a == ':') {
                if (scheme == schemes[0]) {
                    printf("%shttp:", proxy);  /* only http goes via proxy */
                } else {
                    printf("%s:", scheme);
                }
            } else {
                /*
                 * Not a scheme after all: a relative link whose name
                 * merely starts like one.  Resolve it against the current
                 * directory and replay the held characters as they were
                 * read, so the case of the path is preserved.
                 */
                printf("%shttp://%s/%s%.*s%c",
                       proxy, hostname, dir, (int)matched, seen, a);
            }
            state = FINISHED;
            break;

        default:
            state = FINISHED;
            break;
        }

        have = (recv(fd, &a, 1, 0) == 1);
    }

    /* Echo the byte that ended the loop, unless the stream simply ran out. */
    if (have) {
        putchar(a);
    }
}

/*
 * do_img - turn the start of a SRC / BACKGROUND / ACTION value into an
 *          absolute URL on the original server.
 *
 * Called right after the caller has echoed the attribute name.  Bytes are
 * read one at a time from fd and written to stdout:
 *
 *   - whitespace and the '=' after the name are echoed; any other
 *     character there means this was not an attribute assignment and the
 *     function gives up after echoing it;
 *   - an optional opening '"' and any whitespace before the value is echoed;
 *   - "http:" is passed through (lower-cased);
 *   - a value starting with '/' becomes "http://<hostname>/";
 *   - any other value is relative and becomes
 *     "http://<hostname>/<pathname>" followed by the characters already
 *     consumed, in their original case.
 *
 * Only the start of the value is handled here.  Once the prefix has been
 * written, one more byte is read and echoed and the function returns; the
 * caller copies the rest of the tag.  That last byte may be the tag's
 * closing '>', which the caller has no way to detect.
 *
 * hostname   host (optionally with ":port") of the page being converted.
 * pathnameo  directory of the page being converted; one leading '/' is
 *            ignored.  It is not modified.
 * fd         connected socket the page is being read from.
 *
 * The function stops quietly if the stream ends or recv() fails.
 */
void do_img(char *hostname, char *pathnameo, int fd)
{
    static const char scheme[] = "http";
    enum { AT_NAME, AT_VALUE_START, AT_FIRST_CHAR, IN_SCHEME, FINISHED };

    const char *dir = pathnameo;
    char seen[8];               /* value characters consumed so far, as read */
    size_t matched = 0;         /* number of characters stored in seen[] */
    int state = AT_NAME;
    int have;                   /* non-zero while `a` holds a freshly read byte */
    int lower;
    char a = '\0';

#if DEBUG
    printf("\nDEBUGGING INFO: hostname = %s\n", hostname);
    printf("DEBUGGING INFO: pathname = %s\n", pathnameo);
#endif

    /* The output already has a '/' after the hostname. */
    if (dir[0] == '/') {
        dir++;
    }

    have = (recv(fd, &a, 1, 0) == 1);
    while (have && a != '>' && a != '\0' && state != FINISHED) {
        lower = tolower((unsigned char)a);

        switch (state) {
        case AT_NAME:
            putchar(a);
            if (a == '=') {
                state = AT_VALUE_START;
            } else if (!isspace((unsigned char)a)) {
                state = FINISHED;      /* not "name =", leave the tag alone */
            }
            break;

        case AT_VALUE_START:
            if (a == '"') {
                putchar(a);
                state = AT_FIRST_CHAR;
                break;
            }
            if (isspace((unsigned char)a)) {
                putchar(a);
                break;
            }
            /* Unquoted value: this byte is already its first character. */
            state = AT_FIRST_CHAR;
            /* fall through */

        case AT_FIRST_CHAR:
            if (lower == scheme[0]) {
                /* Might be "http:"; hold the byte until we know. */
                seen[0] = a;
                matched = 1;
                state = IN_SCHEME;
            } else {
                printf("http://%s", hostname);
                if (a != '/') {
                    printf("/%s%c", dir, a);   /* relative to current dir */
                } else {
                    putchar(a);                /* absolute path on this host */
                }
                state = FINISHED;
            }
            break;

        case IN_SCHEME:
            if (scheme[matched] != '\0' && lower == scheme[matched]) {
                seen[matched++] = a;           /* still matching "http" */
                break;
            }
            if (scheme[matched] == '\0' && a == ':') {
                printf("http:");               /* already an absolute URL */
            } else {
                /*
                 * A relative name that merely starts like "http".  Replay
                 * the held characters as they were read, so the case of
                 * the path is preserved.
                 */
                printf("http://%s/%s%.*s%c",
                       hostname, dir, (int)matched, seen, a);
            }
            state = FINISHED;
            break;

        default:
            state = FINISHED;
            break;
        }

        have = (recv(fd, &a, 1, 0) == 1);
    }

    /* Echo the byte that ended the loop, unless the stream simply ran out. */
    if (have) {
        putchar(a);
    }
}

/*
 * Value of one hexadecimal digit (0-15), or -1 if c is not a hex digit.
 */
static int pig_hex_value(int c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
 * x2c - convert two hexadecimal digits to the character they encode.
 *
 * what  points at two hex digits (either case); nothing after them is read.
 *
 * Returns the decoded character, or '\0' if either position does not hold
 * a hex digit.
 *
 * Note: the original of this function came from the NCSA form-handling code.
 */
char x2c(char *what)
{
    int high = pig_hex_value((unsigned char)what[0]);
    int low;

    if (high < 0) {
        return '\0';            /* also covers what[0] == '\0' */
    }
    low = pig_hex_value((unsigned char)what[1]);
    if (low < 0) {
        return '\0';
    }
    return (char)(high * 16 + low);
}

/*
 * unescape_url - decode %XX escapes in url, in place.
 *
 * Each '%' followed by two hex digits is replaced by the character they
 * encode, and the string is shortened accordingly.  A '%' that is not
 * followed by two hex digits (including one at the very end of the string)
 * is copied through unchanged, so the scan never steps past the
 * terminator.  '+' is not translated.  "%00" decodes to a NUL byte, which
 * ends the string early for later readers.
 *
 * Note: the original of this function came from the NCSA form-handling code.
 */
void unescape_url(char *url)
{
    size_t in = 0;
    size_t out = 0;

    while (url[in] != '\0') {
        /*
         * The && chain only looks at url[in + 2] once url[in + 1] is known
         * to be a hex digit, i.e. not the terminator.
         */
        if (url[in] == '%'
            && pig_hex_value((unsigned char)url[in + 1]) >= 0
            && pig_hex_value((unsigned char)url[in + 2]) >= 0) {
            url[out++] = x2c(&url[in + 1]);
            in += 3;
        } else {
            url[out++] = url[in++];
        }
    }
    url[out] = '\0';
}


/* Longest URL accepted, including the terminating NUL. */
enum { PIG_URL_MAX = 2048 };

/*
 * Read one byte from the socket.
 * Returns 1 when a byte was stored in *out, 0 on end of stream or error.
 */
static int pig_read_byte(int fd, char *out)
{
    return recv(fd, out, 1, 0) == 1;
}

/* Returns non-zero when s ends with suffix (case-sensitive). */
static int pig_ends_with(const char *s, const char *suffix)
{
    size_t s_len = strlen(s);
    size_t suffix_len = strlen(suffix);

    return s_len >= suffix_len
        && strcmp(s + (s_len - suffix_len), suffix) == 0;
}

/*
 * Finish the Pig Latin word in progress, if any: write the held-back
 * letter followed by "ay" and clear the holder.
 */
static void pig_end_word(char *pending)
{
    if (*pending != 0) {
        putchar(*pending);
        putchar('a');
        putchar('y');
        *pending = 0;
    }
}

/*
 * Copy the rest of a tag (the caller has already echoed '<') to stdout.
 *
 * While copying, the letters "src", "href", "background" and "action" are
 * recognised wherever they occur, in any case.  When one is completed the
 * following bytes are handed to do_href() (for "href") or do_img() (for
 * the others) so the URL can be rewritten.  A mismatching letter simply
 * resets the matcher; it is not re-examined as the start of a new name.
 *
 * NOTE: do_href()/do_img() may themselves consume and echo the tag's
 * closing '>'.  This function cannot detect that and will then keep
 * copying bytes verbatim until the next '>'.
 *
 * Returns 1 when the terminating '>' (or a NUL byte) was copied, 0 when
 * the stream ended inside the tag.
 */
static int pig_copy_tag(int fd, char *hostname, char *dir)
{
    static const char *const names[] = {
        "src", "href", "background", "action"
    };
    const char *name = NULL;    /* attribute name being matched, if any */
    size_t pos = 0;             /* letters of `name` matched so far */
    size_t k;
    int lower;
    char a;

    while (pig_read_byte(fd, &a)) {
        putchar(a);
        if (a == '\0' || a == '>') {
            return 1;
        }
        lower = tolower((unsigned char)a);

        if (name == NULL) {
            for (k = 0; k < sizeof names / sizeof names[0]; k++) {
                if (lower == names[k][0]) {
                    name = names[k];
                    pos = 1;
                    break;
                }
            }
        } else if (lower != name[pos]) {
            name = NULL;
        } else if (name[++pos] == '\0') {
            if (name == names[1]) {
                do_href(hostname, dir, fd);
            } else {
                do_img(hostname, dir, fd);
            }
            name = NULL;
        }
    }
    return 0;
}

/*
 * main - CGI entry point: fetch a page and print it in Pig Latin.
 *
 * The URL comes from argv[1] when one argument is given.  With no
 * argument it is taken from the QUERY_STRING environment variable, whose
 * first five characters (the "TEXT=" form-field prefix) are skipped and
 * whose value ends at the first '&'.
 * NOTE(review): the original source used an uninitialised pointer at this
 * point; QUERY_STRING is what its comments describe - confirm against the
 * HTML form that invokes this program.
 *
 * Steps:
 *   1. Print the CGI header.
 *   2. Split "http://host[:port]/path" into host, port and path.  A path
 *      ending in "htm" or "html" is a file; anything else is a directory
 *      and gets a trailing '/'.
 *   3. Connect to the host and send "GET <path>" on a line by itself (no
 *      HTTP version or headers).
 *   4. Copy the reply to stdout.  Tags are copied through with link
 *      attributes rewritten by do_href()/do_img().  Outside tags, a word
 *      starting with a consonant has that letter moved to the end followed
 *      by "ay"; a word starting with a vowel gets "yay" appended.
 *
 * Every failure prints a message into the page and exits with status 0,
 * since the CGI header has already been sent.
 */
int main(int argc, char *argv[])
{
    char url[PIG_URL_MAX];           /* working copy of the whole URL */
    char dir[PIG_URL_MAX + 2];       /* path to fetch, later cut to its directory */
    char request[PIG_URL_MAX + 16];
    const char *raw;
    const char *service = "80";
    const char *protocol = "tcp";
    char *hostname;
    char *path;
    char *slash;
    char *colon;
    char *dot;
    struct hostent *host;
    struct servent *port;
    struct protoent *proto;
    struct sockaddr_in sin;
    size_t len;
    int is_file;
    int written;
    int fd;
    char a;
    char b = 0;     /* 0: not inside a word; otherwise the letter to append */

    printf("Content-type: text/html\n\n");

    if (argc > 2) {
        printf("ERROR, wrong number of args. Syntax is : %s <URL> \n", argv[0]);
        exit(0);
    }

#if DEBUG
    printf("\nDEBUGGING INFO: argv[0] = %s\n", argc > 0 ? argv[0] : "(none)");
#endif

    /*
     * Locate the URL text.
     */
    if (argc == 2) {
        raw = argv[1];
    } else {
        raw = getenv("QUERY_STRING");
        if (raw == NULL || strlen(raw) < 5) {
            printf("Error in URL: %s (Try using http://hostname/path)\n",
                   raw != NULL ? raw : "");
            exit(0);
        }
        raw += 5;       /* take off the "TEXT=" part */
    }

    /*
     * Work on a private, bounded copy.  The parsing below appends a '/'
     * in places, which must never happen in argv or the environment.
     */
    written = snprintf(url, sizeof url, "%s", raw);
    if (written < 0 || (size_t)written >= sizeof url) {
        printf("Error in URL: URL is too long\n");
        exit(0);
    }

    if (argc != 2) {
        /* Drop any further form fields, then undo the %XX escaping. */
        char *amp = strchr(url, '&');

        if (amp != NULL) {
            *amp = '\0';
        }
        unescape_url(url);
    }

    if (strncmp(url, "http://", 7) != 0 || strlen(url) < 8) {
        printf("Error in URL: %s (Try using http://hostname/path)\n", url);
        exit(0);
    }
    hostname = url + 7;

    /*
     * The path is everything after the first '/' following the hostname.
     */
    slash = strchr(hostname, '/');
    if (slash != NULL) {
        *slash = '\0';
        path = slash + 1;
    } else {
        path = hostname + strlen(hostname);     /* empty path */
    }
    if (*path == '\\') {
        path++;
    }

    /*
     * Something ending in "html" or "htm" is a file.  Anything else is
     * taken to be a directory and gets a trailing '/' if it lacks one.
     */
    len = strlen(path);
    is_file = pig_ends_with(path, "html") || pig_ends_with(path, "htm");
    snprintf(dir, sizeof dir, "%s%s", path,
             (!is_file && (len == 0 || path[len - 1] != '/')) ? "/" : "");

    snprintf(request, sizeof request, "GET %s%s\n",
             dir[0] == '/' ? "" : "/", dir);

    /*
     * From here on `dir` is only used to resolve relative links, so cut
     * it back to the directory part, trailing '/' included.
     */
    slash = strrchr(dir, '/');
    if (slash != NULL) {
        slash[1] = '\0';
    } else {
        strcpy(dir, "/");       /* file directly under the site root */
    }

    /*
     * A ':' after the last '.' of the hostname introduces a port number.
     * Cut it off for the lookups; it is put back after connecting so that
     * rewritten links keep the port.
     */
    colon = strrchr(hostname, ':');
    dot = strrchr(hostname, '.');
    if (colon != NULL && (dot == NULL || colon > dot)) {
        *colon = '\0';
        service = colon + 1;
    } else {
        colon = NULL;
    }

#if DEBUG
    printf("\nDEBUGGING INFO: hostname  = %s\n", hostname);
    printf("DEBUGGING INFO: pathname  = %s\n", dir);
    printf("DEBUGGING INFO: request   = %s", request);
#endif

    /*
     * Now we connect to the URL.
     */
    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;

    /* map hostname to IP address, allowing for dotted decimal */
    host = gethostbyname(hostname);
    if (host != NULL && host->h_addrtype == AF_INET
        && host->h_addr_list[0] != NULL
        && (size_t)host->h_length <= sizeof sin.sin_addr) {
        memcpy(&sin.sin_addr, host->h_addr_list[0], (size_t)host->h_length);
    } else if ((sin.sin_addr.s_addr = inet_addr(hostname)) == INADDR_NONE) {
        printf("can't get %s host entry\n", hostname);
        exit(0);
    }

    /* map service name to port number */
    port = getservbyname(service, protocol);
    if (port != NULL) {
        sin.sin_port = (in_port_t)port->s_port;
    } else {
        char *end;
        long number = strtol(service, &end, 10);

        if (end == service || *end != '\0' || number < 1 || number > 65535) {
            printf("can't get %s service entry\n", service);
            exit(0);
        }
        sin.sin_port = htons((unsigned short)number);
    }

    /* map protocol name to protocol number */
    proto = getprotobyname(protocol);
    if (proto == NULL) {
        printf("can't get %s protocol entry\n", protocol);
        exit(0);
    }

    /* allocate a socket */
    fd = socket(PF_INET, SOCK_STREAM, proto->p_proto);
    if (fd < 0) {
        printf("could not create socket");
        exit(0);
    }

    if (connect(fd, (struct sockaddr *)&sin, sizeof sin) < 0) {
        close(fd);      /* lets not be leaving any descriptors around */
        printf("can't connect to %s.%s\n", hostname, service);
        exit(0);
    }

    /* Put the ":port" back onto the hostname for link rewriting. */
    if (colon != NULL) {
        *colon = ':';
    }

    len = strlen(request);
    if (send(fd, request, len, 0) != (ssize_t)len) {
        close(fd);
        printf("can't send request to %s\n", hostname);
        exit(0);
    }

    /*************************
     *
     * Here's where we start reading in and writing out the file
     *
     ***********************************/
    while (pig_read_byte(fd, &a)) {

        if (a == '<') {                 /* beginning of a tag */
            pig_end_word(&b);
            putchar(a);
            if (!pig_copy_tag(fd, hostname, dir)) {
                break;                  /* stream ended inside the tag */
            }
            continue;
        }

        if (!isalpha((unsigned char)a)) {
            /* Not a letter: finish any word in progress, then copy it. */
            pig_end_word(&b);
            putchar(a);
        } else if (b == 0) {
            /* First letter of a word. */
            if (is_vowel(a)) {
                b = 'y';                /* vowel words end in "yay" */
                putchar(a);
            } else {
                b = a;                  /* hold the consonant for the end */
            }
        } else {
            /*
             * Inside a word.  If the held letter is a capital, lower it
             * and move the capital to the new first letter - unless that
             * one is a capital too (an all-caps word).  This happens at
             * most once per word because b is lower-case afterwards.
             */
            if (b >= 'A' && b <= 'Z') {
                b = (char)(b + ('a' - 'A'));
                if (!(a >= 'A' && a <= 'Z')) {
                    a = (char)(a - ('a' - 'A'));
                }
            }
            putchar(a);
        }
    }

    /* The page may end in the middle of a word. */
    pig_end_word(&b);

    close(fd);
    return 0;
}