/****************************************************************************
 * HTML to Pig Latin HTML Conversion Script
 * Written By Josh Rantane
 * Last updated: Nov 21, 1995
 * http://voyager.cns.ohiou.edu/~jrantane/cgi/pig/pig.c
 *
 * modified slightly by Jim Mahoney
 *   8/98  - added INADDR and PIG_ADDRESS, on akbar.marlboro.edu
 *   10/03 - removed INADDR stuff, on cs.marlboro.edu, put in new address
 *           explicitly
 *   compiled with "gcc pig.c -o pig.cgi"
 *
 ****************************************************************************/

/*
 * Changes
 * -------
 * Nov 21 - Make the pig latin better.  Worry about the case of vowels.
 *
 * July 25, 1996:
 *   Did I mention that this thing was the biggest hack from hell?!?!?
 *   1) Add stuff so that people who don't freaking use '"' like they
 *      supposed to in A HREF/IMG tags still work.  (Damn Bastards :)).
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>

#define DEBUG 0

/* Address of this script; prepended to links that should stay "in pig". */
#define PIG_ADDRESS "http://cs.marlboro.edu/term/fall03/perl/pig/pig.cgi?"

enum {
    PIG_URL_MAX = 2048, /* longest URL (and URL component) we accept   */
    SCHEME_MAX = 16     /* longer than any scheme name in the tables   */
};

/* Result codes of read_tag_byte(). */
enum { TAG_EOF = -1, TAG_END = 0, TAG_BYTE = 1 };

/* A URL scheme recognised at the start of an attribute value. */
struct scheme {
    const char *name;   /* lower-case, including the trailing ':'        */
    const char *prefix; /* text written in front of a matching scheme    */
};

/* The pieces of the page URL that the rest of the program needs. */
struct target {
    char hostport[PIG_URL_MAX];    /* "host" or "host:port", used in links   */
    char host[PIG_URL_MAX];        /* host name only, used for the lookup    */
    char service[PIG_URL_MAX];     /* port number or service name            */
    char dir[PIG_URL_MAX];         /* directory of the page: "" or "a/b/"    */
    char request[PIG_URL_MAX + 8]; /* the "GET ...\n" line sent to the host  */
};

/*
 * Set by read_tag_byte() whenever the byte that ends a tag ('>' or NUL) or
 * end of input is consumed.  do_href()/do_img() return void, so this is how
 * copy_tag() learns that the tag was finished inside one of them.
 */
static int tag_finished;

/*
 * Return 1 if ch is an ASCII vowel (either case), 0 otherwise.
 */
int is_vowel(char ch)
{
    switch (ch) {
    case 'a': case 'A':
    case 'e': case 'E':
    case 'i': case 'I':
    case 'o': case 'O':
    case 'u': case 'U':
        return 1;
    default:
        return 0;
    }
}

/*
 * Read one byte from the socket.  Returns 1 on success.  On end of stream
 * or error it stores '\0' in *out and returns 0, so callers never spin on a
 * stale byte.
 */
static int read_byte(int fd, char *out)
{
    if (recv(fd, out, 1, 0) == 1)
        return 1;
    *out = '\0';
    return 0;
}

/*
 * Read one byte that is expected to be inside a tag.
 * Returns TAG_BYTE for an ordinary byte, TAG_END when the byte is '>' or
 * NUL (left in *c so the caller can echo it) and TAG_EOF when the stream
 * ended.  The last two also set tag_finished.
 */
static int read_tag_byte(int fd, char *c)
{
    if (!read_byte(fd, c)) {
        tag_finished = 1;
        return TAG_EOF;
    }
    if (*c == '>' || *c == '\0') {
        tag_finished = 1;
        return TAG_END;
    }
    return TAG_BYTE;
}

/*
 * Return 1 if the first len bytes of buf are a case-insensitive prefix of
 * name (name itself is lower-case), 0 otherwise.
 */
static int scheme_prefix_match(const char *name, const char *buf, size_t len)
{
    size_t k;

    for (k = 0; k < len; k++) {
        if (name[k] == '\0' || tolower((unsigned char)buf[k]) != name[k])
            return 0;
    }
    return 1;
}

/*
 * Write a link that points back at the page's own host.  held/len are the
 * first bytes of the attribute value that were already consumed.  A value
 * that is just "/" so far is an absolute path and is not given the page
 * directory; anything else is resolved against dir.
 */
static void emit_relative(const char *prefix, const char *hostname,
                          const char *dir, const char *held, size_t len)
{
    printf("%shttp://%s", prefix, hostname);
    if (len == 1 && held[0] == '/') {
        putchar('/');
    } else {
        printf("/%s", dir);
        fwrite(held, 1, len, stdout);
    }
}

/*
 * Shared worker for do_href() and do_img().  Called right after the
 * attribute name has been echoed; consumes the stream up to and including
 * the first bytes of the attribute value and writes them back with the URL
 * rewritten.
 *
 *   relative_prefix  written in front of links into the page's own host
 *   require_equals   when non-zero, give up as soon as a byte that is
 *                    neither white space nor '=' follows the name
 *   schemes          schemes that mark the value as an absolute URL
 *
 * The remainder of the value is left in the stream for the caller to echo.
 */
static void rewrite_url_attribute(const char *hostname, const char *pathname,
                                  int fd, const char *relative_prefix,
                                  int require_equals,
                                  const struct scheme *schemes,
                                  size_t nschemes)
{
    char held[SCHEME_MAX];
    size_t len = 0;
    int status;
    char c;

    if (DEBUG) {
        printf("\nDEBUGGING INFO: hostname = %s\n", hostname);
        printf("DEBUGGING INFO: pathname = %s\n", pathname);
    }

    /* The directory is always written after a '/', so drop a leading one. */
    if (pathname[0] == '/')
        pathname++;

    /* Echo everything up to and including the '='. */
    for (;;) {
        status = read_tag_byte(fd, &c);
        if (status != TAG_BYTE) {
            if (status == TAG_END)
                putchar(c);
            return;
        }
        putchar(c);
        if (c == '=')
            break;
        if (require_equals && !isspace((unsigned char)c))
            return; /* the name was not followed by '=': not our attribute */
    }

    /* Echo white space and one optional opening quote. */
    for (;;) {
        status = read_tag_byte(fd, &c);
        if (status != TAG_BYTE) {
            if (status == TAG_END)
                putchar(c);
            return;
        }
        if (isspace((unsigned char)c)) {
            putchar(c);
            continue;
        }
        if (c == '"' || c == '\'') {
            putchar(c);
            status = read_tag_byte(fd, &c);
            if (status != TAG_BYTE) {
                if (status == TAG_END)
                    putchar(c);
                return;
            }
        }
        break;
    }

    /*
     * c is now the first byte of the value.  Hold bytes back for as long as
     * they could still turn out to be one of the known schemes.
     */
    for (;;) {
        int partial = 0;
        size_t k;

        held[len++] = c;
        for (k = 0; k < nschemes; k++) {
            if (!scheme_prefix_match(schemes[k].name, held, len))
                continue;
            if (schemes[k].name[len] == '\0') {
                /* Complete scheme: the value is already an absolute URL. */
                fputs(schemes[k].prefix, stdout);
                fwrite(held, 1, len, stdout);
                return;
            }
            partial = 1;
        }
        if (!partial || len == sizeof held)
            break;

        status = read_tag_byte(fd, &c);
        if (status != TAG_BYTE) {
            /* The tag ended in mid-match: do not lose what was held. */
            emit_relative(relative_prefix, hostname, pathname, held, len);
            if (status == TAG_END)
                putchar(c);
            return;
        }
    }

    /* No scheme: the value is relative to the page being translated. */
    emit_relative(relative_prefix, hostname, pathname, held, len);
}

/*
 * OK, so we just got the "HREF" of an anchor.  Rewrite the link so that
 * following it runs the target through this script again.  ftp:, mailto:,
 * news: and https: links are passed through untouched.
 *
 *   hostname   "host" or "host:port" of the page being translated
 *   pathnameo  directory of that page (a leading '/' is ignored)
 *   fd         socket the page is being read from
 */
void do_href(char *hostname, char *pathnameo, int fd)
{
    static const struct scheme schemes[] = {
        { "http:",   PIG_ADDRESS },
        { "https:",  "" },
        { "ftp:",    "" },
        { "mailto:", "" },
        { "news:",   "" }
    };

    rewrite_url_attribute(hostname, pathnameo, fd, PIG_ADDRESS, 0,
                          schemes, sizeof schemes / sizeof schemes[0]);
}

/*
 * Same idea as do_href(), for attributes whose target must be fetched from
 * the real server (SRC, BACKGROUND, ACTION): relative URLs are made
 * absolute but are not routed through this script.
 *
 *   hostname   "host" or "host:port" of the page being translated
 *   pathnameo  directory of that page (a leading '/' is ignored)
 *   fd         socket the page is being read from
 */
void do_img(char *hostname, char *pathnameo, int fd)
{
    static const struct scheme schemes[] = {
        { "http:",  "" },
        { "https:", "" }
    };

    rewrite_url_attribute(hostname, pathnameo, fd, "", 1,
                          schemes, sizeof schemes / sizeof schemes[0]);
}

/*
 * Value of a hexadecimal digit, or -1 if c is not one.
 */
static int hex_value(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/*
 * Convert the two hexadecimal digits at what[0] and what[1] to the
 * character they encode.  Returns '\0' if either is not a hex digit;
 * what[1] is only examined when what[0] is valid.
 *
 * Note: originally taken from the NCSA form-handling code.
 */
char x2c(char *what)
{
    int hi = hex_value(what[0]);
    int lo = (hi < 0) ? -1 : hex_value(what[1]);

    if (hi < 0 || lo < 0)
        return '\0';
    return (char)(hi * 16 + lo);
}

/*
 * Decode %XX escapes in url in place.  A '%' that is not followed by two
 * hexadecimal digits is copied through unchanged, so the scan never steps
 * over the terminating NUL.
 *
 * Note: originally taken from the NCSA form-handling code.
 */
void unescape_url(char *url)
{
    size_t r, w;

    for (r = 0, w = 0; url[r] != '\0'; r++, w++) {
        if (url[r] == '%' && hex_value(url[r + 1]) >= 0 &&
            hex_value(url[r + 2]) >= 0) {
            url[w] = x2c(&url[r + 1]);
            r += 2;
        } else {
            url[w] = url[r];
        }
    }
    url[w] = '\0';
}

/*
 * Return 1 if s ends with suffix, ignoring case.  suffix must be
 * lower-case.
 */
static int ends_with_nocase(const char *s, const char *suffix)
{
    size_t slen = strlen(s);
    size_t xlen = strlen(suffix);
    size_t k;

    if (xlen > slen)
        return 0;
    s += slen - xlen;
    for (k = 0; k < xlen; k++) {
        if (tolower((unsigned char)s[k]) != suffix[k])
            return 0;
    }
    return 1;
}

/*
 * Split "http://host[:port]/path" into the fields of *t.
 *
 * A path that ends in ".html" or ".htm" names a file: it is requested as
 * given and its directory becomes t->dir.  Any other path is taken to be a
 * directory and gets a trailing '/'.  The port defaults to "80".
 *
 * Returns 0 on success, -1 if url is not an http:// URL with a host or is
 * too long for the buffers.
 */
static int parse_url(const char *url, struct target *t)
{
    const char *authority;
    const char *slash;
    const char *path;
    const char *last_slash;
    char *colon;
    size_t url_len = strlen(url);
    size_t host_len;
    size_t dir_len;
    int is_file;

    if (strncmp(url, "http://", 7) != 0 || url_len < 8 ||
        url_len >= PIG_URL_MAX - 2)
        return -1;

    /* Everything up to the first '/' after the scheme is host[:port]. */
    authority = url + 7;
    slash = strchr(authority, '/');
    host_len = (slash != NULL) ? (size_t)(slash - authority)
                               : strlen(authority);
    if (host_len == 0)
        return -1;
    memcpy(t->hostport, authority, host_len);
    t->hostport[host_len] = '\0';

    /* A ':' with no '.' after it separates the host from the port. */
    memcpy(t->host, t->hostport, host_len + 1);
    snprintf(t->service, sizeof t->service, "%s", "80");
    colon = strrchr(t->host, ':');
    if (colon != NULL && strchr(colon, '.') == NULL) {
        *colon = '\0';
        if (colon[1] != '\0')
            snprintf(t->service, sizeof t->service, "%s", colon + 1);
    }

    /* The path is whatever follows that '/'; one leading '\' is skipped. */
    path = (slash != NULL) ? slash + 1 : "";
    if (*path == '\\')
        path++;

    is_file = ends_with_nocase(path, ".html") || ends_with_nocase(path, ".htm");
    if (is_file) {
        last_slash = strrchr(path, '/');
        dir_len = (last_slash != NULL) ? (size_t)(last_slash - path) + 1 : 0;
    } else {
        dir_len = strlen(path);
    }

    memcpy(t->dir, path, dir_len);
    if (!is_file && dir_len > 0 && path[dir_len - 1] != '/')
        t->dir[dir_len++] = '/';
    t->dir[dir_len] = '\0';

    snprintf(t->request, sizeof t->request, "GET /%s\n",
             is_file ? path : t->dir);
    return 0;
}

/*
 * Extract the URL from the QUERY_STRING environment variable.  The first
 * five bytes (the "TEXT=" field name) are skipped, the value is cut at the
 * first '&' and then unescaped into dst.
 *
 * Returns dst, or "" when the variable is missing, too short, or the value
 * does not fit in dst.
 */
static const char *url_from_query(char *dst, size_t dst_size)
{
    const char *query = getenv("QUERY_STRING");
    size_t n;

    if (query == NULL || strlen(query) < 5)
        return "";
    query += 5;
    n = strcspn(query, "&");
    if (n >= dst_size)
        return "";
    memcpy(dst, query, n);
    dst[n] = '\0';
    unescape_url(dst);
    return dst;
}

/*
 * Open a TCP connection to host on the given port number or service name.
 * On any failure an error line is written to the page and the program
 * exits.  Returns the connected socket.
 */
static int open_connection(const char *host, const char *service)
{
    static const char protocol[] = "tcp";
    struct sockaddr_in sin;
    struct hostent *he;
    struct servent *se;
    struct protoent *pe;
    int fd;

    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;

    /* map hostname to IP address, allowing for dotted decimal */
    he = gethostbyname(host);
    if (he != NULL && he->h_addrtype == AF_INET &&
        he->h_length == (int)sizeof sin.sin_addr &&
        he->h_addr_list[0] != NULL) {
        memcpy(&sin.sin_addr, he->h_addr_list[0], sizeof sin.sin_addr);
    } else if ((sin.sin_addr.s_addr = inet_addr(host)) == INADDR_NONE) {
        printf("can't get %s host entry\n", host);
        exit(0);
    }

    /* map service name to port number */
    se = getservbyname(service, protocol);
    if (se != NULL) {
        sin.sin_port = (in_port_t)se->s_port;
    } else {
        char *end;
        long port = strtol(service, &end, 10);

        if (end == service || *end != '\0' || port < 1 || port > 65535) {
            printf("can't get %s service entry\n", service);
            exit(0);
        }
        sin.sin_port = htons((unsigned short)port);
    }

    /* map protocol name to protocol number */
    pe = getprotobyname(protocol);
    if (pe == NULL) {
        printf("can't get %s protocol entry\n", protocol);
        exit(0);
    }

    /* allocate a socket */
    fd = socket(PF_INET, SOCK_STREAM, pe->p_proto);
    if (fd < 0) {
        printf("could not create socket");
        exit(0);
    }

    if (connect(fd, (struct sockaddr *)&sin, sizeof sin) < 0) {
        close(fd); /* lets not be leaving any descriptors around */
        printf("can't connect to %s.%s\n", host, service);
        exit(0);
    }
    return fd;
}

/*
 * Send all len bytes of buf, retrying after partial writes.
 * Returns 0 on success, -1 on failure.
 */
static int send_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = send(fd, buf, len, 0);

        if (n <= 0)
            return -1;
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Copy the rest of a tag (the '<' has already been written) to stdout.
 * While copying, watch for the attribute names SRC, HREF, BACKGROUND and
 * ACTION and hand the stream to do_img()/do_href() when one is seen, so
 * that its URL is rewritten.  Returns once the closing '>' (or a NUL byte)
 * has been written, or at end of input.
 */
static void copy_tag(char *hostname, char *pathname, int fd)
{
    static const struct attr {
        const char *word;
        void (*rewrite)(char *, char *, int);
    } attrs[] = {
        { "src",        do_img  },
        { "href",       do_href },
        { "background", do_img  },
        { "action",     do_img  } /* forms post to the real server */
    };
    const struct attr *cur = NULL; /* keyword being matched, if any      */
    size_t matched = 0;            /* letters of cur->word seen so far   */

    for (;;) {
        void (*rewrite)(char *, char *, int);
        size_t k;
        char c;
        int lc;
        int status = read_tag_byte(fd, &c);

        if (status == TAG_EOF)
            return;
        putchar(c);
        if (status == TAG_END)
            return;

        lc = tolower((unsigned char)c);
        if (cur == NULL) {
            /* Not inside a keyword: does this letter start one? */
            for (k = 0; k < sizeof attrs / sizeof attrs[0]; k++) {
                if (attrs[k].word[0] == lc) {
                    cur = &attrs[k];
                    matched = 1;
                    break;
                }
            }
            continue;
        }
        if (cur->word[matched] != lc) {
            cur = NULL; /* mismatch: start over with the next byte */
            continue;
        }
        if (cur->word[++matched] != '\0')
            continue;

        /* The whole keyword has been seen. */
        rewrite = cur->rewrite;
        cur = NULL;
        tag_finished = 0;
        rewrite(hostname, pathname, fd);
        if (tag_finished)
            return; /* the closing '>' was consumed by the rewriter */
    }
}

/*
 * Finish the word in progress, if any: write the held-back letter followed
 * by "ay" and clear it.
 */
static void end_word(char *held)
{
    if (*held != '\0') {
        putchar(*held);
        putchar('a');
        putchar('y');
        *held = '\0';
    }
}

/*
 * Read the page from fd and write it to stdout in pig latin.  Tags are
 * copied through (see copy_tag()); everything else is split into runs of
 * letters.  A word starting with a consonant has that letter moved to the
 * end; a word starting with a vowel gets a 'y' there instead; both are
 * followed by "ay".  If the moved letter was a capital, it is lower-cased
 * and the new first letter is capitalised.
 */
static void translate(char *hostname, char *pathname, int fd)
{
    char held = '\0'; /* letter waiting to be appended; 0 = not in a word */
    char c;

    while (read_byte(fd, &c)) {
        if (c == '<') {
            end_word(&held);
            putchar(c);
            copy_tag(hostname, pathname, fd);
            continue;
        }
        if (!isalpha((unsigned char)c)) {
            end_word(&held);
            putchar(c);
            continue;
        }
        if (held == '\0') {
            /* First letter of a word. */
            if (is_vowel(c)) {
                held = 'y';
                putchar(c);
            } else {
                held = c;
            }
            continue;
        }
        if (isupper((unsigned char)held)) {
            /*
             * Happens once per word: hand the capital over to the letter
             * that now comes first.  In an all-caps word that letter is
             * already a capital.
             */
            held = (char)tolower((unsigned char)held);
            c = (char)toupper((unsigned char)c);
        }
        putchar(c);
    }
    end_word(&held); /* the input may end in the middle of a word */
}

/*
 * CGI entry point.  The URL of the page to translate comes from argv[1]
 * or, when there are no arguments, from the QUERY_STRING environment
 * variable.  The page is fetched with a plain "GET" and written to stdout
 * in pig latin, with its links rewritten.  Errors are reported in the page
 * itself and the exit status is always 0.
 */
int main(int argc, char *argv[])
{
    static struct target t;
    char query_url[PIG_URL_MAX];
    const char *url;
    int fd;

    printf("Content-type: text/html\n\n");

    /*
     * Parse the URL
     */
    if (argc > 2) {
        printf("ERROR, wrong number of args. Syntax is : %s \n", argv[0]);
        exit(0);
    }

    if (DEBUG && argc > 0) {
        printf("\nDEBUGGING INFO: argv[0] = %s\n", argv[0]);
    }

    if (argc <= 1)
        url = url_from_query(query_url, sizeof query_url);
    else
        url = argv[1];

    if (parse_url(url, &t) != 0) {
        printf("Error in URL: %s (Try using http://hostname/path)\n", url);
        exit(0);
    }

    if (DEBUG) {
        printf("\nDEBUGGING INFO: hostname = %s\n", t.hostport);
        printf("DEBUGGING INFO: pathname = %s\n", t.dir);
        printf("DEBUGGING INFO: request = %s\n", t.request);
    }

    /*
     * Now we connect to the URL and ask for the page
     */
    fd = open_connection(t.host, t.service);
    if (send_all(fd, t.request, strlen(t.request)) != 0) {
        close(fd);
        printf("can't send request to %s\n", t.hostport);
        exit(0);
    }

    /*
     * Here's where we start reading in and writing out the file
     */
    translate(t.hostport, t.dir, fd);

    close(fd);
    exit(0);
}