Sep 28
Considering the state of my Python lexer and that I actually want to be working with a LISP-like syntax, I wrote another lexer (~even with a little error handling~).
#lang racket
#| toy_lisp_lex.rkt
Description:
A simple LISP/scheme Tokenizer.
To use:
$ racket toy_lisp_lex.rkt <file-to-lex>
Racket 6.3 | 9/23/16 | License MIT
|#
(require parser-tools/lex)
(require parser-tools/lex-sre)
; Global flag: calc-lexer's (eof) rule sets it to #t, and reader checks it
; before every token to know when to stop.
(define end-of-file #f)
; comment? matches either kind of comment:
;   - a line comment: a semicolon followed by everything up to (not including)
;     the next newline;
;   - a block comment: the opening delimiter, any text that does not itself
;     contain the closing delimiter, then the closing delimiter.
(define-lex-abbrev comment?
  (union
   (concatenation ";" (repetition 0 +inf.0 (char-complement #\newline)))
   (concatenation "#|"
                  (complement (concatenation any-string "|#" any-string))
                  "|#")))
; meta-word? matches a hash sign, a run of letters, and one trailing space
; (e.g. the "#lang " prefix of a module line).
; The letter run may not be exactly "t" or "f": otherwise "#t " / "#f " would
; match here with a longer lexeme than the boolean literal and, because the
; lexer prefers the longest match, booleans followed by a space would be
; reported as meta words.
(define-lex-abbrev meta-word?
  (: #\#
     (intersection (* alphabetic) (complement (or "t" "f")))
     #\space))
; literal? matches the literal forms this toy lexer knows about:
;   - integers:            one or more digits
;   - dotted numbers:      digits, a dot, then digits or lowercase letters;
;                          or a leading dot followed by digits. At least one
;                          digit is required, so a bare "." is not a literal.
;   - strings:             a double quote, then any characters other than an
;                          unescaped double quote (backslash escapes the next
;                          character), then the closing double quote. The body
;                          must exclude the quote itself: the lexer takes the
;                          longest match, so an any-char body would run from
;                          the first quote to the last quote in the input.
;   - quoted single chars: 'x'
;   - booleans:            #f and #t
(define-lex-abbrev literal?
  (or (+ (char-range #\0 #\9))
      (: (+ (char-range #\0 #\9))
         "."
         (or (* (char-range #\0 #\9)) (* (char-range #\a #\z))))
      (: "." (+ (char-range #\0 #\9)))
      (: #\"
         (* (or (char-complement (or #\" #\\))
                (: #\\ any-char)))
         #\")
      (: #\' any-char #\')
      (or "#f" "#t")))
; identifier? matches a name: one or more letters, followed by any mix of
; letters, digits and the listed punctuation characters.
; Digits and * are accepted after the first letter so that names such as
; x1 or list* lex as a single ID token instead of being split.
(define-lex-abbrev identifier?
  (: (+ alphabetic)
     (* (or alphabetic
            (char-range #\0 #\9)
            #\- #\> #\< #\_ #\+ #\. #\/ #\= #\! #\?
            #\: #\$ #\% #\& #\~ #\^ #\*))))
; lparen? matches any one opening bracket: round, square or curly.
(define-lex-abbrev lparen? (union "(" "[" "{"))
; rparen? matches any one closing bracket: round, square or curly.
(define-lex-abbrev rparen? (union ")" "]" "}"))
; symbol? matches a quoted name: a single quote immediately followed by an
; identifier. It reuses the identifier? abbreviation instead of repeating its
; character list, so the two definitions cannot drift apart.
(define-lex-abbrev symbol? (: #\' identifier?))
; keyword? matches the reserved words and built-in operators that the lexer
; reports as KEYWORD tokens. The comparison operators are < > <= >= and = ;
; "<=" replaces a duplicated ">=" entry so that it lexes as one token.
(define-lex-abbrev keyword? (or "=>" "do" "or"
                                "and" "else" "quasiquote"
                                "begin" "if" "quote"
                                "case" "lambda" "set!"
                                "cond" "let" "unquote"
                                "define" "let*" "unquote-splicing"
                                "delay" "letrec" "+"
                                "-" "*" "/"
                                "mod" "not" "equal?"
                                ">" "<" ">="
                                "<=" "="))
; A lexer that tokenizes LISP/Scheme source text.
; Each call reads one token from the given input port and prints a line
; describing it; comments and whitespace are consumed silently. On end of
; input it sets the global end-of-file flag and prints "(EOF)".
; The lexer picks the longest match and, among equally long matches, the
; earliest rule, so keyword? must stay ahead of identifier?.
(define calc-lexer
  (lexer
   [comment? (void)]
   [meta-word? (printf "(META-WORD ~a)\n" lexeme)]
   [lparen? (printf "(LEFT PAREN)\n")]
   [rparen? (printf "(RIGHT PAREN)\n")]
   [symbol? (printf "(SYM ~a)\n" lexeme)]
   [#\' (printf "(QUOTE)\n")] ;; Quote for next s-exp
   [literal? (printf "(LIT ~a)\n" lexeme)]
   [keyword? (printf "(KEYWORD ~a)\n" lexeme)]
   [identifier? (printf "(ID ~a)\n" lexeme)]
   ;; Ignore every kind of whitespace (spaces, newlines, tabs, carriage
   ;; returns) so indented or CRLF files do not produce NOT HANDLED lines.
   [(+ whitespace) (void)]
   [any-char (printf "NOT HANDLED ~a \n" lexeme)]
   [(eof) (begin
            (set! end-of-file #t)
            (printf "(EOF)\n"))]))
; Holds the input port being lexed. Starts as the empty list and is replaced
; with the opened file port by the command-line script at the bottom.
(define test '())
; Drives calc-lexer over an input port, one token per iteration, until the
; global end-of-file flag has been raised.
;   file : input port to lex
; Returns '() once end of file has been seen, or #f if calc-lexer ever
; returns #f.
(define (reader file)
  (let next-token ()
    (cond
      [end-of-file '()]
      [(calc-lexer file) (next-token)]
      [else #f])))
; Lex the file named by the first command-line argument.
; A missing argument and an unreadable file are reported separately; any
; other failure is left to propagate instead of being mislabelled as a
; missing file name. The port is closed once lexing finishes or fails.
(let ([args (current-command-line-arguments)])
  (cond
    [(zero? (vector-length args))
     (printf "Please provide a file name to lex...\n")]
    [else
     (with-handlers ([exn:fail:filesystem?
                      (lambda (exn)
                        (printf "Could not lex ~a: ~a\n"
                                (vector-ref args 0)
                                (exn-message exn)))])
       (set! test (open-input-file (vector-ref args 0)))
       (dynamic-wind
        void
        (lambda () (reader test))
        (lambda () (close-input-port test))))]))
I have attached a sample program to lex and its resulting output.
LL, LR, SLR, LALR, LLARLALSLRAL...yeah, parsing...
I worked through Matt Might's slides and the following web pages: