""" sqllexer.py a lexer written with SLY thadt tokenizes input according to a simplified version of the SQL syntax provided by professor Shun Yan Cheung at Emory College: ::= SELECT FROM WHERE ::= | , ::= | , ::= AND | IN ( ) | = | LIKE for some reason, the author didn't define the tokens like so I attempted to glean what they look like from some examples of SQL + the example provided. http://www.mathcs.emory.edu/~cheung/Courses/554/Syllabus/5-query-opt/SQL-grammar.html https://en.wikipedia.org/wiki/SQL_syntax nick creel - programming languages - sp19 """ from sly import Lexer class SQLexer(Lexer): tokens = {NAME, NUMBER, PATTERN, NUMBER, AND, IN, EQ, LIKE, SELECT, FROM, WHERE, COMMA, LPAREN, RPAREN, SEMICOLON } ignore = ' \t' #Reserved AND = r'AND' IN = r'IN' EQ = r'=' LIKE = r'LIKE' SELECT = r'SELECT' FROM = r'FROM' WHERE = r'WHERE' COMMA = r',' LPAREN = r'\(' RPAREN = r'\)' SEMICOLON = r';' #Tokens NAME = r'[a-zA-Z\*][a-zA-Z0-9_\.]*' PATTERN = r'"[a-zA-Z0-9.][a-zA-Z0-9.]*"' NUMBER = r'\d+' ignore_newline = r'\n+' def ignore_newline(self, t): selfe.lineno += t.value.count('\n') def error(self, t): print("illegal character '{}'".format(t.value[0])) self.index += 1 class Stack(): def __init__(self): self.values = [] def push(self, value): self.values.append(value) def pop(self): return self.values.pop() def isEmpty(self): return self.values == [] def size(self): return len(self.values) class Queue(Stack): def enqueue(self, item): self.values.insert(0, item) def peek(self): return self.values[-1] def query(queue, stack): ''' matches QUERY if first token in queue is SELECT. ''' atoken = queue.pop() if atoken.type == 'SELECT': stack.push("QUERY") sellist(queue, stack) elif queue.isEmpty(): stack.push("EOF") return else: print("The input does not follow the specification") def sellist(queue, stack): '''matches sellist when given queue from query if input matches specification''' atoken = queue.pop() nexttok = queue.peek() if atoken.type == "NAME": stack.push("Attribute") stack.push(atoken.value) if nexttok.type == "COMMA": comma = queue.pop() stack.push("COMMA") sellist(queue, stack) elif nexttok.type == "FROM": queue.pop() stack.push("FROM") fromlist(queue, stack) else: print("the input does not match the specification") pass def fromlist(queue, stack): atoken = queue.pop() nexttok = queue.peek() if atoken.type == "NAME": stack.push("Relation") stack.push(atoken.value) if nexttok.type == "COMMA": queue.pop() stack.push("COMMA") fromlist(queue, stack) elif nexttok.type == "WHERE": queue.pop() stack.push("WHERE") condition(queue, stack) else: print("the input does nto match the specification") pass def condition(queue, stack): # TODO: finish LIKE case atoken = queue.pop() nexttok = queue.peek() temp = Stack() # to keep track of subtree in the case that there's an AND # how to handle the case condition and condition? if atoken.type == "NAME": temp.push("Attribute") temp.push(atoken.value) if nexttok.type == "IN": queue.pop() temp.push("IN") for i in temp.values: stack.values.append(i) nexttok = queue.peek() if nexttok.type == "LPAREN": queue.pop() stack.push("LPAREN") query(queue, stack) stack.push("RPAREN") nexttok == queue.peek() if nexttok.type == "SEMICOLON": queue.pop() stack.push("SEMICOLON") elif nexttok.type == "EQ": queue.pop() temp.push("ASSIGN") for i in temp.values: stack.values.append(i) nexttok = queue.peek() if nexttok.type == "NAME": queue.pop() stack.push("Attribute") stack.push(nexttok.value) elif nexttok.type == "NUMBER": queue.pop() stack.push("NUMBER") stack.push(nexttok.value) if queue.isEmpty(): return elif nexttok.type == "LIKE": queue.pop() temp.push("LIKE") for i in temp.values: stack.values.append(i) nexttok = queue.peek() if nexttok.type == "PATTERN": queue.pop() stack.push("PATTERN") nexttok = queue.peek() if nexttok.type == "SEMICOLON": queue.pop() stack.push("SEMICOLON") elif nexttok.type == "AND": queue.pop() temp.push("AND") for i in temp.values: stack.values.append(i) condition(queue,stack) nexttok = queue.peek() if nexttok == "SEMICOLON": queue.pop() stack.push("SEMICOLON") def parse(queue, stack): query(queue, stack) print(stack.values) if __name__ == '__main__': lexer = SQLexer() stack = Stack() # for parse tree queue = Queue() # for input while True: try: text = input('(SQL)>> ') except EOFError: break if text: for tok in lexer.tokenize(text): queue.enqueue(tok) parse(queue, stack) break else: break