"""
basictweetexlexer.py

This lexer is written to recognize basic tweetex syntax elements.
Here's a grammar of what it's meant to recognize.

Note: in EBNF, { anything in curly braces } repeats, so I've put quotes
around the curly braces that should literally appear in the source code.
This grammar was revised with great help from Jim.

<story> :== <preamble> <passage>+
<preamble> :== <id> <macro>*
<id> :== "\ifid{" [\w-]+ "}"
<macro> :== "\" <command> [ "{" <argument> "}" ]*
<command> :== "link" | "start" | "author" | "title"
<passage> :== "\passage" { "{" <argument> "}" } <text>
<char> :== [^\\\{\}]
<argument> :== <char>+ | <macro>
<text> :== <char>* {<macro>} {<text>}


    -- lexer --
    COMMAND     (including backslash)
    LEFTCURLY
    RIGHTCURLY
    CHARACTERS

    --- parser ---
    ... recursive stuff ...


And here's an example of a correctly written tweetex document.

\title{My Story}
\author{Nick Creel}
\ifid{0CA8C7C5-F219-4B1B-A3A8-45710F389818}
\start{Starting Passage}

\passage{Starting Passage}
This is some text in the first passage
\link{Second Passage}{This link goes to the second passage}

\passage{Second Passage}
This is some text in the second passage.

"""

import argparse
import codecs
import re

class Token:
    """A lexer token or parser node.

    Attributes:
        value: the matched source text (from the lexer match).
        _type: the type label passed in by the caller.
        children: list of child nodes; always starts empty.
    """

    def __init__(self, value, _type):
        """Store the token's text and type label, with no children yet."""
        self.value = value   # from lexer match
        self._type = _type
        self.children = []

    def __str__(self):
        """Return a one-line, human-readable description of the token."""
        # The type is stored as ``_type``; reading ``self.type`` here used to
        # raise AttributeError whenever a token was printed.
        return f"Token: VALUE = '{self.value}', TYPE = {self._type}"

class Lexer:
    r"""Split tweetex source text into tokens, one call at a time.

    Four token types are recognized, named after the keys of ``regexes``:
    COMMAND (value includes the backslash), LEFTCURLY, RIGHTCURLY and
    CHARACTERS (a run of anything that is not a backslash or a curly brace).

    >>> lex = Lexer(r"one two \start three \link{url} four")
    >>> [lex.next_token().value for _ in range(3)]
    ['one two ', '\\start', ' three ']
    >>> lex.text
    '\\link{url} four'
    """

    def __init__(self, text):
        r"""Remember the text to tokenize and set up the token patterns.

        >>> lex = Lexer("")
        >>> re.match(lex.regexes['LEFTCURLY'], r"{foo").group(0)
        '{'
        >>> re.match(lex.regexes['RIGHTCURLY'], r"}foo").group(0)
        '}'
        >>> re.match(lex.regexes['COMMAND'], r"\foo bar").group(2)
        'foo'
        >>> re.match(lex.regexes['COMMAND'], r"\foo{bar}").group(2)
        'foo'
        >>> re.match(lex.regexes['COMMAND'], r"\foo{bar}").group(0)
        '\\foo'
        """
        self.text = text     # text not yet consumed by next_token()
        self.lines = []      # not used by the code in this class
        self.tokens = []     # every token produced so far, in source order
        # Known command names. The COMMAND pattern below does not consult
        # this list; it accepts any run of lowercase letters after a backslash.
        self.commands = ["link", "start", "author", "title", "ifid"]
        self.regexes = {"LEFTCURLY":   r'(\{)' ,          #  {
                        "RIGHTCURLY":  r'(\})' ,          #  }
                        "COMMAND":     r'(\\([a-z]+))',   #  \command
                        "CHARACTERS":  r'[^\\\{\}]+'}     #  not \ { or }

    def next_token(self):
        r"""Consume and return the token at the start of the remaining text.

        The matched text is removed from ``self.text`` and the new token is
        appended to ``self.tokens``.

        Returns:
            The new Token, or None when no text is left.

        Raises:
            ValueError: if the remaining text starts with something no
                pattern matches (a backslash that is not followed by a
                lowercase letter).

        >>> lex = Lexer(r"\link{url}")
        >>> tok = lex.next_token()
        >>> (tok._type, tok.value)
        ('COMMAND', '\\link')
        >>> lex.text
        '{url}'
        >>> [lex.next_token()._type for _ in range(3)]
        ['LEFTCURLY', 'CHARACTERS', 'RIGHTCURLY']
        >>> lex.next_token() is None
        True
        >>> len(lex.tokens)
        4
        """
        if not self.text:
            return None
        # Each pattern starts on a different kind of character, so at most
        # one of them can match and the order of the attempts is irrelevant.
        for kind, pattern in self.regexes.items():
            found = re.match(pattern, self.text)
            if found:
                token = Token(found.group(0), kind)
                self.tokens.append(token)
                # Every pattern consumes at least one character, so the
                # remaining text always shrinks.
                self.text = self.text[found.end():]
                return token
        raise ValueError(
            f"no token matches text starting at {self.text[:20]!r}")

if __name__ == '__main__':
    # Executed as a script: run every doctest found in this module.
    from doctest import testmod
    testmod()