18 files changed, 4350 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5eefa19
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__/
+*.egg-info/
+venv/
+parser.out
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a0a9b12
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+# Journal-lib
+Simple library for working with ledger journal files in python.
+
+This library is mainly compromised of a parser for journal files,
+and is intended to extract journal information to use for other processing.
+It is mainly not intended to be used for modifying journal files,
+as it will not be able to reproduce the journal files the way they looked
+before being parsed.
+
+The lexer and parser is powered by [PLY (Python Lex-Yacc)](https://github.com/dabeaz/ply/tree/master).
+The sourcecode of PLY is included in this package in the `src/journal_lib/parse/ply/` directory.
+Any licenses or copyright on other parts of this project is not applied to these files.
+Please look at the header of these files for their copyright information.
+
+## Usage
+There are two main methods for parsing a journal.
+```
+from journal_lib import journal_from_str, journal_from_file
+
+# Parse a journal consisting of 0 or more entries directly from a string
+journal = journal_from_str("str")
+# Parse a journal consisting of 0 or more entries from a file,
+# recursively following all include statements
+journal = journal_from_file(Path("personal.journal"))
+```
+
+The interface is defined in `src/dataclasses.py`.
+The `Journal` object is returned from the mentioned functions.
+It contains all the parsed lines (apart from global comments), with classes
+for each separable part of the code.
+The easiest way to use this is probably to just read those codes. 
+These dataclasses generally only have a `__str__` method in addition to their fields.
+
+This library is not fully featured, but it does support a lot of the most common
+parts of the ledger journal format.
+
+## Why
+There doesn't seem to be any parsing libraries which has a clear datamodel
+for its output from parsing.
+So I wanted to make something which has the potential to be easily extended.
+Since the grammar is built using PLY, it should be relatively straight forward
+to add support for missing features, or even make a separate variant of the
+lex/parse definitions for other flavours such as hledger.
+
+## License
+This doesn't have a license yet. 
+Please don't make close-sourced copies of it,
+but feel free to use it as you wish as a library,
+as long as you make any changes or improvements to the code available to
+other people freely (as in speech). 
+
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..75a8e9f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,40 @@
+[build-system]
+requires = ["setuptools>=61.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "journal-lib"
+version = "0.0.1"
+description = "Simple library for working with ledger journal files in python"
+readme = "README.md"
+authors = [{ name = "jakobst1n" }]
+license = { file = "LICENSE" }
+classifiers = [
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+]
+keywords = []
+dependencies = []
+requires-python = ">=3.9"
+
+[project.optional-dependencies]
+dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]
+
+[project.scripts]
+testparse = "journal_lib.utils:test"
+
+[tool.bumpver]
+current_version = "1.0.0"
+version_pattern = "MAJOR.MINOR.PATCH"
+commit_message = "bump version {old_version} -> {new_version}"
+commit = true
+tag = true
+push = false
+
+[tool.bumpver.file_patterns]
+"pyproject.toml" = [
+    'current_version = "{version}"',
+    'version = "{version}"'
+]
+"src/journal_lib/__init__.py" = [ "{version}" ]
+
diff --git a/src/journal_lib/__init__.py b/src/journal_lib/__init__.py
new file mode 100644
index 0000000..82172dc
--- /dev/null
+++ b/src/journal_lib/__init__.py
@@ -0,0 +1,3 @@
+__version__ = "1.0.0"
+
+from .utils import journal_from_str, journal_from_file
diff --git a/src/journal_lib/dataclasses.py b/src/journal_lib/dataclasses.py
new file mode 100644
index 0000000..227cabe
--- /dev/null
+++ b/src/journal_lib/dataclasses.py
@@ -0,0 +1,135 @@
+import sys
+from dataclasses import dataclass
+
+FORCE_NOCOLOR = False
+
+def set_force_nocolor(value: bool):
+    global FORCE_NOCOLOR
+    FORCE_NOCOLOR = value
+
+def anesc(code: str):
+    return "" if FORCE_NOCOLOR or not sys.stdout.isatty() else f"\u001b[{code}"
+
+def format_amount(amount: str, currency: str | None = None) -> str:
+    if currency is None:
+        return amount
+    if currency in ['NOK']:
+        return f"{amount} {currency}"
+    if currency in ['$']:
+        return f"{currency}{amount}"
+    return f"{amount} {currency}"
+
+@dataclass
+class JournalEntryTransaction:
+    account: str
+    currency: str | None
+    amount: str | None
+    comment: str | None
+
+@dataclass
+class JournalEntry:
+    date: str
+    cleared: bool
+    pending: bool
+    title: str
+    effective_date: str | None
+    transactions: list[JournalEntryTransaction]
+    comments: list[str]
+
+    def __str__(self):
+        s = f"{anesc('34m')}{self.date}{anesc('0m')}"
+        if self.effective_date is not None:
+            s += f"={anesc('36m')}{self.effective_date}{anesc('0m')}"
+        if self.cleared:
+            s += f" {anesc('31m')}*{anesc('0m')}"
+        if self.pending:
+            s += f" {anesc('31m')}!{anesc('0m')}"
+        s += f"  {anesc('36m')}{self.title}{anesc('0m')}\n"
+
+        max_account_len = max(len(x.account) for x in self.transactions)
+        for transaction in self.transactions:
+            t = f"  {anesc('33m')}{transaction.account:{max_account_len}}{anesc('0m')}"
+            if transaction.amount is not None:
+                amount_code = "32m"
+                if (transaction.amount[0] == "-"):
+                    amount_code = "31m"
+                t += f"  {anesc(amount_code)}{format_amount(transaction.amount, transaction.currency)}{anesc('0m')}"
+            if transaction.comment is not None:
+                t += f"  {anesc('38m')}{transaction.comment}{anesc('0m')}"
+            s += t + f"{anesc('0m')}\n"
+
+        for comment in self.comments:
+            s += f"  {anesc('38m')}{comment}{anesc('0m')}\n"
+        return s + "\n"
+
+@dataclass
+class JournalAccountDef:
+    account: str
+    comment: str | None
+
+    def __str__(self):
+        s = f"{anesc('38m')}account {anesc('33m')}{self.account}{anesc('0m')}"
+        if self.comment is not None:
+            s += f" {anesc('38m')}{self.comment}{anesc('0m')}"
+        return s + "\n"
+
+@dataclass
+class JournalCommodityDef:
+    commodity: str
+    format: str | None = None
+    note: str | None = None
+    nomarket: bool = False
+    default: bool = False
+
+    def __str__(self):
+        s = f"{anesc('38m')}commodity {anesc('33m')}{self.commodity}{anesc('0m')}\n"
+        for attr in ["format", "note", "nomarket", "default"]:
+            if hasattr(self, attr) and getattr(self, attr) is not None:
+                if isinstance(getattr(self, attr), bool) and not getattr(self, attr):
+                    continue
+                s += f"    {anesc('38m')}{attr}{anesc('0m')}"
+                if isinstance(getattr(self, attr), str):
+                    s += f" {anesc('36m')}{getattr(self, attr)}{anesc('0m')}"
+                s += "\n"
+        return s
+
+@dataclass
+class Journal:
+    entries: list[JournalEntry]
+    accounts: list[JournalAccountDef]
+    commodities: list[JournalCommodityDef]
+
+    def __str__(self):
+        s = ""
+
+        for account in self.accounts:
+            s += f"{account}"
+        s += "\n"
+
+        for commodity in self.commodities:
+            s += f"{commodity}"
+        s += "\n"
+
+        for entry in self.entries:
+            s += f"{entry}"
+
+        return s
+
+    @staticmethod
+    def from_elements(elements: list[str | JournalEntry]):
+        entries = []
+        accounts = []
+        commodities = []
+
+        for element in elements:
+            if isinstance(element, JournalEntry):
+                entries.append(element)
+            elif isinstance(element, JournalAccountDef):
+                accounts.append(element)
+            elif isinstance(element, JournalCommodityDef):
+                commodities.append(element)
+            else:
+                print(f"INVALID ELEMENT {element}")
+
+        return Journal(entries=entries, accounts=accounts, commodities=commodities)
+
diff --git a/src/journal_lib/parse/__init__.py b/src/journal_lib/parse/__init__.py
new file mode 100644
index 0000000..a80caa3
--- /dev/null
+++ b/src/journal_lib/parse/__init__.py
@@ -0,0 +1,5 @@
+__version__ = "1.0.0"
+
+from .parsers.p_ledger import JournalParser
+from .lexers.l_ledger import JournalLexer
+from .preprocessing import preprocess_includes
diff --git a/src/journal_lib/parse/lexers/__init__.py b/src/journal_lib/parse/lexers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/journal_lib/parse/lexers/__init__.py
diff --git a/src/journal_lib/parse/lexers/l_ledger.py b/src/journal_lib/parse/lexers/l_ledger.py
new file mode 100644
index 0000000..6f63626
--- /dev/null
+++ b/src/journal_lib/parse/lexers/l_ledger.py
@@ -0,0 +1,196 @@
+from journal_lib.parse.lexwrapper import LexWrapper
+
+class JournalLexer(LexWrapper):
+    states = (
+        ('sHEADER', 'exclusive'),        # Entry header parsing state
+        ('sHEADEREFF', 'exclusive'),     # Entry header effective date parsing state
+        ('sENTRY', 'exclusive'),         # Entry parsing state
+        ('sBLOCKCOMMENT', 'exclusive'),  # Block comment parsing state
+        ('sACCOUNT', 'exclusive'),       # Account definition parsing state
+        ('sCOMMODITY', 'exclusive'),     # Commodity definition parsing state
+    )
+
+    reserved = {
+        "account": 'KW_ACCOUNT',
+        "commodity": 'KW_COMMODITY'
+    }
+
+    tokens = (
+        'TEXT',
+        'AMOUNT',
+        'CURRENCY',
+        'COMMENT',
+        'INLINE_COMMENT',
+        'DATE',
+        'ENTRY_STATUS',
+        'ENTRY_EFFECTIVE_DATE_SEPARATOR',
+        'COMMODITY_NOTE',
+        'COMMODITY_FORMAT',
+        'COMMODITY_NOMARKET',
+        'COMMODITY_DEFAULT',
+    ) + tuple(reserved.values())
+
+    t_ANY_ignore = ' \t'
+
+    literals = '\n'
+
+    # Rules for the 'initial' state
+
+    def t_INITIAL_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        self._state_begin('sHEADER', t)
+        return t
+
+    def t_INITIAL_eof(self, t):
+        pass
+
+    def t_INITIAL_COMMENT(self, t):
+        r'(;|\#|\%|\||\*).+\n'
+        pass
+
+    def t_INITIAL_BLOCKCOMMENT(self, t):
+        r'comment'
+        self._state_begin('sBLOCKCOMMENT', t)
+
+    def t_INITIAL_KEYWORD(self, t):
+        r'[a-zA-Z_][a-zA-Z_0-9]*'
+        t.type = self.reserved.get(t.value,'KW')
+        if t.type == "KW_ACCOUNT":
+            self._state_begin('sACCOUNT', t)
+        if t.type == "KW_COMMODITY":
+            self._state_begin('sCOMMODITY', t)
+        return t
+
+    # Rules for the 'sBLOCKCOMMENT' state
+
+    def t_sBLOCKCOMMENT_end(self, t):
+        r'end\scomment'
+        t.lexer.lineno += t.value.count('\n')
+        self._state_begin('INITIAL', t)
+
+    def t_sBLOCKCOMMENT_content(self, t):
+        r'.+?\n'
+
+    def t_sBLOCKCOMMENT_error(self, t):
+        r.lexer.skip(1)
+
+    # Rules for the 'sACCOUNT' state
+
+    def t_sACCOUNT_TEXT(self, t):
+        r'("[^"]+")|([^\n;]+)'
+        if t.value.startswith('"') and t.value.endswith('"'):
+            t.value = t.value[1:-1]
+        t.value = t.value.rstrip()
+        return t
+
+    def t_sACCOUNT_COMMENT(self, t):
+        r'(;)[^\n]*'
+        return t
+
+    def t_sACCOUNT_newline(self, t):
+        r'\n'
+        self._state_begin('INITIAL', t)
+
+    # Rules for the 'sCOMMODITY' state
+
+    def t_sCOMMODITY_KW(self, t):
+        r'(note|format|nomarket|default)'
+        if t.value == "note":
+            t.type = "COMMODITY_NOTE"
+        elif t.value == "format":
+            t.type = "COMMODITY_FORMAT"
+        elif t.value == "nomarket":
+            t.type = "COMMODITY_NOMARKET"
+        elif t.value == "default":
+            t.type = "COMMODITY_DEFAULT"
+
+        return t
+
+    def t_sCOMMODITY_TEXT(self, t):
+        r'[^\n]+'
+        return t
+
+    def t_sCOMMODITY_newline(self, t):
+        r'\n(?=(\s*\n|\s*$|[^\s]))'
+        self._state_begin('INITIAL', t)
+
+    # Rules for the 'sheader' state
+
+    def t_sHEADER_ENTRY_STATUS(self, t):
+        r'(\*|!)'
+        return t
+
+    def t_sHEADER_ENTRY_EFFECTIVE_DATE_SEPARATOR(self, t):
+        r'='
+        self._state_begin('sHEADEREFF', t)
+        return t
+
+    def t_sHEADER_TEXT(self, t):
+        r'[^\n]+'
+        if ((t.value.startswith('"') and t.value.endswith('"'))
+            or (t.value.startswith("'") and t.value.endswith("'"))):
+            t.value = t.value[1:-1] 
+        return t
+
+    def t_sHEADER_newline(self, t):
+        r'\n'
+        self._state_begin('sENTRY', t)
+
+    # Rules for the  'sheader_effective_date' state
+
+    def t_sHEADEREFF_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        self._state_begin('sHEADER', t)
+        return t
+
+    # Rules for the 'sentry' state
+
+    def t_sENTRY_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        return t
+
+    def t_sENTRY_CURRENCY(self, t):
+        r'\$|NOK'
+        return t
+
+    def t_sENTRY_AMOUNT(self, t):
+        r'(-)?(\d|\,)+(\.\d{2})?'
+        return t
+
+    def t_sENTRY_COMMENT(self, t):
+        r';[^\n]*'
+        # Check if the comment is at the start of a line (considering whitespaces)
+        line_start = t.lexer.lexdata.rfind('\n', 0, t.lexpos) + 1
+        pre_comment = t.lexer.lexdata[line_start:t.lexpos]
+        
+        # If the comment is at the start of a line, it's a standalone comment
+        if pre_comment.isspace() or pre_comment == "":
+            t.type = "COMMENT"
+        else:
+            t.type = "INLINE_COMMENT"
+        return t
+
+    def t_sENTRY_TEXT(self, t):
+        r'[^\n;]+?(?=\s{2,}|$|;)'
+        if t.value.startswith('"') and t.value.endswith('"'):
+            t.value = t.value[1:-1]
+        t.value = t.value.rstrip()
+        return t
+
+    def t_sENTRY_newline(self, t):
+        r'\n\n'
+        self._state_begin('INITIAL', t)
+
+    def t_sENTRY_eof(self, t):
+        self._state_begin('INITIAL', t)
+
+    # Common rules
+
+    def t_ANY_newline(self, t):
+        r'\n+'
+        t.lexer.lineno += len(t.value)
+
+    def t_ANY_error(self, t):
+        self._hl_token(t)
+        t.lexer.skip(1)
+
diff --git a/src/journal_lib/parse/lexers/l_ledger.py.bak b/src/journal_lib/parse/lexers/l_ledger.py.bak
new file mode 100644
index 0000000..b0f5382
--- /dev/null
+++ b/src/journal_lib/parse/lexers/l_ledger.py.bak
@@ -0,0 +1,197 @@
+from journal_lib.parse.lexwrapper import LexWrapper
+
+class JournalLexer(LexWrapper):
+    states = (
+        ('sHEADER', 'exclusive'),        # Entry header parsing state
+        ('sHEADEREFF', 'exclusive'),     # Entry header effective date parsing state
+        ('sENTRY', 'exclusive'),         # Entry parsing state
+        ('sBLOCKCOMMENT', 'exclusive'),  # Block comment parsing state
+        ('sACCOUNT', 'exclusive'),       # Account definition parsing state
+        ('sCOMMODITY', 'exclusive'),     # Commodity definition parsing state
+    )
+
+    reserved = {
+        "account": 'KW_ACCOUNT',
+        "commodity": 'KW_COMMODITY'
+    }
+
+    tokens = (
+        'CURRENCY',
+        'DATE',
+        'STATUS',
+        'TITLE',
+        'EFFECTIVE_DATE_SEPARATOR',
+        'ACCOUNT_NAME',
+        'AMOUNT',
+        'COMMENT',
+        'INLINE_COMMENT',
+        'COMMODITY_NOTE',
+        'COMMODITY_FORMAT',
+        'COMMODITY_NOMARKET',
+        'COMMODITY_DEFAULT',
+    ) + tuple(reserved.values())
+
+    t_ANY_ignore = ' \t'
+
+    literals = '\n'
+
+    # Rules for the 'initial' state
+
+    def t_INITIAL_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        self._state_begin('sHEADER', t)
+        return t
+
+    def t_INITIAL_eof(self, t):
+        pass
+
+    def t_INITIAL_COMMENT(self, t):
+        r'(;|\#|\%|\||\*).+\n'
+        pass
+
+    def t_INITIAL_BLOCKCOMMENT(self, t):
+        r'comment'
+        self._state_begin('sBLOCKCOMMENT', t)
+
+    def t_INITIAL_KEYWORD(self, t):
+        r'[a-zA-Z_][a-zA-Z_0-9]*'
+        t.type = self.reserved.get(t.value,'KW')
+        if t.type == "KW_ACCOUNT":
+            self._state_begin('sACCOUNT', t)
+        if t.type == "KW_COMMODITY":
+            self._state_begin('sCOMMODITY', t)
+        return t
+
+    # Rules for the 'sBLOCKCOMMENT' state
+
+    def t_sBLOCKCOMMENT_end(self, t):
+        r'end\scomment'
+        t.lexer.lineno += t.value.count('\n')
+        self._state_begin('INITIAL', t)
+
+    def t_sBLOCKCOMMENT_content(self, t):
+        r'.+?\n'
+
+    def t_sBLOCKCOMMENT_error(self, t):
+        r.lexer.skip(1)
+
+    # Rules for the 'sACCOUNT' state
+
+    def t_sACCOUNT_ACCOUNT_NAME(self, t):
+        r'("[^"]+")|([^\n;]+)'
+        if t.value.startswith('"') and t.value.endswith('"'):
+            t.value = t.value[1:-1]
+        t.value = t.value.rstrip()
+        return t
+
+    def t_sACCOUNT_COMMENT(self, t):
+        r'(;)[^\n]*'
+        return t
+
+    def t_sACCOUNT_newline(self, t):
+        r'\n'
+        self._state_begin('INITIAL', t)
+
+    # Rules for the 'sCOMMODITY' state
+
+    def t_sCOMMODITY_KW(self, t):
+        r'(note|format|nomarket|default)'
+        if t.value == "note":
+            t.type = "COMMODITY_NOTE"
+        elif t.value == "format":
+            t.type = "COMMODITY_FORMAT"
+        elif t.value == "nomarket":
+            t.type = "COMMODITY_NOMARKET"
+        elif t.value == "default":
+            t.type = "COMMODITY_DEFAULT"
+
+        return t
+
+    def t_sCOMMODITY_NAME(self, t):
+        r'[^\n]+'
+        return t
+
+    def t_sCOMMODITY_newline(self, t):
+        r'\n(?=(\s*\n|\s*$|[^\s]))'
+        self._state_begin('INITIAL', t)
+
+    # Rules for the 'sheader' state
+
+    def t_sHEADER_STATUS(self, t):
+        r'(\*|!)'
+        return t
+
+    def t_sHEADER_EFFECTIVE_DATE_SEPARATOR(self, t):
+        r'='
+        self._state_begin('sHEADEREFF', t)
+        return t
+
+    def t_sHEADER_TITLE(self, t):
+        r'[^\n]+'
+        if ((t.value.startswith('"') and t.value.endswith('"'))
+            or (t.value.startswith("'") and t.value.endswith("'"))):
+            t.value = t.value[1:-1] 
+        return t
+
+    def t_sHEADER_newline(self, t):
+        r'\n'
+        self._state_begin('sENTRY', t)
+
+    # Rules for the  'sheader_effective_date' state
+
+    def t_sHEADEREFF_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        self._state_begin('sHEADER', t)
+        return t
+
+    # Rules for the 'sentry' state
+
+    def t_sENTRY_DATE(self, t):
+        r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+        return t
+
+    def t_sENTRY_CURRENCY(self, t):
+        r'\$|NOK'
+        return t
+
+    def t_sENTRY_AMOUNT(self, t):
+        r'(-)?\d+(\.\d{2})?'
+        return t
+
+    def t_sENTRY_COMMENT(self, t):
+        r';[^\n]*'
+        # Check if the comment is at the start of a line (considering whitespaces)
+        line_start = t.lexer.lexdata.rfind('\n', 0, t.lexpos) + 1
+        pre_comment = t.lexer.lexdata[line_start:t.lexpos]
+        
+        # If the comment is at the start of a line, it's a standalone comment
+        if pre_comment.isspace() or pre_comment == "":
+            t.type = "COMMENT"
+        else:
+            t.type = "INLINE_COMMENT"
+        return t
+
+    def t_sENTRY_ACCOUNT_NAME(self, t):
+        r'[^\n;]+?(?=\s{2,}|$|;)'
+        if t.value.startswith('"') and t.value.endswith('"'):
+            t.value = t.value[1:-1]
+        t.value = t.value.rstrip()
+        return t
+
+    def t_sENTRY_newline(self, t):
+        r'\n\n'
+        self._state_begin('INITIAL', t)
+
+    def t_sENTRY_eof(self, t):
+        self._state_begin('INITIAL', t)
+
+    # Common rules
+
+    def t_ANY_newline(self, t):
+        r'\n+'
+        t.lexer.lineno += len(t.value)
+
+    def t_ANY_error(self, t):
+        self._hl_token(t)
+        t.lexer.skip(1)
+
diff --git a/src/journal_lib/parse/lexwrapper.py b/src/journal_lib/parse/lexwrapper.py
new file mode 100644
index 0000000..6a3989e
--- /dev/null
+++ b/src/journal_lib/parse/lexwrapper.py
@@ -0,0 +1,76 @@
+from .ply import lex
+import sys
+
+class LexWrapper(object):
+    state_trail = ["INITIAL"]
+
+    def _state_begin(self, state: str, t = None):
+        """ Convenient wrapper for the lexer.begin, which makes it possible to track state changes. """
+        self.lexer.begin(state)
+        self.state_trail.append(self.lexer.current_state())
+
+        if len(self.state_trail) > 5:
+            self.state_trail = self.state_trail[-5:]
+
+        if self.debug:
+            d = f"{' ':{self.max_token_name_length+2}}{self.state_trail[-2]} -> {self.state_trail[-1]}"
+            if t is not None:
+                d += ", recognized [{}] \"{}\"".format(t.type, t.value.replace("\n", "\\n"))
+            self.debuglog.info(d)
+
+    def __init__(self, debug: bool = False):
+        """ Initialize a new JournalLexer """
+        self.build(debug=debug)
+        self.debug = debug
+        if self.debug:
+            self.debuglog = lex.PlyLogger(sys.stderr)
+        self.max_token_name_length = max(len(x)+1 for x in self.tokens)
+
+    def build(self, **kwargs):
+        """ Reinitialize the lexer module (this is called on __init__) """
+        self.lexer = lex.lex(module=self, **kwargs)
+
+    def input(self, s: str):
+        """ Wrapper for the lex input function """
+        self.lexer.input(s)
+
+    def token(self):
+        """ Wrapper for the lex token function, can print debug information to stdout if debug is enabled """
+        tok = self.lexer.token()
+        if self.debug and tok:
+            self.debuglog.info("[{:<{width}} ({}:{}) \"{}\"".format(
+                tok.type + "]",
+                tok.lineno,
+                tok.lexpos,
+                tok.value.replace("\n", "\\n"),
+                width=self.max_token_name_length,
+            ))
+        return tok
+
+    def print_tokens(self, data):
+        """ Simple debugging function which will trigger a tokenization of all the data provided """
+        self.input(data)
+        _debug = self.debug
+        self.debug = True
+        while True:
+            self.lexer.token()
+        self.debug = _debug
+
+    def _hl_token(self, t):
+        try:
+            linestart = t.lexer.lexdata.rfind("\n", 0, t.lexpos) + 1
+            lineend = t.lexer.lexdata.find("\n", t.lexpos)
+            markpos = t.lexpos - linestart
+            lineno = t.lexer.lexdata[0:linestart+1].count("\n")
+            print(f"Illegal character at '{t.value[0]}' on line {lineno}, position {markpos}")
+            print(f"    {t.lexer.lexdata[linestart:lineend]}")
+            print(f"    {' ' * markpos}^")
+        except Exception as e:
+            print(f"Illegal character '{p.value}'")
+            print(f"Additionally a error occuren when showing the position of the illegal character\n{e}")
+
+    @property
+    def lexdata(self):
+        if hasattr(self, "lexer"):
+            return self.lexer.lexdata
+        return None
diff --git a/src/journal_lib/parse/parsers/__init__.py b/src/journal_lib/parse/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/journal_lib/parse/parsers/__init__.py
diff --git a/src/journal_lib/parse/parsers/p_ledger.py b/src/journal_lib/parse/parsers/p_ledger.py
new file mode 100644
index 0000000..ad01a79
--- /dev/null
+++ b/src/journal_lib/parse/parsers/p_ledger.py
@@ -0,0 +1,134 @@
+from datetime import datetime
+
+from journal_lib.parse.parsewrapper import ParseWrapper
+from journal_lib.parse.lexers.l_ledger import JournalLexer
+from journal_lib.dataclasses import Journal, JournalEntry, JournalEntryTransaction, JournalAccountDef, JournalCommodityDef
+
+class JournalParser(ParseWrapper):
+    tokens = JournalLexer.tokens
+
+    def p_journal(self, p):
+        '''journal : elements '''
+        p[0] = Journal.from_elements(p[1])
+
+        if self.debug:
+            for x in p[1]:
+                print(repr(x))
+
+    def p_elements(self, p):
+        '''elements : elements element
+                    | element'''
+        if len(p) == 3:
+            p[0] = p[1] + [p[2]]
+        else:
+            p[0] = [p[1]]
+
+    def p_element_entry(self, p):
+        '''element : DATE effective_date status TEXT transactions'''
+        p[0] = JournalEntry(
+            date = p[1],
+            effective_date = p[2],
+            cleared = p[3]['cleared'],
+            pending = p[3]['pending'],
+            title = p[4],
+            transactions = p[5]['transactions'],
+            comments = p[5]['comments']
+        )
+
+    def p_element_account(self, p):
+        '''element : KW_ACCOUNT TEXT COMMENT
+                   | KW_ACCOUNT TEXT'''
+        p[0] = JournalAccountDef(account=p[2], comment=p[3] if len(p) > 3 else None)
+
+    def p_element_commodity(self, p):
+        '''element : KW_COMMODITY TEXT commodity_attributes '''
+        p[0] = JournalCommodityDef(commodity=p[2], **p[3])
+
+    def p_commodity_attributes(self, p):
+        '''commodity_attributes : COMMODITY_FORMAT TEXT commodity_attributes
+                                | COMMODITY_NOTE TEXT commodity_attributes
+                                | COMMODITY_NOMARKET commodity_attributes
+                                | COMMODITY_DEFAULT commodity_attributes
+                                | empty
+        '''
+        p[0] = {}
+        if p[1] == "format":
+            p[0]["format"] = p[2]
+        elif p[1] == "note":
+            p[0]["note"] = p[2]
+        elif p[1] == "nomarket":
+            p[0]["nomarket"] = True
+        elif p[1] == "default":
+            p[0]["default"] = True
+        if len(p) > 3 and p[3]:
+            p[0].update(p[3])
+
+    def p_effective_date(self, p):
+        '''effective_date : ENTRY_EFFECTIVE_DATE_SEPARATOR DATE
+                          | empty
+        '''
+        p[0] = p[2] if p[1] else None
+
+    def p_status(self, p):
+        '''status : ENTRY_STATUS
+                  | empty
+        '''
+        p[0] = {'cleared': p[1] == "*", 'pending': p[1] == "!"} if p[1] else {'cleared': False, 'pending': False}
+
+    def p_empty(self, p):
+        '''empty :'''
+        pass
+
+    def p_transactions(self, p):
+        '''transactions : transactions transaction'''
+        p[0] = {"comments": p[1]['comments'],
+                "transactions": p[1]['transactions'] + [p[2]]}
+
+    def p_comments(self, p):
+        '''transactions : transactions COMMENT'''
+        p[0] = {"comments": p[1]['comments'] + [p[2]],
+                "transactions": p[1]['transactions']}
+
+    def p_transactions_single(self, p):
+        '''transactions : transaction '''
+        p[0] = {"comments": [], "transactions": [p[1]]}
+
+    def p_transactions_comment_single(self, p):
+        '''transactions : COMMENT '''
+        p[0] = {"comments": [p[1]], "transactions": []}
+
+    def p_amount_prefixed(self, p):
+        '''amount : CURRENCY AMOUNT'''
+        p[0] = {'currency': p[1], 'amount': p[2]}
+
+    def p_amount_suffixed(self, p):
+        '''amount : AMOUNT CURRENCY'''
+        p[0] = {'currency': p[2], 'amount': p[1]}
+
+    def p_transaction_with_amount(self, p):
+        '''transaction : TEXT amount INLINE_COMMENT
+                       | TEXT amount'''
+
+        p[0] = JournalEntryTransaction(
+            account = p[1],
+            currency = p[2]['currency'],
+            amount = p[2]['amount'],
+            comment = p[3] if len(p) > 3 else None
+        )
+
+    def p_transaction_without_amount(self, p):
+        '''transaction : TEXT INLINE_COMMENT
+                       | TEXT'''
+        p[0] = JournalEntryTransaction(
+            account = p[1],
+            currency = None,
+            amount = None,
+            comment = p[2] if len(p) > 2 else None
+        )
+
+    def p_error(self, p):
+        if p:
+            self._hl_token(p)
+        else:
+            print("Syntax error at EOF")
+
diff --git a/src/journal_lib/parse/parsewrapper.py b/src/journal_lib/parse/parsewrapper.py
new file mode 100644
index 0000000..c75b32c
--- /dev/null
+++ b/src/journal_lib/parse/parsewrapper.py
@@ -0,0 +1,29 @@
+from .ply import yacc
+
+class ParseWrapper(object):
+
+    def __init__(self, tokens = None, debug: bool = False):
+        if tokens is not None:
+            self.tokens = tokens
+        self.debug = debug
+        self.build(debug=debug)
+
+    def build(self, **kwargs):
+        self.parser = yacc.yacc(module=self, **kwargs)
+
+    def parse(self, *args, **kwargs):
+        return self.parser.parse(*args, **kwargs)
+
+    def _hl_token(self, p):
+        try:
+            linestart = p.lexer.lexdata.rfind("\n", 0, p.lexpos) + 1
+            lineend = p.lexer.lexdata.find("\n", p.lexpos)
+            markpos = p.lexpos - linestart
+            marklen = len(str(p.value))
+            lineno = p.lexer.lexdata[0:linestart+1].count("\n")
+            print(f"Syntax error at '{p.value}' on line {lineno}, position {markpos}")
+            print(f"    {p.lexer.lexdata[linestart:lineend]}")
+            print(f"    {' ' * markpos}{'^' * marklen}")
+        except Exception as e:
+            print(f"An error occured when showing the position of token {p}\n{e}")
+
diff --git a/src/journal_lib/parse/ply/__init__.py b/src/journal_lib/parse/ply/__init__.py
new file mode 100644
index 0000000..45f28c5
--- /dev/null
+++ b/src/journal_lib/parse/ply/__init__.py
@@ -0,0 +1,5 @@
+# PLY package
+# Author: David Beazley (dave@dabeaz.com)
+# https://github.com/dabeaz/ply
+
+__version__ = '2022.10.27'
diff --git a/src/journal_lib/parse/ply/lex.py b/src/journal_lib/parse/ply/lex.py
new file mode 100644
index 0000000..de011fe
--- /dev/null
+++ b/src/journal_lib/parse/ply/lex.py
@@ -0,0 +1,901 @@
+# -----------------------------------------------------------------------------
+# ply: lex.py
+#
+# Copyright (C) 2001-2022
+# David M. Beazley (Dabeaz LLC)
+# All rights reserved.
+#
+# Latest version: https://github.com/dabeaz/ply
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of David Beazley or Dabeaz LLC may be used to
+#   endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+
+import re
+import sys
+import types
+import copy
+import os
+import inspect
+
+# This tuple contains acceptable string types
+StringTypes = (str, bytes)
+
+# This regular expression is used to match valid token names
+_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
+
+# Exception thrown when invalid token encountered and no default error
+# handler is defined.
+class LexError(Exception):
+    def __init__(self, message, s):
+        self.args = (message,)
+        self.text = s
+
+# Token class.  This class is used to represent the tokens produced.
+class LexToken(object):
+    def __repr__(self):
+        return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})'
+
+# This object is a stand-in for a logging object created by the
+# logging module.
+
+class PlyLogger(object):
+    def __init__(self, f):
+        self.f = f
+
+    def critical(self, msg, *args, **kwargs):
+        self.f.write((msg % args) + '\n')
+
+    def warning(self, msg, *args, **kwargs):
+        self.f.write('WARNING: ' + (msg % args) + '\n')
+
+    def error(self, msg, *args, **kwargs):
+        self.f.write('ERROR: ' + (msg % args) + '\n')
+
+    info = critical
+    debug = critical
+
+# -----------------------------------------------------------------------------
+#                        === Lexing Engine ===
+#
+# The following Lexer class implements the lexer runtime.   There are only
+# a few public methods and attributes:
+#
+#    input()          -  Store a new string in the lexer
+#    token()          -  Get the next token
+#    clone()          -  Clone the lexer
+#
+#    lineno           -  Current line number
+#    lexpos           -  Current position in the input string
+# -----------------------------------------------------------------------------
+
+class Lexer:
+    def __init__(self):
+        self.lexre = None             # Master regular expression. This is a list of
+                                      # tuples (re, findex) where re is a compiled
+                                      # regular expression and findex is a list
+                                      # mapping regex group numbers to rules
+        self.lexretext = None         # Current regular expression strings
+        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
+        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
+        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
+        self.lexstate = 'INITIAL'     # Current lexer state
+        self.lexstatestack = []       # Stack of lexer states
+        self.lexstateinfo = None      # State information
+        self.lexstateignore = {}      # Dictionary of ignored characters for each state
+        self.lexstateerrorf = {}      # Dictionary of error functions for each state
+        self.lexstateeoff = {}        # Dictionary of eof functions for each state
+        self.lexreflags = 0           # Optional re compile flags
+        self.lexdata = None           # Actual input data (as a string)
+        self.lexpos = 0               # Current position in input text
+        self.lexlen = 0               # Length of the input text
+        self.lexerrorf = None         # Error rule (if any)
+        self.lexeoff = None           # EOF rule (if any)
+        self.lextokens = None         # List of valid tokens
+        self.lexignore = ''           # Ignored characters
+        self.lexliterals = ''         # Literal characters that can be passed through
+        self.lexmodule = None         # Module
+        self.lineno = 1               # Current line number
+
+    def clone(self, object=None):
+        c = copy.copy(self)
+
+        # If the object parameter has been supplied, it means we are attaching the
+        # lexer to a new object.  In this case, we have to rebind all methods in
+        # the lexstatere and lexstateerrorf tables.
+
+        if object:
+            newtab = {}
+            for key, ritem in self.lexstatere.items():
+                newre = []
+                for cre, findex in ritem:
+                    newfindex = []
+                    for f in findex:
+                        if not f or not f[0]:
+                            newfindex.append(f)
+                            continue
+                        newfindex.append((getattr(object, f[0].__name__), f[1]))
+                newre.append((cre, newfindex))
+                newtab[key] = newre
+            c.lexstatere = newtab
+            c.lexstateerrorf = {}
+            for key, ef in self.lexstateerrorf.items():
+                c.lexstateerrorf[key] = getattr(object, ef.__name__)
+            c.lexmodule = object
+        return c
+
+    # ------------------------------------------------------------
+    # input() - Push a new string into the lexer
+    # ------------------------------------------------------------
+    def input(self, s):
+        self.lexdata = s
+        self.lexpos = 0
+        self.lexlen = len(s)
+
+    # ------------------------------------------------------------
+    # begin() - Changes the lexing state
+    # ------------------------------------------------------------
+    def begin(self, state):
+        if state not in self.lexstatere:
+            raise ValueError(f'Undefined state {state!r}')
+        self.lexre = self.lexstatere[state]
+        self.lexretext = self.lexstateretext[state]
+        self.lexignore = self.lexstateignore.get(state, '')
+        self.lexerrorf = self.lexstateerrorf.get(state, None)
+        self.lexeoff = self.lexstateeoff.get(state, None)
+        self.lexstate = state
+
+    # ------------------------------------------------------------
+    # push_state() - Changes the lexing state and saves old on stack
+    # ------------------------------------------------------------
+    def push_state(self, state):
+        self.lexstatestack.append(self.lexstate)
+        self.begin(state)
+
+    # ------------------------------------------------------------
+    # pop_state() - Restores the previous state
+    # ------------------------------------------------------------
+    def pop_state(self):
+        self.begin(self.lexstatestack.pop())
+
+    # ------------------------------------------------------------
+    # current_state() - Returns the current lexing state
+    # ------------------------------------------------------------
+    def current_state(self):
+        return self.lexstate
+
+    # ------------------------------------------------------------
+    # skip() - Skip ahead n characters
+    # ------------------------------------------------------------
+    def skip(self, n):
+        self.lexpos += n
+
+    # ------------------------------------------------------------
+    # token() - Return the next token from the Lexer
+    #
+    # Note: This function has been carefully implemented to be as fast
+    # as possible.  Don't make changes unless you really know what
+    # you are doing
+    # ------------------------------------------------------------
+    def token(self):
+        # Make local copies of frequently referenced attributes
+        lexpos    = self.lexpos
+        lexlen    = self.lexlen
+        lexignore = self.lexignore
+        lexdata   = self.lexdata
+
+        while lexpos < lexlen:
+            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
+            if lexdata[lexpos] in lexignore:
+                lexpos += 1
+                continue
+
+            # Look for a regular expression match
+            for lexre, lexindexfunc in self.lexre:
+                m = lexre.match(lexdata, lexpos)
+                if not m:
+                    continue
+
+                # Create a token for return
+                tok = LexToken()
+                tok.value = m.group()
+                tok.lineno = self.lineno
+                tok.lexpos = lexpos
+
+                i = m.lastindex
+                func, tok.type = lexindexfunc[i]
+
+                if not func:
+                    # If no token type was set, it's an ignored token
+                    if tok.type:
+                        self.lexpos = m.end()
+                        return tok
+                    else:
+                        lexpos = m.end()
+                        break
+
+                lexpos = m.end()
+
+                # If token is processed by a function, call it
+
+                tok.lexer = self      # Set additional attributes useful in token rules
+                self.lexmatch = m
+                self.lexpos = lexpos
+                newtok = func(tok)
+                del tok.lexer
+                del self.lexmatch
+
+                # Every function must return a token, if nothing, we just move to next token
+                if not newtok:
+                    lexpos    = self.lexpos         # This is here in case user has updated lexpos.
+                    lexignore = self.lexignore      # This is here in case there was a state change
+                    break
+                return newtok
+            else:
+                # No match, see if in literals
+                if lexdata[lexpos] in self.lexliterals:
+                    tok = LexToken()
+                    tok.value = lexdata[lexpos]
+                    tok.lineno = self.lineno
+                    tok.type = tok.value
+                    tok.lexpos = lexpos
+                    self.lexpos = lexpos + 1
+                    return tok
+
+                # No match. Call t_error() if defined.
+                if self.lexerrorf:
+                    tok = LexToken()
+                    tok.value = self.lexdata[lexpos:]
+                    tok.lineno = self.lineno
+                    tok.type = 'error'
+                    tok.lexer = self
+                    tok.lexpos = lexpos
+                    self.lexpos = lexpos
+                    newtok = self.lexerrorf(tok)
+                    if lexpos == self.lexpos:
+                        # Error method didn't change text position at all. This is an error.
+                        raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}",
+                                       lexdata[lexpos:])
+                    lexpos = self.lexpos
+                    if not newtok:
+                        continue
+                    return newtok
+
+                self.lexpos = lexpos
+                raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}",
+                               lexdata[lexpos:])
+
+        if self.lexeoff:
+            tok = LexToken()
+            tok.type = 'eof'
+            tok.value = ''
+            tok.lineno = self.lineno
+            tok.lexpos = lexpos
+            tok.lexer = self
+            self.lexpos = lexpos
+            newtok = self.lexeoff(tok)
+            return newtok
+
+        self.lexpos = lexpos + 1
+        if self.lexdata is None:
+            raise RuntimeError('No input string given with input()')
+        return None
+
+    # Iterator interface
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        t = self.token()
+        if t is None:
+            raise StopIteration
+        return t
+
+# -----------------------------------------------------------------------------
+#                           ==== Lex Builder ===
+#
+# The functions and classes below are used to collect lexing information
+# and build a Lexer object from it.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# _get_regex(func)
+#
+# Returns the regular expression assigned to a function either as a doc string
+# or as a .regex attribute attached by the @TOKEN decorator.
+# -----------------------------------------------------------------------------
+def _get_regex(func):
+    return getattr(func, 'regex', func.__doc__)
+
+# -----------------------------------------------------------------------------
+# get_caller_module_dict()
+#
+# This function returns a dictionary containing all of the symbols defined within
+# a caller further down the call stack.  This is used to get the environment
+# associated with the yacc() call if none was provided.
+# -----------------------------------------------------------------------------
+def get_caller_module_dict(levels):
+    f = sys._getframe(levels)
+    return { **f.f_globals, **f.f_locals }
+
+# -----------------------------------------------------------------------------
+# _form_master_re()
+#
+# This function takes a list of all of the regex components and attempts to
+# form the master regular expression.  Given limitations in the Python re
+# module, it may be necessary to break the master regex into separate expressions.
+# -----------------------------------------------------------------------------
+def _form_master_re(relist, reflags, ldict, toknames):
+    if not relist:
+        return [], [], []
+    regex = '|'.join(relist)
+    try:
+        lexre = re.compile(regex, reflags)
+
+        # Build the index to function map for the matching engine
+        lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
+        lexindexnames = lexindexfunc[:]
+
+        for f, i in lexre.groupindex.items():
+            handle = ldict.get(f, None)
+            if type(handle) in (types.FunctionType, types.MethodType):
+                lexindexfunc[i] = (handle, toknames[f])
+                lexindexnames[i] = f
+            elif handle is not None:
+                lexindexnames[i] = f
+                if f.find('ignore_') > 0:
+                    lexindexfunc[i] = (None, None)
+                else:
+                    lexindexfunc[i] = (None, toknames[f])
+
+        return [(lexre, lexindexfunc)], [regex], [lexindexnames]
+    except Exception:
+        m = (len(relist) // 2) + 1
+        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
+        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
+        return (llist+rlist), (lre+rre), (lnames+rnames)
+
+# -----------------------------------------------------------------------------
+# def _statetoken(s,names)
+#
+# Given a declaration name s of the form "t_" and a dictionary whose keys are
+# state names, this function returns a tuple (states,tokenname) where states
+# is a tuple of state names and tokenname is the name of the token.  For example,
+# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
+# -----------------------------------------------------------------------------
+def _statetoken(s, names):
+    parts = s.split('_')
+    for i, part in enumerate(parts[1:], 1):
+        if part not in names and part != 'ANY':
+            break
+
+    if i > 1:
+        states = tuple(parts[1:i])
+    else:
+        states = ('INITIAL',)
+
+    if 'ANY' in states:
+        states = tuple(names)
+
+    tokenname = '_'.join(parts[i:])
+    return (states, tokenname)
+
+
+# -----------------------------------------------------------------------------
+# LexerReflect()
+#
+# This class represents information needed to build a lexer as extracted from a
+# user's input file.
+# -----------------------------------------------------------------------------
+class LexerReflect(object):
+    def __init__(self, ldict, log=None, reflags=0):
+        self.ldict      = ldict
+        self.error_func = None
+        self.tokens     = []
+        self.reflags    = reflags
+        self.stateinfo  = {'INITIAL': 'inclusive'}
+        self.modules    = set()
+        self.error      = False
+        self.log        = PlyLogger(sys.stderr) if log is None else log
+
+    # Get all of the basic information
+    def get_all(self):
+        self.get_tokens()
+        self.get_literals()
+        self.get_states()
+        self.get_rules()
+
+    # Validate all of the information
+    def validate_all(self):
+        self.validate_tokens()
+        self.validate_literals()
+        self.validate_rules()
+        return self.error
+
+    # Get the tokens map
+    def get_tokens(self):
+        tokens = self.ldict.get('tokens', None)
+        if not tokens:
+            self.log.error('No token list is defined')
+            self.error = True
+            return
+
+        if not isinstance(tokens, (list, tuple)):
+            self.log.error('tokens must be a list or tuple')
+            self.error = True
+            return
+
+        if not tokens:
+            self.log.error('tokens is empty')
+            self.error = True
+            return
+
+        self.tokens = tokens
+
+    # Validate the tokens
+    def validate_tokens(self):
+        terminals = {}
+        for n in self.tokens:
+            if not _is_identifier.match(n):
+                self.log.error(f"Bad token name {n!r}")
+                self.error = True
+            if n in terminals:
+                self.log.warning(f"Token {n!r} multiply defined")
+            terminals[n] = 1
+
+    # Get the literals specifier
+    def get_literals(self):
+        self.literals = self.ldict.get('literals', '')
+        if not self.literals:
+            self.literals = ''
+
+    # Validate literals
+    def validate_literals(self):
+        try:
+            for c in self.literals:
+                if not isinstance(c, StringTypes) or len(c) > 1:
+                    self.log.error(f'Invalid literal {c!r}. Must be a single character')
+                    self.error = True
+
+        except TypeError:
+            self.log.error('Invalid literals specification. literals must be a sequence of characters')
+            self.error = True
+
+    def get_states(self):
+        self.states = self.ldict.get('states', None)
+        # Build statemap
+        if self.states:
+            if not isinstance(self.states, (tuple, list)):
+                self.log.error('states must be defined as a tuple or list')
+                self.error = True
+            else:
+                for s in self.states:
+                    if not isinstance(s, tuple) or len(s) != 2:
+                        self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s)
+                        self.error = True
+                        continue
+                    name, statetype = s
+                    if not isinstance(name, StringTypes):
+                        self.log.error('State name %r must be a string', name)
+                        self.error = True
+                        continue
+                    if not (statetype == 'inclusive' or statetype == 'exclusive'):
+                        self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name)
+                        self.error = True
+                        continue
+                    if name in self.stateinfo:
+                        self.log.error("State %r already defined", name)
+                        self.error = True
+                        continue
+                    self.stateinfo[name] = statetype
+
+    # Get all of the symbols with a t_ prefix and sort them into various
+    # categories (functions, strings, error functions, and ignore characters)
+
+    def get_rules(self):
+        tsymbols = [f for f in self.ldict if f[:2] == 't_']
+
+        # Now build up a list of functions and a list of strings
+        self.toknames = {}        # Mapping of symbols to token names
+        self.funcsym  = {}        # Symbols defined as functions
+        self.strsym   = {}        # Symbols defined as strings
+        self.ignore   = {}        # Ignore strings by state
+        self.errorf   = {}        # Error functions by state
+        self.eoff     = {}        # EOF functions by state
+
+        for s in self.stateinfo:
+            self.funcsym[s] = []
+            self.strsym[s] = []
+
+        if len(tsymbols) == 0:
+            self.log.error('No rules of the form t_rulename are defined')
+            self.error = True
+            return
+
+        for f in tsymbols:
+            t = self.ldict[f]
+            states, tokname = _statetoken(f, self.stateinfo)
+            self.toknames[f] = tokname
+
+            if hasattr(t, '__call__'):
+                if tokname == 'error':
+                    for s in states:
+                        self.errorf[s] = t
+                elif tokname == 'eof':
+                    for s in states:
+                        self.eoff[s] = t
+                elif tokname == 'ignore':
+                    line = t.__code__.co_firstlineno
+                    file = t.__code__.co_filename
+                    self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__)
+                    self.error = True
+                else:
+                    for s in states:
+                        self.funcsym[s].append((f, t))
+            elif isinstance(t, StringTypes):
+                if tokname == 'ignore':
+                    for s in states:
+                        self.ignore[s] = t
+                    if '\\' in t:
+                        self.log.warning("%s contains a literal backslash '\\'", f)
+
+                elif tokname == 'error':
+                    self.log.error("Rule %r must be defined as a function", f)
+                    self.error = True
+                else:
+                    for s in states:
+                        self.strsym[s].append((f, t))
+            else:
+                self.log.error('%s not defined as a function or string', f)
+                self.error = True
+
+        # Sort the functions by line number
+        for f in self.funcsym.values():
+            f.sort(key=lambda x: x[1].__code__.co_firstlineno)
+
+        # Sort the strings by regular expression length
+        for s in self.strsym.values():
+            s.sort(key=lambda x: len(x[1]), reverse=True)
+
+    # Validate all of the t_rules collected
+    def validate_rules(self):
+        for state in self.stateinfo:
+            # Validate all rules defined by functions
+
+            for fname, f in self.funcsym[state]:
+                line = f.__code__.co_firstlineno
+                file = f.__code__.co_filename
+                module = inspect.getmodule(f)
+                self.modules.add(module)
+
+                tokname = self.toknames[fname]
+                if isinstance(f, types.MethodType):
+                    reqargs = 2
+                else:
+                    reqargs = 1
+                nargs = f.__code__.co_argcount
+                if nargs > reqargs:
+                    self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__)
+                    self.error = True
+                    continue
+
+                if nargs < reqargs:
+                    self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__)
+                    self.error = True
+                    continue
+
+                if not _get_regex(f):
+                    self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__)
+                    self.error = True
+                    continue
+
+                try:
+                    c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
+                    if c.match(''):
+                        self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__)
+                        self.error = True
+                except re.error as e:
+                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e)
+                    if '#' in _get_regex(f):
+                        self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__)
+                    self.error = True
+
+            # Validate all rules defined by strings
+            for name, r in self.strsym[state]:
+                tokname = self.toknames[name]
+                if tokname == 'error':
+                    self.log.error("Rule %r must be defined as a function", name)
+                    self.error = True
+                    continue
+
+                if tokname not in self.tokens and tokname.find('ignore_') < 0:
+                    self.log.error("Rule %r defined for an unspecified token %s", name, tokname)
+                    self.error = True
+                    continue
+
+                try:
+                    c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
+                    if (c.match('')):
+                        self.log.error("Regular expression for rule %r matches empty string", name)
+                        self.error = True
+                except re.error as e:
+                    self.log.error("Invalid regular expression for rule %r. %s", name, e)
+                    if '#' in r:
+                        self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name)
+                    self.error = True
+
+            if not self.funcsym[state] and not self.strsym[state]:
+                self.log.error("No rules defined for state %r", state)
+                self.error = True
+
+            # Validate the error function
+            efunc = self.errorf.get(state, None)
+            if efunc:
+                f = efunc
+                line = f.__code__.co_firstlineno
+                file = f.__code__.co_filename
+                module = inspect.getmodule(f)
+                self.modules.add(module)
+
+                if isinstance(f, types.MethodType):
+                    reqargs = 2
+                else:
+                    reqargs = 1
+                nargs = f.__code__.co_argcount
+                if nargs > reqargs:
+                    self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__)
+                    self.error = True
+
+                if nargs < reqargs:
+                    self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__)
+                    self.error = True
+
+        for module in self.modules:
+            self.validate_module(module)
+
+    # -----------------------------------------------------------------------------
+    # validate_module()
+    #
+    # This checks to see if there are duplicated t_rulename() functions or strings
+    # in the parser input file.  This is done using a simple regular expression
+    # match on each line in the source code of the given module.
+    # -----------------------------------------------------------------------------
+
+    def validate_module(self, module):
+        try:
+            lines, linen = inspect.getsourcelines(module)
+        except IOError:
+            return
+
+        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
+        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
+
+        counthash = {}
+        linen += 1
+        for line in lines:
+            m = fre.match(line)
+            if not m:
+                m = sre.match(line)
+            if m:
+                name = m.group(1)
+                prev = counthash.get(name)
+                if not prev:
+                    counthash[name] = linen
+                else:
+                    filename = inspect.getsourcefile(module)
+                    self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev)
+                    self.error = True
+            linen += 1
+
+# -----------------------------------------------------------------------------
+# lex(module)
+#
+# Build all of the regular expression rules from definitions in the supplied module
+# -----------------------------------------------------------------------------
+def lex(*, module=None, object=None, debug=False, 
+        reflags=int(re.VERBOSE), debuglog=None, errorlog=None):
+
+    global lexer
+
+    ldict = None
+    stateinfo  = {'INITIAL': 'inclusive'}
+    lexobj = Lexer()
+    global token, input
+
+    if errorlog is None:
+        errorlog = PlyLogger(sys.stderr)
+
+    if debug:
+        if debuglog is None:
+            debuglog = PlyLogger(sys.stderr)
+
+    # Get the module dictionary used for the lexer
+    if object:
+        module = object
+
+    # Get the module dictionary used for the parser
+    if module:
+        _items = [(k, getattr(module, k)) for k in dir(module)]
+        ldict = dict(_items)
+        # If no __file__ attribute is available, try to obtain it from the __module__ instead
+        if '__file__' not in ldict:
+            ldict['__file__'] = sys.modules[ldict['__module__']].__file__
+    else:
+        ldict = get_caller_module_dict(2)
+
+    # Collect parser information from the dictionary
+    linfo = LexerReflect(ldict, log=errorlog, reflags=reflags)
+    linfo.get_all()
+    if linfo.validate_all():
+        raise SyntaxError("Can't build lexer")
+
+    # Dump some basic debugging information
+    if debug:
+        debuglog.info('lex: tokens   = %r', linfo.tokens)
+        debuglog.info('lex: literals = %r', linfo.literals)
+        debuglog.info('lex: states   = %r', linfo.stateinfo)
+
+    # Build a dictionary of valid token names
+    lexobj.lextokens = set()
+    for n in linfo.tokens:
+        lexobj.lextokens.add(n)
+
+    # Get literals specification
+    if isinstance(linfo.literals, (list, tuple)):
+        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
+    else:
+        lexobj.lexliterals = linfo.literals
+
+    lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals)
+
+    # Get the stateinfo dictionary
+    stateinfo = linfo.stateinfo
+
+    regexs = {}
+    # Build the master regular expressions
+    for state in stateinfo:
+        regex_list = []
+
+        # Add rules defined by functions first
+        for fname, f in linfo.funcsym[state]:
+            regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f)))
+            if debug:
+                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state)
+
+        # Now add all of the simple rules
+        for name, r in linfo.strsym[state]:
+            regex_list.append('(?P<%s>%s)' % (name, r))
+            if debug:
+                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state)
+
+        regexs[state] = regex_list
+
+    # Build the master regular expressions
+
+    if debug:
+        debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====')
+
+    for state in regexs:
+        lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames)
+        lexobj.lexstatere[state] = lexre
+        lexobj.lexstateretext[state] = re_text
+        lexobj.lexstaterenames[state] = re_names
+        if debug:
+            for i, text in enumerate(re_text):
+                debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text)
+
+    # For inclusive states, we need to add the regular expressions from the INITIAL state
+    for state, stype in stateinfo.items():
+        if state != 'INITIAL' and stype == 'inclusive':
+            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
+            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
+            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
+
+    lexobj.lexstateinfo = stateinfo
+    lexobj.lexre = lexobj.lexstatere['INITIAL']
+    lexobj.lexretext = lexobj.lexstateretext['INITIAL']
+    lexobj.lexreflags = reflags
+
+    # Set up ignore variables
+    lexobj.lexstateignore = linfo.ignore
+    lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '')
+
+    # Set up error functions
+    lexobj.lexstateerrorf = linfo.errorf
+    lexobj.lexerrorf = linfo.errorf.get('INITIAL', None)
+    if not lexobj.lexerrorf:
+        errorlog.warning('No t_error rule is defined')
+
+    # Set up eof functions
+    lexobj.lexstateeoff = linfo.eoff
+    lexobj.lexeoff = linfo.eoff.get('INITIAL', None)
+
+    # Check state information for ignore and error rules
+    for s, stype in stateinfo.items():
+        if stype == 'exclusive':
+            if s not in linfo.errorf:
+                errorlog.warning("No error rule is defined for exclusive state %r", s)
+            if s not in linfo.ignore and lexobj.lexignore:
+                errorlog.warning("No ignore rule is defined for exclusive state %r", s)
+        elif stype == 'inclusive':
+            if s not in linfo.errorf:
+                linfo.errorf[s] = linfo.errorf.get('INITIAL', None)
+            if s not in linfo.ignore:
+                linfo.ignore[s] = linfo.ignore.get('INITIAL', '')
+
+    # Create global versions of the token() and input() functions
+    token = lexobj.token
+    input = lexobj.input
+    lexer = lexobj
+
+    return lexobj
+
+# -----------------------------------------------------------------------------
+# runmain()
+#
+# This runs the lexer as a main program
+# -----------------------------------------------------------------------------
+
+def runmain(lexer=None, data=None):
+    if not data:
+        try:
+            filename = sys.argv[1]
+            with open(filename) as f:
+                data = f.read()
+        except IndexError:
+            sys.stdout.write('Reading from standard input (type EOF to end):\n')
+            data = sys.stdin.read()
+
+    if lexer:
+        _input = lexer.input
+    else:
+        _input = input
+    _input(data)
+    if lexer:
+        _token = lexer.token
+    else:
+        _token = token
+
+    while True:
+        tok = _token()
+        if not tok:
+            break
+        sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n')
+
+# -----------------------------------------------------------------------------
+# @TOKEN(regex)
+#
+# This decorator function can be used to set the regex expression on a function
+# when its docstring might need to be set in an alternative way
+# -----------------------------------------------------------------------------
+
+def TOKEN(r):
+    def set_regex(f):
+        if hasattr(r, '__call__'):
+            f.regex = _get_regex(r)
+        else:
+            f.regex = r
+        return f
+    return set_regex
diff --git a/src/journal_lib/parse/ply/yacc.py b/src/journal_lib/parse/ply/yacc.py
new file mode 100644
index 0000000..6528796
--- /dev/null
+++ b/src/journal_lib/parse/ply/yacc.py
@@ -0,0 +1,2482 @@
+# -----------------------------------------------------------------------------
+# ply: yacc.py
+#
+# Copyright (C) 2001-2022
+# David M. Beazley (Dabeaz LLC)
+# All rights reserved.
+#
+# Latest version: https://github.com/dabeaz/ply
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of David Beazley or Dabeaz LLC may be used to
+#   endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+#
+# This implements an LR parser that is constructed from grammar rules defined
+# as Python functions. The grammar is specified by supplying the BNF inside
+# Python documentation strings.  The inspiration for this technique was borrowed
+# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
+# Spark and the GNU bison utility.
+#
+# The current implementation is only somewhat object-oriented. The
+# LR parser itself is defined in terms of an object (which allows multiple
+# parsers to co-exist).  However, most of the variables used during table
+# construction are defined in terms of global variables.  Users shouldn't
+# notice unless they are trying to define multiple parsers at the same
+# time using threads (in which case they should have their head examined).
+#
+# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
+# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
+# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
+# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
+# by the more efficient DeRemer and Pennello algorithm.
+#
+# :::::::: WARNING :::::::
+#
+# Construction of LR parsing tables is fairly complicated and expensive.
+# To make this module run fast, a *LOT* of work has been put into
+# optimization---often at the expensive of readability and what might
+# consider to be good Python "coding style."   Modify the code at your
+# own risk!
+# ----------------------------------------------------------------------------
+
+import re
+import types
+import sys
+import inspect
+
+#-----------------------------------------------------------------------------
+#                     === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish)
+#-----------------------------------------------------------------------------
+
+yaccdebug   = False            # Debugging mode.  If set, yacc generates a
+                               # a 'parser.out' file in the current directory
+
+debug_file  = 'parser.out'     # Default name of the debugging file
+error_count = 3                # Number of symbols that must be shifted to leave recovery mode
+resultlimit = 40               # Size limit of results when running in debug mode.
+
+MAXINT = sys.maxsize
+
+# This object is a stand-in for a logging object created by the
+# logging module.   PLY will use this by default to create things
+# such as the parser.out file.  If a user wants more detailed
+# information, they can create their own logging object and pass
+# it into PLY.
+
+class PlyLogger(object):
+    def __init__(self, f):
+        self.f = f
+
+    def debug(self, msg, *args, **kwargs):
+        self.f.write((msg % args) + '\n')
+
+    info = debug
+
+    def warning(self, msg, *args, **kwargs):
+        self.f.write('WARNING: ' + (msg % args) + '\n')
+
+    def error(self, msg, *args, **kwargs):
+        self.f.write('ERROR: ' + (msg % args) + '\n')
+
+    critical = debug
+
+# Null logger is used when no output is generated. Does nothing.
+class NullLogger(object):
+    def __getattribute__(self, name):
+        return self
+
+    def __call__(self, *args, **kwargs):
+        return self
+
+# Exception raised for yacc-related errors
+class YaccError(Exception):
+    pass
+
+# Format the result message that the parser produces when running in debug mode.
+def format_result(r):
+    repr_str = repr(r)
+    if '\n' in repr_str:
+        repr_str = repr(repr_str)
+    if len(repr_str) > resultlimit:
+        repr_str = repr_str[:resultlimit] + ' ...'
+    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
+    return result
+
+# Format stack entries when the parser is running in debug mode
+def format_stack_entry(r):
+    repr_str = repr(r)
+    if '\n' in repr_str:
+        repr_str = repr(repr_str)
+    if len(repr_str) < 16:
+        return repr_str
+    else:
+        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
+
+#-----------------------------------------------------------------------------
+#                        ===  LR Parsing Engine ===
+#
+# The following classes are used for the LR parser itself.  These are not
+# used during table construction and are independent of the actual LR
+# table generation algorithm
+#-----------------------------------------------------------------------------
+
+# This class is used to hold non-terminal grammar symbols during parsing.
+# It normally has the following attributes set:
+#        .type       = Grammar symbol type
+#        .value      = Symbol value
+#        .lineno     = Starting line number
+#        .endlineno  = Ending line number (optional, set automatically)
+#        .lexpos     = Starting lex position
+#        .endlexpos  = Ending lex position (optional, set automatically)
+
+class YaccSymbol:
+    def __str__(self):
+        return self.type
+
+    def __repr__(self):
+        return str(self)
+
+# This class is a wrapper around the objects actually passed to each
+# grammar rule.   Index lookup and assignment actually assign the
+# .value attribute of the underlying YaccSymbol object.
+# The lineno() method returns the line number of a given
+# item (or 0 if not defined).   The linespan() method returns
+# a tuple of (startline,endline) representing the range of lines
+# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
+# representing the range of positional information for a symbol.
+
+class YaccProduction:
+    def __init__(self, s, stack=None):
+        self.slice = s
+        self.stack = stack
+        self.lexer = None
+        self.parser = None
+
+    def __getitem__(self, n):
+        if isinstance(n, slice):
+            return [s.value for s in self.slice[n]]
+        elif n >= 0:
+            return self.slice[n].value
+        else:
+            return self.stack[n].value
+
+    def __setitem__(self, n, v):
+        self.slice[n].value = v
+
+    def __getslice__(self, i, j):
+        return [s.value for s in self.slice[i:j]]
+
+    def __len__(self):
+        return len(self.slice)
+
+    def lineno(self, n):
+        return getattr(self.slice[n], 'lineno', 0)
+
+    def set_lineno(self, n, lineno):
+        self.slice[n].lineno = lineno
+
+    def linespan(self, n):
+        startline = getattr(self.slice[n], 'lineno', 0)
+        endline = getattr(self.slice[n], 'endlineno', startline)
+        return startline, endline
+
+    def lexpos(self, n):
+        return getattr(self.slice[n], 'lexpos', 0)
+
+    def set_lexpos(self, n, lexpos):
+        self.slice[n].lexpos = lexpos
+
+    def lexspan(self, n):
+        startpos = getattr(self.slice[n], 'lexpos', 0)
+        endpos = getattr(self.slice[n], 'endlexpos', startpos)
+        return startpos, endpos
+
+    def error(self):
+        raise SyntaxError
+
+# -----------------------------------------------------------------------------
+#                               == LRParser ==
+#
+# The LR Parsing engine.
+# -----------------------------------------------------------------------------
+
+class LRParser:
+    def __init__(self, lrtab, errorf):
+        self.productions = lrtab.lr_productions
+        self.action = lrtab.lr_action
+        self.goto = lrtab.lr_goto
+        self.errorfunc = errorf
+        self.set_defaulted_states()
+        self.errorok = True
+
+    def errok(self):
+        self.errorok = True
+
+    def restart(self):
+        del self.statestack[:]
+        del self.symstack[:]
+        sym = YaccSymbol()
+        sym.type = '$end'
+        self.symstack.append(sym)
+        self.statestack.append(0)
+
+    # Defaulted state support.
+    # This method identifies parser states where there is only one possible reduction action.
+    # For such states, the parser can make a choose to make a rule reduction without consuming
+    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
+    # certain kinds of advanced parsing situations where the lexer and parser interact with
+    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
+    #
+    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
+    def set_defaulted_states(self):
+        self.defaulted_states = {}
+        for state, actions in self.action.items():
+            rules = list(actions.values())
+            if len(rules) == 1 and rules[0] < 0:
+                self.defaulted_states[state] = rules[0]
+
+    def disable_defaulted_states(self):
+        self.defaulted_states = {}
+
+    # parse().
+    #
+    # This is the core parsing engine.  To operate, it requires a lexer object.
+    # Two options are provided.  The debug flag turns on debugging so that you can
+    # see the various rule reductions and parsing steps.  tracking turns on position
+    # tracking.  In this mode, symbols will record the starting/ending line number and
+    # character index.
+
+    def parse(self, input=None, lexer=None, debug=False, tracking=False):
+        # If debugging has been specified as a flag, turn it into a logging object
+        if isinstance(debug, int) and debug:
+            debug = PlyLogger(sys.stderr)
+
+        lookahead = None                         # Current lookahead symbol
+        lookaheadstack = []                      # Stack of lookahead symbols
+        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
+        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
+        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
+        defaulted_states = self.defaulted_states # Local reference to defaulted states
+        pslice  = YaccProduction(None)           # Production object passed to grammar rules
+        errorcount = 0                           # Used during error recovery
+
+        if debug:
+            debug.info('PLY: PARSE DEBUG START')
+
+        # If no lexer was given, we will try to use the lex module
+        if not lexer:
+            from . import lex
+            lexer = lex.lexer
+
+        # Set up the lexer and parser objects on pslice
+        pslice.lexer = lexer
+        pslice.parser = self
+
+        # If input was supplied, pass to lexer
+        if input is not None:
+            lexer.input(input)
+
+        # Set the token function
+        get_token = self.token = lexer.token
+
+        # Set up the state and symbol stacks
+        statestack = self.statestack = []   # Stack of parsing states
+        symstack = self.symstack = []       # Stack of grammar symbols
+        pslice.stack = symstack             # Put in the production
+        errtoken   = None                   # Err token
+
+        # The start state is assumed to be (0,$end)
+
+        statestack.append(0)
+        sym = YaccSymbol()
+        sym.type = '$end'
+        symstack.append(sym)
+        state = 0
+        while True:
+            # Get the next symbol on the input.  If a lookahead symbol
+            # is already set, we just use that. Otherwise, we'll pull
+            # the next token off of the lookaheadstack or from the lexer
+
+            if debug:
+                debug.debug('State  : %s', state)
+
+            if state not in defaulted_states:
+                if not lookahead:
+                    if not lookaheadstack:
+                        lookahead = get_token()     # Get the next token
+                    else:
+                        lookahead = lookaheadstack.pop()
+                    if not lookahead:
+                        lookahead = YaccSymbol()
+                        lookahead.type = '$end'
+
+                # Check the action table
+                ltype = lookahead.type
+                t = actions[state].get(ltype)
+            else:
+                t = defaulted_states[state]
+                if debug:
+                    debug.debug('Defaulted state %s: Reduce using %d', state, -t)
+
+            if debug:
+                debug.debug('Stack  : %s',
+                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
+
+            if t is not None:
+                if t > 0:
+                    # shift a symbol on the stack
+                    statestack.append(t)
+                    state = t
+
+                    if debug:
+                        debug.debug('Action : Shift and goto state %s', t)
+
+                    symstack.append(lookahead)
+                    lookahead = None
+
+                    # Decrease error count on successful shift
+                    if errorcount:
+                        errorcount -= 1
+                    continue
+
+                if t < 0:
+                    # reduce a symbol on the stack, emit a production
+                    p = prod[-t]
+                    pname = p.name
+                    plen  = p.len
+
+                    # Get production function
+                    sym = YaccSymbol()
+                    sym.type = pname       # Production name
+                    sym.value = None
+
+                    if debug:
+                        if plen:
+                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
+                                       '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
+                                       goto[statestack[-1-plen]][pname])
+                        else:
+                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
+                                       goto[statestack[-1]][pname])
+
+                    if plen:
+                        targ = symstack[-plen-1:]
+                        targ[0] = sym
+
+                        if tracking:
+                            t1 = targ[1]
+                            sym.lineno = t1.lineno
+                            sym.lexpos = t1.lexpos
+                            t1 = targ[-1]
+                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
+                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
+
+                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+                        # The code enclosed in this section is duplicated
+                        # below as a performance optimization.  Make sure
+                        # changes get made in both locations.
+
+                        pslice.slice = targ
+
+                        try:
+                            # Call the grammar rule with our special slice object
+                            del symstack[-plen:]
+                            self.state = state
+                            p.callable(pslice)
+                            del statestack[-plen:]
+                            if debug:
+                                debug.info('Result : %s', format_result(pslice[0]))
+                            symstack.append(sym)
+                            state = goto[statestack[-1]][pname]
+                            statestack.append(state)
+                        except SyntaxError:
+                            # If an error was set. Enter error recovery state
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
+                            statestack.pop()                    # Pop back one state (before the reduce)
+                            state = statestack[-1]
+                            sym.type = 'error'
+                            sym.value = 'error'
+                            lookahead = sym
+                            errorcount = error_count
+                            self.errorok = False
+
+                        continue
+
+                    else:
+
+                        if tracking:
+                            sym.lineno = lexer.lineno
+                            sym.lexpos = lexer.lexpos
+
+                        targ = [sym]
+
+                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+                        # The code enclosed in this section is duplicated
+                        # above as a performance optimization.  Make sure
+                        # changes get made in both locations.
+
+                        pslice.slice = targ
+
+                        try:
+                            # Call the grammar rule with our special slice object
+                            self.state = state
+                            p.callable(pslice)
+                            if debug:
+                                debug.info('Result : %s', format_result(pslice[0]))
+                            symstack.append(sym)
+                            state = goto[statestack[-1]][pname]
+                            statestack.append(state)
+                        except SyntaxError:
+                            # If an error was set. Enter error recovery state
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            statestack.pop()                    # Pop back one state (before the reduce)
+                            state = statestack[-1]
+                            sym.type = 'error'
+                            sym.value = 'error'
+                            lookahead = sym
+                            errorcount = error_count
+                            self.errorok = False
+
+                        continue
+
+                if t == 0:
+                    n = symstack[-1]
+                    result = getattr(n, 'value', None)
+
+                    if debug:
+                        debug.info('Done   : Returning %s', format_result(result))
+                        debug.info('PLY: PARSE DEBUG END')
+
+                    return result
+
+            if t is None:
+
+                if debug:
+                    debug.error('Error  : %s',
+                                ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
+
+                # We have some kind of parsing error here.  To handle
+                # this, we are going to push the current token onto
+                # the tokenstack and replace it with an 'error' token.
+                # If there are any synchronization rules, they may
+                # catch it.
+                #
+                # In addition to pushing the error token, we call call
+                # the user defined p_error() function if this is the
+                # first syntax error.  This function is only called if
+                # errorcount == 0.
+                if errorcount == 0 or self.errorok:
+                    errorcount = error_count
+                    self.errorok = False
+                    errtoken = lookahead
+                    if errtoken.type == '$end':
+                        errtoken = None               # End of file!
+                    if self.errorfunc:
+                        if errtoken and not hasattr(errtoken, 'lexer'):
+                            errtoken.lexer = lexer
+                        self.state = state
+                        tok = self.errorfunc(errtoken)
+                        if self.errorok:
+                            # User must have done some kind of panic
+                            # mode recovery on their own.  The
+                            # returned token is the next lookahead
+                            lookahead = tok
+                            errtoken = None
+                            continue
+                    else:
+                        if errtoken:
+                            if hasattr(errtoken, 'lineno'):
+                                lineno = lookahead.lineno
+                            else:
+                                lineno = 0
+                            if lineno:
+                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
+                            else:
+                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
+                        else:
+                            sys.stderr.write('yacc: Parse error in input. EOF\n')
+                            return
+
+                else:
+                    errorcount = error_count
+
+                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
+                # entire parse has been rolled back and we're completely hosed.   The token is
+                # discarded and we just keep going.
+
+                if len(statestack) <= 1 and lookahead.type != '$end':
+                    lookahead = None
+                    errtoken = None
+                    state = 0
+                    # Nuke the pushback stack
+                    del lookaheadstack[:]
+                    continue
+
+                # case 2: the statestack has a couple of entries on it, but we're
+                # at the end of the file. nuke the top entry and generate an error token
+
+                # Start nuking entries on the stack
+                if lookahead.type == '$end':
+                    # Whoa. We're really hosed here. Bail out
+                    return
+
+                if lookahead.type != 'error':
+                    sym = symstack[-1]
+                    if sym.type == 'error':
+                        # Hmmm. Error is on top of stack, we'll just nuke input
+                        # symbol and continue
+                        if tracking:
+                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
+                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
+                        lookahead = None
+                        continue
+
+                    # Create the error symbol for the first time and make it the new lookahead symbol
+                    t = YaccSymbol()
+                    t.type = 'error'
+
+                    if hasattr(lookahead, 'lineno'):
+                        t.lineno = t.endlineno = lookahead.lineno
+                    if hasattr(lookahead, 'lexpos'):
+                        t.lexpos = t.endlexpos = lookahead.lexpos
+                    t.value = lookahead
+                    lookaheadstack.append(lookahead)
+                    lookahead = t
+                else:
+                    sym = symstack.pop()
+                    if tracking:
+                        lookahead.lineno = sym.lineno
+                        lookahead.lexpos = sym.lexpos
+                    statestack.pop()
+                    state = statestack[-1]
+
+                continue
+
+            # If we'r here, something really bad happened
+            raise RuntimeError('yacc: internal parser error!!!\n')
+
+# -----------------------------------------------------------------------------
+#                          === Grammar Representation ===
+#
+# The following functions, classes, and variables are used to represent and
+# manipulate the rules that make up a grammar.
+# -----------------------------------------------------------------------------
+
+# regex matching identifiers
+_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
+
+# -----------------------------------------------------------------------------
+# class Production:
+#
+# This class stores the raw information about a single production or grammar rule.
+# A grammar rule refers to a specification such as this:
+#
+#       expr : expr PLUS term
+#
+# Here are the basic attributes defined on all productions
+#
+#       name     - Name of the production.  For example 'expr'
+#       prod     - A list of symbols on the right side ['expr','PLUS','term']
+#       prec     - Production precedence level
+#       number   - Production number.
+#       func     - Function that executes on reduce
+#       file     - File where production function is defined
+#       lineno   - Line number where production function is defined
+#
+# The following attributes are defined or optional.
+#
+#       len       - Length of the production (number of symbols on right hand side)
+#       usyms     - Set of unique symbols found in the production
+# -----------------------------------------------------------------------------
+
+class Production(object):
+    reduced = 0
+    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
+        self.name     = name
+        self.prod     = tuple(prod)
+        self.number   = number
+        self.func     = func
+        self.callable = None
+        self.file     = file
+        self.line     = line
+        self.prec     = precedence
+
+        # Internal settings used during table construction
+
+        self.len  = len(self.prod)   # Length of the production
+
+        # Create a list of unique production symbols used in the production
+        self.usyms = []
+        for s in self.prod:
+            if s not in self.usyms:
+                self.usyms.append(s)
+
+        # List of all LR items for the production
+        self.lr_items = []
+        self.lr_next = None
+
+        # Create a string representation
+        if self.prod:
+            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
+        else:
+            self.str = '%s -> <empty>' % self.name
+
+    def __str__(self):
+        return self.str
+
+    def __repr__(self):
+        return 'Production(' + str(self) + ')'
+
+    def __len__(self):
+        return len(self.prod)
+
+    def __nonzero__(self):
+        return 1
+
+    def __getitem__(self, index):
+        return self.prod[index]
+
+    # Return the nth lr_item from the production (or None if at the end)
+    def lr_item(self, n):
+        if n > len(self.prod):
+            return None
+        p = LRItem(self, n)
+        # Precompute the list of productions immediately following.
+        try:
+            p.lr_after = self.Prodnames[p.prod[n+1]]
+        except (IndexError, KeyError):
+            p.lr_after = []
+        try:
+            p.lr_before = p.prod[n-1]
+        except IndexError:
+            p.lr_before = None
+        return p
+
+    # Bind the production function name to a callable
+    def bind(self, pdict):
+        if self.func:
+            self.callable = pdict[self.func]
+
+# -----------------------------------------------------------------------------
+# class LRItem
+#
+# This class represents a specific stage of parsing a production rule.  For
+# example:
+#
+#       expr : expr . PLUS term
+#
+# In the above, the "." represents the current location of the parse.  Here
+# basic attributes:
+#
+#       name       - Name of the production.  For example 'expr'
+#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
+#       number     - Production number.
+#
+#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
+#                    then lr_next refers to 'expr -> expr PLUS . term'
+#       lr_index   - LR item index (location of the ".") in the prod list.
+#       lookaheads - LALR lookahead symbols for this item
+#       len        - Length of the production (number of symbols on right hand side)
+#       lr_after    - List of all productions that immediately follow
+#       lr_before   - Grammar symbol immediately before
+# -----------------------------------------------------------------------------
+
+class LRItem(object):
+    def __init__(self, p, n):
+        self.name       = p.name
+        self.prod       = list(p.prod)
+        self.number     = p.number
+        self.lr_index   = n
+        self.lookaheads = {}
+        self.prod.insert(n, '.')
+        self.prod       = tuple(self.prod)
+        self.len        = len(self.prod)
+        self.usyms      = p.usyms
+
+    def __str__(self):
+        if self.prod:
+            s = '%s -> %s' % (self.name, ' '.join(self.prod))
+        else:
+            s = '%s -> <empty>' % self.name
+        return s
+
+    def __repr__(self):
+        return 'LRItem(' + str(self) + ')'
+
+# -----------------------------------------------------------------------------
+# rightmost_terminal()
+#
+# Return the rightmost terminal from a list of symbols.  Used in add_production()
+# -----------------------------------------------------------------------------
+def rightmost_terminal(symbols, terminals):
+    i = len(symbols) - 1
+    while i >= 0:
+        if symbols[i] in terminals:
+            return symbols[i]
+        i -= 1
+    return None
+
+# -----------------------------------------------------------------------------
+#                           === GRAMMAR CLASS ===
+#
+# The following class represents the contents of the specified grammar along
+# with various computed properties such as first sets, follow sets, LR items, etc.
+# This data is used for critical parts of the table generation process later.
+# -----------------------------------------------------------------------------
+
+class GrammarError(YaccError):
+    pass
+
+class Grammar(object):
+    def __init__(self, terminals):
+        self.Productions  = [None]  # A list of all of the productions.  The first
+                                    # entry is always reserved for the purpose of
+                                    # building an augmented grammar
+
+        self.Prodnames    = {}      # A dictionary mapping the names of nonterminals to a list of all
+                                    # productions of that nonterminal.
+
+        self.Prodmap      = {}      # A dictionary that is only used to detect duplicate
+                                    # productions.
+
+        self.Terminals    = {}      # A dictionary mapping the names of terminal symbols to a
+                                    # list of the rules where they are used.
+
+        for term in terminals:
+            self.Terminals[term] = []
+
+        self.Terminals['error'] = []
+
+        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
+                                    # of rule numbers where they are used.
+
+        self.First        = {}      # A dictionary of precomputed FIRST(x) symbols
+
+        self.Follow       = {}      # A dictionary of precomputed FOLLOW(x) symbols
+
+        self.Precedence   = {}      # Precedence rules for each terminal. Contains tuples of the
+                                    # form ('right',level) or ('nonassoc', level) or ('left',level)
+
+        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
+                                    # This is only used to provide error checking and to generate
+                                    # a warning about unused precedence rules.
+
+        self.Start = None           # Starting symbol for the grammar
+
+
+    def __len__(self):
+        return len(self.Productions)
+
+    def __getitem__(self, index):
+        return self.Productions[index]
+
+    # -----------------------------------------------------------------------------
+    # set_precedence()
+    #
+    # Sets the precedence for a given terminal. assoc is the associativity such as
+    # 'left','right', or 'nonassoc'.  level is a numeric level.
+    #
+    # -----------------------------------------------------------------------------
+
+    def set_precedence(self, term, assoc, level):
+        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
+        if term in self.Precedence:
+            raise GrammarError('Precedence already specified for terminal %r' % term)
+        if assoc not in ['left', 'right', 'nonassoc']:
+            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
+        self.Precedence[term] = (assoc, level)
+
+    # -----------------------------------------------------------------------------
+    # add_production()
+    #
+    # Given an action function, this function assembles a production rule and
+    # computes its precedence level.
+    #
+    # The production rule is supplied as a list of symbols.   For example,
+    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
+    # symbols ['expr','PLUS','term'].
+    #
+    # Precedence is determined by the precedence of the right-most non-terminal
+    # or the precedence of a terminal specified by %prec.
+    #
+    # A variety of error checks are performed to make sure production symbols
+    # are valid and that %prec is used correctly.
+    # -----------------------------------------------------------------------------
+
+    def add_production(self, prodname, syms, func=None, file='', line=0):
+
+        if prodname in self.Terminals:
+            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
+        if prodname == 'error':
+            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
+        if not _is_identifier.match(prodname):
+            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
+
+        # Look for literal tokens
+        for n, s in enumerate(syms):
+            if s[0] in "'\"":
+                try:
+                    c = eval(s)
+                    if (len(c) > 1):
+                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
+                                           (file, line, s, prodname))
+                    if c not in self.Terminals:
+                        self.Terminals[c] = []
+                    syms[n] = c
+                    continue
+                except SyntaxError:
+                    pass
+            if not _is_identifier.match(s) and s != '%prec':
+                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
+
+        # Determine the precedence level
+        if '%prec' in syms:
+            if syms[-1] == '%prec':
+                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
+            if syms[-2] != '%prec':
+                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
+                                   (file, line))
+            precname = syms[-1]
+            prodprec = self.Precedence.get(precname)
+            if not prodprec:
+                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
+            else:
+                self.UsedPrecedence.add(precname)
+            del syms[-2:]     # Drop %prec from the rule
+        else:
+            # If no %prec, precedence is determined by the rightmost terminal symbol
+            precname = rightmost_terminal(syms, self.Terminals)
+            prodprec = self.Precedence.get(precname, ('right', 0))
+
+        # See if the rule is already in the rulemap
+        map = '%s -> %s' % (prodname, syms)
+        if map in self.Prodmap:
+            m = self.Prodmap[map]
+            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
+                               'Previous definition at %s:%d' % (m.file, m.line))
+
+        # From this point on, everything is valid.  Create a new Production instance
+        pnumber  = len(self.Productions)
+        if prodname not in self.Nonterminals:
+            self.Nonterminals[prodname] = []
+
+        # Add the production number to Terminals and Nonterminals
+        for t in syms:
+            if t in self.Terminals:
+                self.Terminals[t].append(pnumber)
+            else:
+                if t not in self.Nonterminals:
+                    self.Nonterminals[t] = []
+                self.Nonterminals[t].append(pnumber)
+
+        # Create a production and add it to the list of productions
+        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
+        self.Productions.append(p)
+        self.Prodmap[map] = p
+
+        # Add to the global productions list
+        try:
+            self.Prodnames[prodname].append(p)
+        except KeyError:
+            self.Prodnames[prodname] = [p]
+
+    # -----------------------------------------------------------------------------
+    # set_start()
+    #
+    # Sets the starting symbol and creates the augmented grammar.  Production
+    # rule 0 is S' -> start where start is the start symbol.
+    # -----------------------------------------------------------------------------
+
+    def set_start(self, start=None):
+        if not start:
+            start = self.Productions[1].name
+        if start not in self.Nonterminals:
+            raise GrammarError('start symbol %s undefined' % start)
+        self.Productions[0] = Production(0, "S'", [start])
+        self.Nonterminals[start].append(0)
+        self.Start = start
+
+    # -----------------------------------------------------------------------------
+    # find_unreachable()
+    #
+    # Find all of the nonterminal symbols that can't be reached from the starting
+    # symbol.  Returns a list of nonterminals that can't be reached.
+    # -----------------------------------------------------------------------------
+
+    def find_unreachable(self):
+
+        # Mark all symbols that are reachable from a symbol s
+        def mark_reachable_from(s):
+            if s in reachable:
+                return
+            reachable.add(s)
+            for p in self.Prodnames.get(s, []):
+                for r in p.prod:
+                    mark_reachable_from(r)
+
+        reachable = set()
+        mark_reachable_from(self.Productions[0].prod[0])
+        return [s for s in self.Nonterminals if s not in reachable]
+
+    # -----------------------------------------------------------------------------
+    # infinite_cycles()
+    #
+    # This function looks at the various parsing rules and tries to detect
+    # infinite recursion cycles (grammar rules where there is no possible way
+    # to derive a string of only terminals).
+    # -----------------------------------------------------------------------------
+
+    def infinite_cycles(self):
+        terminates = {}
+
+        # Terminals:
+        for t in self.Terminals:
+            terminates[t] = True
+
+        terminates['$end'] = True
+
+        # Nonterminals:
+
+        # Initialize to false:
+        for n in self.Nonterminals:
+            terminates[n] = False
+
+        # Then propagate termination until no change:
+        while True:
+            some_change = False
+            for (n, pl) in self.Prodnames.items():
+                # Nonterminal n terminates iff any of its productions terminates.
+                for p in pl:
+                    # Production p terminates iff all of its rhs symbols terminate.
+                    for s in p.prod:
+                        if not terminates[s]:
+                            # The symbol s does not terminate,
+                            # so production p does not terminate.
+                            p_terminates = False
+                            break
+                    else:
+                        # didn't break from the loop,
+                        # so every symbol s terminates
+                        # so production p terminates.
+                        p_terminates = True
+
+                    if p_terminates:
+                        # symbol n terminates!
+                        if not terminates[n]:
+                            terminates[n] = True
+                            some_change = True
+                        # Don't need to consider any more productions for this n.
+                        break
+
+            if not some_change:
+                break
+
+        infinite = []
+        for (s, term) in terminates.items():
+            if not term:
+                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+                    # s is used-but-not-defined, and we've already warned of that,
+                    # so it would be overkill to say that it's also non-terminating.
+                    pass
+                else:
+                    infinite.append(s)
+
+        return infinite
+
+    # -----------------------------------------------------------------------------
+    # undefined_symbols()
+    #
+    # Find all symbols that were used the grammar, but not defined as tokens or
+    # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
+    # and prod is the production where the symbol was used.
+    # -----------------------------------------------------------------------------
+    def undefined_symbols(self):
+        result = []
+        for p in self.Productions:
+            if not p:
+                continue
+
+            for s in p.prod:
+                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+                    result.append((s, p))
+        return result
+
+    # -----------------------------------------------------------------------------
+    # unused_terminals()
+    #
+    # Find all terminals that were defined, but not used by the grammar.  Returns
+    # a list of all symbols.
+    # -----------------------------------------------------------------------------
+    def unused_terminals(self):
+        unused_tok = []
+        for s, v in self.Terminals.items():
+            if s != 'error' and not v:
+                unused_tok.append(s)
+
+        return unused_tok
+
+    # ------------------------------------------------------------------------------
+    # unused_rules()
+    #
+    # Find all grammar rules that were defined,  but not used (maybe not reachable)
+    # Returns a list of productions.
+    # ------------------------------------------------------------------------------
+
+    def unused_rules(self):
+        unused_prod = []
+        for s, v in self.Nonterminals.items():
+            if not v:
+                p = self.Prodnames[s][0]
+                unused_prod.append(p)
+        return unused_prod
+
+    # -----------------------------------------------------------------------------
+    # unused_precedence()
+    #
+    # Returns a list of tuples (term,precedence) corresponding to precedence
+    # rules that were never used by the grammar.  term is the name of the terminal
+    # on which precedence was applied and precedence is a string such as 'left' or
+    # 'right' corresponding to the type of precedence.
+    # -----------------------------------------------------------------------------
+
+    def unused_precedence(self):
+        unused = []
+        for termname in self.Precedence:
+            if not (termname in self.Terminals or termname in self.UsedPrecedence):
+                unused.append((termname, self.Precedence[termname][0]))
+
+        return unused
+
+    # -------------------------------------------------------------------------
+    # _first()
+    #
+    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
+    #
+    # During execution of compute_first1, the result may be incomplete.
+    # Afterward (e.g., when called from compute_follow()), it will be complete.
+    # -------------------------------------------------------------------------
+    def _first(self, beta):
+
+        # We are computing First(x1,x2,x3,...,xn)
+        result = []
+        for x in beta:
+            x_produces_empty = False
+
+            # Add all the non-<empty> symbols of First[x] to the result.
+            for f in self.First[x]:
+                if f == '<empty>':
+                    x_produces_empty = True
+                else:
+                    if f not in result:
+                        result.append(f)
+
+            if x_produces_empty:
+                # We have to consider the next x in beta,
+                # i.e. stay in the loop.
+                pass
+            else:
+                # We don't have to consider any further symbols in beta.
+                break
+        else:
+            # There was no 'break' from the loop,
+            # so x_produces_empty was true for all x in beta,
+            # so beta produces empty as well.
+            result.append('<empty>')
+
+        return result
+
+    # -------------------------------------------------------------------------
+    # compute_first()
+    #
+    # Compute the value of FIRST1(X) for all symbols
+    # -------------------------------------------------------------------------
+    def compute_first(self):
+        if self.First:
+            return self.First
+
+        # Terminals:
+        for t in self.Terminals:
+            self.First[t] = [t]
+
+        self.First['$end'] = ['$end']
+
+        # Nonterminals:
+
+        # Initialize to the empty set:
+        for n in self.Nonterminals:
+            self.First[n] = []
+
+        # Then propagate symbols until no change:
+        while True:
+            some_change = False
+            for n in self.Nonterminals:
+                for p in self.Prodnames[n]:
+                    for f in self._first(p.prod):
+                        if f not in self.First[n]:
+                            self.First[n].append(f)
+                            some_change = True
+            if not some_change:
+                break
+
+        return self.First
+
+    # ---------------------------------------------------------------------
+    # compute_follow()
+    #
+    # Computes all of the follow sets for every non-terminal symbol.  The
+    # follow set is the set of all symbols that might follow a given
+    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
+    # ---------------------------------------------------------------------
+    def compute_follow(self, start=None):
+        # If already computed, return the result
+        if self.Follow:
+            return self.Follow
+
+        # If first sets not computed yet, do that first.
+        if not self.First:
+            self.compute_first()
+
+        # Add '$end' to the follow list of the start symbol
+        for k in self.Nonterminals:
+            self.Follow[k] = []
+
+        if not start:
+            start = self.Productions[1].name
+
+        self.Follow[start] = ['$end']
+
+        while True:
+            didadd = False
+            for p in self.Productions[1:]:
+                # Here is the production set
+                for i, B in enumerate(p.prod):
+                    if B in self.Nonterminals:
+                        # Okay. We got a non-terminal in a production
+                        fst = self._first(p.prod[i+1:])
+                        hasempty = False
+                        for f in fst:
+                            if f != '<empty>' and f not in self.Follow[B]:
+                                self.Follow[B].append(f)
+                                didadd = True
+                            if f == '<empty>':
+                                hasempty = True
+                        if hasempty or i == (len(p.prod)-1):
+                            # Add elements of follow(a) to follow(b)
+                            for f in self.Follow[p.name]:
+                                if f not in self.Follow[B]:
+                                    self.Follow[B].append(f)
+                                    didadd = True
+            if not didadd:
+                break
+        return self.Follow
+
+
+    # -----------------------------------------------------------------------------
+    # build_lritems()
+    #
+    # This function walks the list of productions and builds a complete set of the
+    # LR items.  The LR items are stored in two ways:  First, they are uniquely
+    # numbered and placed in the list _lritems.  Second, a linked list of LR items
+    # is built for each production.  For example:
+    #
+    #   E -> E PLUS E
+    #
+    # Creates the list
+    #
+    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
+    # -----------------------------------------------------------------------------
+
+    def build_lritems(self):
+        for p in self.Productions:
+            lastlri = p
+            i = 0
+            lr_items = []
+            while True:
+                if i > len(p):
+                    lri = None
+                else:
+                    lri = LRItem(p, i)
+                    # Precompute the list of productions immediately following
+                    try:
+                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
+                    except (IndexError, KeyError):
+                        lri.lr_after = []
+                    try:
+                        lri.lr_before = lri.prod[i-1]
+                    except IndexError:
+                        lri.lr_before = None
+
+                lastlri.lr_next = lri
+                if not lri:
+                    break
+                lr_items.append(lri)
+                lastlri = lri
+                i += 1
+            p.lr_items = lr_items
+
+# -----------------------------------------------------------------------------
+#                           === LR Generator ===
+#
+# The following classes and functions are used to generate LR parsing tables on
+# a grammar.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# digraph()
+# traverse()
+#
+# The following two functions are used to compute set valued functions
+# of the form:
+#
+#     F(x) = F'(x) U U{F(y) | x R y}
+#
+# This is used to compute the values of Read() sets as well as FOLLOW sets
+# in LALR(1) generation.
+#
+# Inputs:  X    - An input set
+#          R    - A relation
+#          FP   - Set-valued function
+# ------------------------------------------------------------------------------
+
+def digraph(X, R, FP):
+    N = {}
+    for x in X:
+        N[x] = 0
+    stack = []
+    F = {}
+    for x in X:
+        if N[x] == 0:
+            traverse(x, N, stack, F, X, R, FP)
+    return F
+
+def traverse(x, N, stack, F, X, R, FP):
+    stack.append(x)
+    d = len(stack)
+    N[x] = d
+    F[x] = FP(x)             # F(X) <- F'(x)
+
+    rel = R(x)               # Get y's related to x
+    for y in rel:
+        if N[y] == 0:
+            traverse(y, N, stack, F, X, R, FP)
+        N[x] = min(N[x], N[y])
+        for a in F.get(y, []):
+            if a not in F[x]:
+                F[x].append(a)
+    if N[x] == d:
+        N[stack[-1]] = MAXINT
+        F[stack[-1]] = F[x]
+        element = stack.pop()
+        while element != x:
+            N[stack[-1]] = MAXINT
+            F[stack[-1]] = F[x]
+            element = stack.pop()
+
+class LALRError(YaccError):
+    pass
+
+
+# -----------------------------------------------------------------------------
+#                             == LRTable ==
+#
+# This class implements the LR table generation algorithm.  There are no
+# public methods.
+# -----------------------------------------------------------------------------
+
+class LRTable:
+    def __init__(self, grammar, log=None):
+        self.grammar = grammar
+
+        # Set up the logger
+        if not log:
+            log = NullLogger()
+        self.log = log
+
+        # Internal attributes
+        self.lr_action     = {}        # Action table
+        self.lr_goto       = {}        # Goto table
+        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
+        self.lr_goto_cache = {}        # Cache of computed gotos
+        self.lr0_cidhash   = {}        # Cache of closures
+
+        self._add_count    = 0         # Internal counter used to detect cycles
+
+        # Diagnostic information filled in by the table generator
+        self.sr_conflict   = 0
+        self.rr_conflict   = 0
+        self.conflicts     = []        # List of conflicts
+
+        self.sr_conflicts  = []
+        self.rr_conflicts  = []
+
+        # Build the tables
+        self.grammar.build_lritems()
+        self.grammar.compute_first()
+        self.grammar.compute_follow()
+        self.lr_parse_table()
+
+    # Bind all production function names to callable objects in pdict
+    def bind_callables(self, pdict):
+        for p in self.lr_productions:
+            p.bind(pdict)
+
+    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
+
+    def lr0_closure(self, I):
+        self._add_count += 1
+
+        # Add everything in I to J
+        J = I[:]
+        didadd = True
+        while didadd:
+            didadd = False
+            for j in J:
+                for x in j.lr_after:
+                    if getattr(x, 'lr0_added', 0) == self._add_count:
+                        continue
+                    # Add B --> .G to J
+                    J.append(x.lr_next)
+                    x.lr0_added = self._add_count
+                    didadd = True
+
+        return J
+
+    # Compute the LR(0) goto function goto(I,X) where I is a set
+    # of LR(0) items and X is a grammar symbol.   This function is written
+    # in a way that guarantees uniqueness of the generated goto sets
+    # (i.e. the same goto set will never be returned as two different Python
+    # objects).  With uniqueness, we can later do fast set comparisons using
+    # id(obj) instead of element-wise comparison.
+
+    def lr0_goto(self, I, x):
+        # First we look for a previously cached entry
+        g = self.lr_goto_cache.get((id(I), x))
+        if g:
+            return g
+
+        # Now we generate the goto set in a way that guarantees uniqueness
+        # of the result
+
+        s = self.lr_goto_cache.get(x)
+        if not s:
+            s = {}
+            self.lr_goto_cache[x] = s
+
+        gs = []
+        for p in I:
+            n = p.lr_next
+            if n and n.lr_before == x:
+                s1 = s.get(id(n))
+                if not s1:
+                    s1 = {}
+                    s[id(n)] = s1
+                gs.append(n)
+                s = s1
+        g = s.get('$end')
+        if not g:
+            if gs:
+                g = self.lr0_closure(gs)
+                s['$end'] = g
+            else:
+                s['$end'] = gs
+        self.lr_goto_cache[(id(I), x)] = g
+        return g
+
+    # Compute the LR(0) sets of item function
+    def lr0_items(self):
+        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
+        i = 0
+        for I in C:
+            self.lr0_cidhash[id(I)] = i
+            i += 1
+
+        # Loop over the items in C and each grammar symbols
+        i = 0
+        while i < len(C):
+            I = C[i]
+            i += 1
+
+            # Collect all of the symbols that could possibly be in the goto(I,X) sets
+            asyms = {}
+            for ii in I:
+                for s in ii.usyms:
+                    asyms[s] = None
+
+            for x in asyms:
+                g = self.lr0_goto(I, x)
+                if not g or id(g) in self.lr0_cidhash:
+                    continue
+                self.lr0_cidhash[id(g)] = len(C)
+                C.append(g)
+
+        return C
+
+    # -----------------------------------------------------------------------------
+    #                       ==== LALR(1) Parsing ====
+    #
+    # LALR(1) parsing is almost exactly the same as SLR except that instead of
+    # relying upon Follow() sets when performing reductions, a more selective
+    # lookahead set that incorporates the state of the LR(0) machine is utilized.
+    # Thus, we mainly just have to focus on calculating the lookahead sets.
+    #
+    # The method used here is due to DeRemer and Pennelo (1982).
+    #
+    # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
+    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
+    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
+    #
+    # Further details can also be found in:
+    #
+    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
+    #      McGraw-Hill Book Company, (1985).
+    #
+    # -----------------------------------------------------------------------------
+
+    # -----------------------------------------------------------------------------
+    # compute_nullable_nonterminals()
+    #
+    # Creates a dictionary containing all of the non-terminals that might produce
+    # an empty production.
+    # -----------------------------------------------------------------------------
+
+    def compute_nullable_nonterminals(self):
+        nullable = set()
+        num_nullable = 0
+        while True:
+            for p in self.grammar.Productions[1:]:
+                if p.len == 0:
+                    nullable.add(p.name)
+                    continue
+                for t in p.prod:
+                    if t not in nullable:
+                        break
+                else:
+                    nullable.add(p.name)
+            if len(nullable) == num_nullable:
+                break
+            num_nullable = len(nullable)
+        return nullable
+
+    # -----------------------------------------------------------------------------
+    # find_nonterminal_trans(C)
+    #
+    # Given a set of LR(0) items, this functions finds all of the non-terminal
+    # transitions.    These are transitions in which a dot appears immediately before
+    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
+    # is the state number and N is the nonterminal symbol.
+    #
+    # The input C is the set of LR(0) items.
+    # -----------------------------------------------------------------------------
+
+    def find_nonterminal_transitions(self, C):
+        trans = []
+        for stateno, state in enumerate(C):
+            for p in state:
+                if p.lr_index < p.len - 1:
+                    t = (stateno, p.prod[p.lr_index+1])
+                    if t[1] in self.grammar.Nonterminals:
+                        if t not in trans:
+                            trans.append(t)
+        return trans
+
+    # -----------------------------------------------------------------------------
+    # dr_relation()
+    #
+    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
+    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
+    #
+    # Returns a list of terminals.
+    # -----------------------------------------------------------------------------
+
+    def dr_relation(self, C, trans, nullable):
+        state, N = trans
+        terms = []
+
+        g = self.lr0_goto(C[state], N)
+        for p in g:
+            if p.lr_index < p.len - 1:
+                a = p.prod[p.lr_index+1]
+                if a in self.grammar.Terminals:
+                    if a not in terms:
+                        terms.append(a)
+
+        # This extra bit is to handle the start state
+        if state == 0 and N == self.grammar.Productions[0].prod[0]:
+            terms.append('$end')
+
+        return terms
+
+    # -----------------------------------------------------------------------------
+    # reads_relation()
+    #
+    # Computes the READS() relation (p,A) READS (t,C).
+    # -----------------------------------------------------------------------------
+
+    def reads_relation(self, C, trans, empty):
+        # Look for empty transitions
+        rel = []
+        state, N = trans
+
+        g = self.lr0_goto(C[state], N)
+        j = self.lr0_cidhash.get(id(g), -1)
+        for p in g:
+            if p.lr_index < p.len - 1:
+                a = p.prod[p.lr_index + 1]
+                if a in empty:
+                    rel.append((j, a))
+
+        return rel
+
+    # -----------------------------------------------------------------------------
+    # compute_lookback_includes()
+    #
+    # Determines the lookback and includes relations
+    #
+    # LOOKBACK:
+    #
+    # This relation is determined by running the LR(0) state machine forward.
+    # For example, starting with a production "N : . A B C", we run it forward
+    # to obtain "N : A B C ."   We then build a relationship between this final
+    # state and the starting state.   These relationships are stored in a dictionary
+    # lookdict.
+    #
+    # INCLUDES:
+    #
+    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
+    #
+    # This relation is used to determine non-terminal transitions that occur
+    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
+    # if the following holds:
+    #
+    #       B -> LAT, where T -> epsilon and p' -L-> p
+    #
+    # L is essentially a prefix (which may be empty), T is a suffix that must be
+    # able to derive an empty string.  State p' must lead to state p with the string L.
+    #
+    # -----------------------------------------------------------------------------
+
+    def compute_lookback_includes(self, C, trans, nullable):
+        lookdict = {}          # Dictionary of lookback relations
+        includedict = {}       # Dictionary of include relations
+
+        # Make a dictionary of non-terminal transitions
+        dtrans = {}
+        for t in trans:
+            dtrans[t] = 1
+
+        # Loop over all transitions and compute lookbacks and includes
+        for state, N in trans:
+            lookb = []
+            includes = []
+            for p in C[state]:
+                if p.name != N:
+                    continue
+
+                # Okay, we have a name match.  We now follow the production all the way
+                # through the state machine until we get the . on the right hand side
+
+                lr_index = p.lr_index
+                j = state
+                while lr_index < p.len - 1:
+                    lr_index = lr_index + 1
+                    t = p.prod[lr_index]
+
+                    # Check to see if this symbol and state are a non-terminal transition
+                    if (j, t) in dtrans:
+                        # Yes.  Okay, there is some chance that this is an includes relation
+                        # the only way to know for certain is whether the rest of the
+                        # production derives empty
+
+                        li = lr_index + 1
+                        while li < p.len:
+                            if p.prod[li] in self.grammar.Terminals:
+                                break      # No forget it
+                            if p.prod[li] not in nullable:
+                                break
+                            li = li + 1
+                        else:
+                            # Appears to be a relation between (j,t) and (state,N)
+                            includes.append((j, t))
+
+                    g = self.lr0_goto(C[j], t)               # Go to next set
+                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state
+
+                # When we get here, j is the final state, now we have to locate the production
+                for r in C[j]:
+                    if r.name != p.name:
+                        continue
+                    if r.len != p.len:
+                        continue
+                    i = 0
+                    # This look is comparing a production ". A B C" with "A B C ."
+                    while i < r.lr_index:
+                        if r.prod[i] != p.prod[i+1]:
+                            break
+                        i = i + 1
+                    else:
+                        lookb.append((j, r))
+            for i in includes:
+                if i not in includedict:
+                    includedict[i] = []
+                includedict[i].append((state, N))
+            lookdict[(state, N)] = lookb
+
+        return lookdict, includedict
+
+    # -----------------------------------------------------------------------------
+    # compute_read_sets()
+    #
+    # Given a set of LR(0) items, this function computes the read sets.
+    #
+    # Inputs:  C        =  Set of LR(0) items
+    #          ntrans   = Set of nonterminal transitions
+    #          nullable = Set of empty transitions
+    #
+    # Returns a set containing the read sets
+    # -----------------------------------------------------------------------------
+
+    def compute_read_sets(self, C, ntrans, nullable):
+        FP = lambda x: self.dr_relation(C, x, nullable)
+        R =  lambda x: self.reads_relation(C, x, nullable)
+        F = digraph(ntrans, R, FP)
+        return F
+
+    # -----------------------------------------------------------------------------
+    # compute_follow_sets()
+    #
+    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
+    # and an include set, this function computes the follow sets
+    #
+    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
+    #
+    # Inputs:
+    #            ntrans     = Set of nonterminal transitions
+    #            readsets   = Readset (previously computed)
+    #            inclsets   = Include sets (previously computed)
+    #
+    # Returns a set containing the follow sets
+    # -----------------------------------------------------------------------------
+
+    def compute_follow_sets(self, ntrans, readsets, inclsets):
+        FP = lambda x: readsets[x]
+        R  = lambda x: inclsets.get(x, [])
+        F = digraph(ntrans, R, FP)
+        return F
+
+    # -----------------------------------------------------------------------------
+    # add_lookaheads()
+    #
+    # Attaches the lookahead symbols to grammar rules.
+    #
+    # Inputs:    lookbacks         -  Set of lookback relations
+    #            followset         -  Computed follow set
+    #
+    # This function directly attaches the lookaheads to productions contained
+    # in the lookbacks set
+    # -----------------------------------------------------------------------------
+
+    def add_lookaheads(self, lookbacks, followset):
+        for trans, lb in lookbacks.items():
+            # Loop over productions in lookback
+            for state, p in lb:
+                if state not in p.lookaheads:
+                    p.lookaheads[state] = []
+                f = followset.get(trans, [])
+                for a in f:
+                    if a not in p.lookaheads[state]:
+                        p.lookaheads[state].append(a)
+
+    # -----------------------------------------------------------------------------
+    # add_lalr_lookaheads()
+    #
+    # This function does all of the work of adding lookahead information for use
+    # with LALR parsing
+    # -----------------------------------------------------------------------------
+
+    def add_lalr_lookaheads(self, C):
+        # Determine all of the nullable nonterminals
+        nullable = self.compute_nullable_nonterminals()
+
+        # Find all non-terminal transitions
+        trans = self.find_nonterminal_transitions(C)
+
+        # Compute read sets
+        readsets = self.compute_read_sets(C, trans, nullable)
+
+        # Compute lookback/includes relations
+        lookd, included = self.compute_lookback_includes(C, trans, nullable)
+
+        # Compute LALR FOLLOW sets
+        followsets = self.compute_follow_sets(trans, readsets, included)
+
+        # Add all of the lookaheads
+        self.add_lookaheads(lookd, followsets)
+
+    # -----------------------------------------------------------------------------
+    # lr_parse_table()
+    #
+    # This function constructs the parse tables for SLR or LALR
+    # -----------------------------------------------------------------------------
+    def lr_parse_table(self):
+        Productions = self.grammar.Productions
+        Precedence  = self.grammar.Precedence
+        goto   = self.lr_goto         # Goto array
+        action = self.lr_action       # Action array
+        log    = self.log             # Logger for output
+
+        actionp = {}                  # Action production array (temporary)
+
+        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
+        # This determines the number of states
+
+        C = self.lr0_items()
+        self.add_lalr_lookaheads(C)
+
+        # Build the parser table, state by state
+        st = 0
+        for I in C:
+            # Loop over each production in I
+            actlist = []              # List of actions
+            st_action  = {}
+            st_actionp = {}
+            st_goto    = {}
+            log.info('')
+            log.info('state %d', st)
+            log.info('')
+            for p in I:
+                log.info('    (%d) %s', p.number, p)
+            log.info('')
+
+            for p in I:
+                    if p.len == p.lr_index + 1:
+                        if p.name == "S'":
+                            # Start symbol. Accept!
+                            st_action['$end'] = 0
+                            st_actionp['$end'] = p
+                        else:
+                            # We are at the end of a production.  Reduce!
+                            laheads = p.lookaheads[st]
+                            for a in laheads:
+                                actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
+                                r = st_action.get(a)
+                                if r is not None:
+                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
+                                    if r > 0:
+                                        # Need to decide on shift or reduce here
+                                        # By default we favor shifting. Need to add
+                                        # some precedence rules here.
+
+                                        # Shift precedence comes from the token
+                                        sprec, slevel = Precedence.get(a, ('right', 0))
+
+                                        # Reduce precedence comes from rule being reduced (p)
+                                        rprec, rlevel = Productions[p.number].prec
+
+                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
+                                            # We really need to reduce here.
+                                            st_action[a] = -p.number
+                                            st_actionp[a] = p
+                                            if not slevel and not rlevel:
+                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
+                                                self.sr_conflicts.append((st, a, 'reduce'))
+                                            Productions[p.number].reduced += 1
+                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
+                                            st_action[a] = None
+                                        else:
+                                            # Hmmm. Guess we'll keep the shift
+                                            if not rlevel:
+                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
+                                                self.sr_conflicts.append((st, a, 'shift'))
+                                    elif r < 0:
+                                        # Reduce/reduce conflict.   In this case, we favor the rule
+                                        # that was defined first in the grammar file
+                                        oldp = Productions[-r]
+                                        pp = Productions[p.number]
+                                        if oldp.line > pp.line:
+                                            st_action[a] = -p.number
+                                            st_actionp[a] = p
+                                            chosenp, rejectp = pp, oldp
+                                            Productions[p.number].reduced += 1
+                                            Productions[oldp.number].reduced -= 1
+                                        else:
+                                            chosenp, rejectp = oldp, pp
+                                        self.rr_conflicts.append((st, chosenp, rejectp))
+                                        log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
+                                                 a, st_actionp[a].number, st_actionp[a])
+                                    else:
+                                        raise LALRError('Unknown conflict in state %d' % st)
+                                else:
+                                    st_action[a] = -p.number
+                                    st_actionp[a] = p
+                                    Productions[p.number].reduced += 1
+                    else:
+                        i = p.lr_index
+                        a = p.prod[i+1]       # Get symbol right after the "."
+                        if a in self.grammar.Terminals:
+                            g = self.lr0_goto(I, a)
+                            j = self.lr0_cidhash.get(id(g), -1)
+                            if j >= 0:
+                                # We are in a shift state
+                                actlist.append((a, p, 'shift and go to state %d' % j))
+                                r = st_action.get(a)
+                                if r is not None:
+                                    # Whoa have a shift/reduce or shift/shift conflict
+                                    if r > 0:
+                                        if r != j:
+                                            raise LALRError('Shift/shift conflict in state %d' % st)
+                                    elif r < 0:
+                                        # Do a precedence check.
+                                        #   -  if precedence of reduce rule is higher, we reduce.
+                                        #   -  if precedence of reduce is same and left assoc, we reduce.
+                                        #   -  otherwise we shift
+
+                                        # Shift precedence comes from the token
+                                        sprec, slevel = Precedence.get(a, ('right', 0))
+
+                                        # Reduce precedence comes from the rule that could have been reduced
+                                        rprec, rlevel = Productions[st_actionp[a].number].prec
+
+                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
+                                            # We decide to shift here... highest precedence to shift
+                                            Productions[st_actionp[a].number].reduced -= 1
+                                            st_action[a] = j
+                                            st_actionp[a] = p
+                                            if not rlevel:
+                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
+                                                self.sr_conflicts.append((st, a, 'shift'))
+                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
+                                            st_action[a] = None
+                                        else:
+                                            # Hmmm. Guess we'll keep the reduce
+                                            if not slevel and not rlevel:
+                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
+                                                self.sr_conflicts.append((st, a, 'reduce'))
+
+                                    else:
+                                        raise LALRError('Unknown conflict in state %d' % st)
+                                else:
+                                    st_action[a] = j
+                                    st_actionp[a] = p
+
+            # Print the actions associated with each terminal
+            _actprint = {}
+            for a, p, m in actlist:
+                if a in st_action:
+                    if p is st_actionp[a]:
+                        log.info('    %-15s %s', a, m)
+                        _actprint[(a, m)] = 1
+            log.info('')
+            # Print the actions that were not used. (debugging)
+            not_used = 0
+            for a, p, m in actlist:
+                if a in st_action:
+                    if p is not st_actionp[a]:
+                        if not (a, m) in _actprint:
+                            log.debug('  ! %-15s [ %s ]', a, m)
+                            not_used = 1
+                            _actprint[(a, m)] = 1
+            if not_used:
+                log.debug('')
+
+            # Construct the goto table for this state
+
+            nkeys = {}
+            for ii in I:
+                for s in ii.usyms:
+                    if s in self.grammar.Nonterminals:
+                        nkeys[s] = None
+            for n in nkeys:
+                g = self.lr0_goto(I, n)
+                j = self.lr0_cidhash.get(id(g), -1)
+                if j >= 0:
+                    st_goto[n] = j
+                    log.info('    %-30s shift and go to state %d', n, j)
+
+            action[st] = st_action
+            actionp[st] = st_actionp
+            goto[st] = st_goto
+            st += 1
+
+# -----------------------------------------------------------------------------
+#                            === INTROSPECTION ===
+#
+# The following functions and classes are used to implement the PLY
+# introspection features followed by the yacc() function itself.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# get_caller_module_dict()
+#
+# This function returns a dictionary containing all of the symbols defined within
+# a caller further down the call stack.  This is used to get the environment
+# associated with the yacc() call if none was provided.
+# -----------------------------------------------------------------------------
+
+def get_caller_module_dict(levels):
+    f = sys._getframe(levels)
+    ldict = f.f_globals.copy()
+    if f.f_globals != f.f_locals:
+        ldict.update(f.f_locals)
+    return ldict
+
+# -----------------------------------------------------------------------------
+# parse_grammar()
+#
+# This takes a raw grammar rule string and parses it into production data
+# -----------------------------------------------------------------------------
+def parse_grammar(doc, file, line):
+    grammar = []
+    # Split the doc string into lines
+    pstrings = doc.splitlines()
+    lastp = None
+    dline = line
+    for ps in pstrings:
+        dline += 1
+        p = ps.split()
+        if not p:
+            continue
+        try:
+            if p[0] == '|':
+                # This is a continuation of a previous rule
+                if not lastp:
+                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
+                prodname = lastp
+                syms = p[1:]
+            else:
+                prodname = p[0]
+                lastp = prodname
+                syms   = p[2:]
+                assign = p[1]
+                if assign != ':' and assign != '::=':
+                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
+
+            grammar.append((file, dline, prodname, syms))
+        except SyntaxError:
+            raise
+        except Exception:
+            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
+
+    return grammar
+
+# -----------------------------------------------------------------------------
+# ParserReflect()
+#
+# This class represents information extracted for building a parser including
+# start symbol, error function, tokens, precedence list, action functions,
+# etc.
+# -----------------------------------------------------------------------------
+class ParserReflect(object):
+    def __init__(self, pdict, log=None):
+        self.pdict      = pdict
+        self.start      = None
+        self.error_func = None
+        self.tokens     = None
+        self.modules    = set()
+        self.grammar    = []
+        self.error      = False
+
+        if log is None:
+            self.log = PlyLogger(sys.stderr)
+        else:
+            self.log = log
+
+    # Get all of the basic information
+    def get_all(self):
+        self.get_start()
+        self.get_error_func()
+        self.get_tokens()
+        self.get_precedence()
+        self.get_pfunctions()
+
+    # Validate all of the information
+    def validate_all(self):
+        self.validate_start()
+        self.validate_error_func()
+        self.validate_tokens()
+        self.validate_precedence()
+        self.validate_pfunctions()
+        self.validate_modules()
+        return self.error
+
+    # Compute a signature over the grammar
+    def signature(self):
+        parts = []
+        try:
+            if self.start:
+                parts.append(self.start)
+            if self.prec:
+                parts.append(''.join([''.join(p) for p in self.prec]))
+            if self.tokens:
+                parts.append(' '.join(self.tokens))
+            for f in self.pfuncs:
+                if f[3]:
+                    parts.append(f[3])
+        except (TypeError, ValueError):
+            pass
+        return ''.join(parts)
+
+    # -----------------------------------------------------------------------------
+    # validate_modules()
+    #
+    # This method checks to see if there are duplicated p_rulename() functions
+    # in the parser module file.  Without this function, it is really easy for
+    # users to make mistakes by cutting and pasting code fragments (and it's a real
+    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
+    # we just do a little regular expression pattern matching of def statements
+    # to try and detect duplicates.
+    # -----------------------------------------------------------------------------
+
+    def validate_modules(self):
+        # Match def p_funcname(
+        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
+
+        for module in self.modules:
+            try:
+                lines, linen = inspect.getsourcelines(module)
+            except IOError:
+                continue
+
+            counthash = {}
+            for linen, line in enumerate(lines):
+                linen += 1
+                m = fre.match(line)
+                if m:
+                    name = m.group(1)
+                    prev = counthash.get(name)
+                    if not prev:
+                        counthash[name] = linen
+                    else:
+                        filename = inspect.getsourcefile(module)
+                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
+                                         filename, linen, name, prev)
+
+    # Get the start symbol
+    def get_start(self):
+        self.start = self.pdict.get('start')
+
+    # Validate the start symbol
+    def validate_start(self):
+        if self.start is not None:
+            if not isinstance(self.start, str):
+                self.log.error("'start' must be a string")
+
+    # Look for error handler
+    def get_error_func(self):
+        self.error_func = self.pdict.get('p_error')
+
+    # Validate the error function
+    def validate_error_func(self):
+        if self.error_func:
+            if isinstance(self.error_func, types.FunctionType):
+                ismethod = 0
+            elif isinstance(self.error_func, types.MethodType):
+                ismethod = 1
+            else:
+                self.log.error("'p_error' defined, but is not a function or method")
+                self.error = True
+                return
+
+            eline = self.error_func.__code__.co_firstlineno
+            efile = self.error_func.__code__.co_filename
+            module = inspect.getmodule(self.error_func)
+            self.modules.add(module)
+
+            argcount = self.error_func.__code__.co_argcount - ismethod
+            if argcount != 1:
+                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
+                self.error = True
+
+    # Get the tokens map
+    def get_tokens(self):
+        tokens = self.pdict.get('tokens')
+        if not tokens:
+            self.log.error('No token list is defined')
+            self.error = True
+            return
+
+        if not isinstance(tokens, (list, tuple)):
+            self.log.error('tokens must be a list or tuple')
+            self.error = True
+            return
+
+        if not tokens:
+            self.log.error('tokens is empty')
+            self.error = True
+            return
+
+        self.tokens = sorted(tokens)
+
+    # Validate the tokens
+    def validate_tokens(self):
+        # Validate the tokens.
+        if 'error' in self.tokens:
+            self.log.error("Illegal token name 'error'. Is a reserved word")
+            self.error = True
+            return
+
+        terminals = set()
+        for n in self.tokens:
+            if n in terminals:
+                self.log.warning('Token %r multiply defined', n)
+            terminals.add(n)
+
+    # Get the precedence map (if any)
+    def get_precedence(self):
+        self.prec = self.pdict.get('precedence')
+
+    # Validate and parse the precedence map
+    def validate_precedence(self):
+        preclist = []
+        if self.prec:
+            if not isinstance(self.prec, (list, tuple)):
+                self.log.error('precedence must be a list or tuple')
+                self.error = True
+                return
+            for level, p in enumerate(self.prec):
+                if not isinstance(p, (list, tuple)):
+                    self.log.error('Bad precedence table')
+                    self.error = True
+                    return
+
+                if len(p) < 2:
+                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
+                    self.error = True
+                    return
+                assoc = p[0]
+                if not isinstance(assoc, str):
+                    self.log.error('precedence associativity must be a string')
+                    self.error = True
+                    return
+                for term in p[1:]:
+                    if not isinstance(term, str):
+                        self.log.error('precedence items must be strings')
+                        self.error = True
+                        return
+                    preclist.append((term, assoc, level+1))
+        self.preclist = preclist
+
+    # Get all p_functions from the grammar
+    def get_pfunctions(self):
+        p_functions = []
+        for name, item in self.pdict.items():
+            if not name.startswith('p_') or name == 'p_error':
+                continue
+            if isinstance(item, (types.FunctionType, types.MethodType)):
+                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
+                module = inspect.getmodule(item)
+                p_functions.append((line, module, name, item.__doc__))
+
+        # Sort all of the actions by line number; make sure to stringify
+        # modules to make them sortable, since `line` may not uniquely sort all
+        # p functions
+        p_functions.sort(key=lambda p_function: (
+            p_function[0],
+            str(p_function[1]),
+            p_function[2],
+            p_function[3]))
+        self.pfuncs = p_functions
+
+    # Validate all of the p_functions
+    def validate_pfunctions(self):
+        grammar = []
+        # Check for non-empty symbols
+        if len(self.pfuncs) == 0:
+            self.log.error('no rules of the form p_rulename are defined')
+            self.error = True
+            return
+
+        for line, module, name, doc in self.pfuncs:
+            file = inspect.getsourcefile(module)
+            func = self.pdict[name]
+            if isinstance(func, types.MethodType):
+                reqargs = 2
+            else:
+                reqargs = 1
+            if func.__code__.co_argcount > reqargs:
+                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
+                self.error = True
+            elif func.__code__.co_argcount < reqargs:
+                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
+                self.error = True
+            elif not func.__doc__:
+                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
+                                 file, line, func.__name__)
+            else:
+                try:
+                    parsed_g = parse_grammar(doc, file, line)
+                    for g in parsed_g:
+                        grammar.append((name, g))
+                except SyntaxError as e:
+                    self.log.error(str(e))
+                    self.error = True
+
+                # Looks like a valid grammar rule
+                # Mark the file in which defined.
+                self.modules.add(module)
+
+        # Secondary validation step that looks for p_ definitions that are not functions
+        # or functions that look like they might be grammar rules.
+
+        for n, v in self.pdict.items():
+            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
+                continue
+            if n.startswith('t_'):
+                continue
+            if n.startswith('p_') and n != 'p_error':
+                self.log.warning('%r not defined as a function', n)
+            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
+                   (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
+                if v.__doc__:
+                    try:
+                        doc = v.__doc__.split(' ')
+                        if doc[1] == ':':
+                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
+                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
+                    except IndexError:
+                        pass
+
+        self.grammar = grammar
+
+# -----------------------------------------------------------------------------
+# yacc(module)
+#
+# Build a parser
+# -----------------------------------------------------------------------------
+
+def yacc(*, debug=yaccdebug, module=None, start=None,
+         check_recursion=True, optimize=False, debugfile=debug_file,
+         debuglog=None, errorlog=None):
+
+    # Reference to the parsing method of the last built parser
+    global parse
+
+    if errorlog is None:
+        errorlog = PlyLogger(sys.stderr)
+
+    # Get the module dictionary used for the parser
+    if module:
+        _items = [(k, getattr(module, k)) for k in dir(module)]
+        pdict = dict(_items)
+        # If no __file__ or __package__ attributes are available, try to obtain them
+        # from the __module__ instead
+        if '__file__' not in pdict:
+            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
+        if '__package__' not in pdict and '__module__' in pdict:
+            if hasattr(sys.modules[pdict['__module__']], '__package__'):
+                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
+    else:
+        pdict = get_caller_module_dict(2)
+
+    # Set start symbol if it's specified directly using an argument
+    if start is not None:
+        pdict['start'] = start
+
+    # Collect parser information from the dictionary
+    pinfo = ParserReflect(pdict, log=errorlog)
+    pinfo.get_all()
+
+    if pinfo.error:
+        raise YaccError('Unable to build parser')
+
+    if debuglog is None:
+        if debug:
+            try:
+                debuglog = PlyLogger(open(debugfile, 'w'))
+            except IOError as e:
+                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
+                debuglog = NullLogger()
+        else:
+            debuglog = NullLogger()
+
+    debuglog.info('Created by PLY (http://www.dabeaz.com/ply)')
+
+    errors = False
+
+    # Validate the parser information
+    if pinfo.validate_all():
+        raise YaccError('Unable to build parser')
+
+    if not pinfo.error_func:
+        errorlog.warning('no p_error() function is defined')
+
+    # Create a grammar object
+    grammar = Grammar(pinfo.tokens)
+
+    # Set precedence level for terminals
+    for term, assoc, level in pinfo.preclist:
+        try:
+            grammar.set_precedence(term, assoc, level)
+        except GrammarError as e:
+            errorlog.warning('%s', e)
+
+    # Add productions to the grammar
+    for funcname, gram in pinfo.grammar:
+        file, line, prodname, syms = gram
+        try:
+            grammar.add_production(prodname, syms, funcname, file, line)
+        except GrammarError as e:
+            errorlog.error('%s', e)
+            errors = True
+
+    # Set the grammar start symbols
+    try:
+        if start is None:
+            grammar.set_start(pinfo.start)
+        else:
+            grammar.set_start(start)
+    except GrammarError as e:
+        errorlog.error(str(e))
+        errors = True
+
+    if errors:
+        raise YaccError('Unable to build parser')
+
+    # Verify the grammar structure
+    undefined_symbols = grammar.undefined_symbols()
+    for sym, prod in undefined_symbols:
+        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
+        errors = True
+
+    unused_terminals = grammar.unused_terminals()
+    if unused_terminals:
+        debuglog.info('')
+        debuglog.info('Unused terminals:')
+        debuglog.info('')
+        for term in unused_terminals:
+            errorlog.warning('Token %r defined, but not used', term)
+            debuglog.info('    %s', term)
+
+    # Print out all productions to the debug log
+    if debug:
+        debuglog.info('')
+        debuglog.info('Grammar')
+        debuglog.info('')
+        for n, p in enumerate(grammar.Productions):
+            debuglog.info('Rule %-5d %s', n, p)
+
+    # Find unused non-terminals
+    unused_rules = grammar.unused_rules()
+    for prod in unused_rules:
+        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
+
+    if len(unused_terminals) == 1:
+        errorlog.warning('There is 1 unused token')
+    if len(unused_terminals) > 1:
+        errorlog.warning('There are %d unused tokens', len(unused_terminals))
+
+    if len(unused_rules) == 1:
+        errorlog.warning('There is 1 unused rule')
+    if len(unused_rules) > 1:
+        errorlog.warning('There are %d unused rules', len(unused_rules))
+
+    if debug:
+        debuglog.info('')
+        debuglog.info('Terminals, with rules where they appear')
+        debuglog.info('')
+        terms = list(grammar.Terminals)
+        terms.sort()
+        for term in terms:
+            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
+
+        debuglog.info('')
+        debuglog.info('Nonterminals, with rules where they appear')
+        debuglog.info('')
+        nonterms = list(grammar.Nonterminals)
+        nonterms.sort()
+        for nonterm in nonterms:
+            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
+        debuglog.info('')
+
+    if check_recursion:
+        unreachable = grammar.find_unreachable()
+        for u in unreachable:
+            errorlog.warning('Symbol %r is unreachable', u)
+
+        infinite = grammar.infinite_cycles()
+        for inf in infinite:
+            errorlog.error('Infinite recursion detected for symbol %r', inf)
+            errors = True
+
+    unused_prec = grammar.unused_precedence()
+    for term, assoc in unused_prec:
+        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
+        errors = True
+
+    if errors:
+        raise YaccError('Unable to build parser')
+
+    # Run the LRTable on the grammar
+    lr = LRTable(grammar, debuglog)
+
+    if debug:
+        num_sr = len(lr.sr_conflicts)
+
+        # Report shift/reduce and reduce/reduce conflicts
+        if num_sr == 1:
+            errorlog.warning('1 shift/reduce conflict')
+        elif num_sr > 1:
+            errorlog.warning('%d shift/reduce conflicts', num_sr)
+
+        num_rr = len(lr.rr_conflicts)
+        if num_rr == 1:
+            errorlog.warning('1 reduce/reduce conflict')
+        elif num_rr > 1:
+            errorlog.warning('%d reduce/reduce conflicts', num_rr)
+
+    # Write out conflicts to the output file
+    if debug and (lr.sr_conflicts or lr.rr_conflicts):
+        debuglog.warning('')
+        debuglog.warning('Conflicts:')
+        debuglog.warning('')
+
+        for state, tok, resolution in lr.sr_conflicts:
+            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)
+
+        already_reported = set()
+        for state, rule, rejected in lr.rr_conflicts:
+            if (state, id(rule), id(rejected)) in already_reported:
+                continue
+            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
+            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
+            already_reported.add((state, id(rule), id(rejected)))
+
+        warned_never = []
+        for state, rule, rejected in lr.rr_conflicts:
+            if not rejected.reduced and (rejected not in warned_never):
+                debuglog.warning('Rule (%s) is never reduced', rejected)
+                errorlog.warning('Rule (%s) is never reduced', rejected)
+                warned_never.append(rejected)
+
+    # Build the parser
+    lr.bind_callables(pinfo.pdict)
+    parser = LRParser(lr, pinfo.error_func)
+
+    parse = parser.parse
+    return parser
diff --git a/src/journal_lib/parse/preprocessing.py b/src/journal_lib/parse/preprocessing.py
new file mode 100644
index 0000000..6d4a0cf
--- /dev/null
+++ b/src/journal_lib/parse/preprocessing.py
@@ -0,0 +1,28 @@
+import re
+from pathlib import Path
+
+def preprocess_includes(filepath: Path):
+    """
+    Reads the file at 'filepath', processing any "include" directives,
+    and returns a single string containing the preprocessed file content.
+
+    This does not report circular includes, so that would become a infinite loop
+    """
+    INCLUDE_RE = re.compile(r'^\s*include\s+([^\n]+)\s*$', re.IGNORECASE)
+
+    def read_file(file_path):
+        with open(file_path, 'r') as file:
+            return file.readlines()
+
+    lines = read_file(filepath)
+    i = 0
+    while i < len(lines):
+        match = INCLUDE_RE.match(lines[i])
+        if match:
+            included_file_path = match.group(1)
+            included_lines = read_file(included_file_path)
+            lines[i:i+1] = included_lines
+        else:
+            i += 1
+
+    return ''.join(lines)
diff --git a/src/journal_lib/utils.py b/src/journal_lib/utils.py
new file mode 100644
index 0000000..b8f645d
--- /dev/null
+++ b/src/journal_lib/utils.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+from journal_lib.dataclasses import Journal
+from journal_lib.parse import JournalParser, JournalLexer, preprocess_includes
+
+def journal_from_str(data: str, debug: bool = False) -> Journal:
+    """ Read a string of Journal entries into a Journal object """
+    if debug: print("= Building lexer ===========")
+    lexer = JournalLexer(debug=debug)
+    if debug: print("= Building parser ==========")
+    parser = JournalParser(debug=debug)
+    if debug: print("= PARSE ====================")
+    journal = parser.parse(data, lexer=lexer)
+    if debug: print("= JOURNAL ==================")
+    return journal
+
+def journal_from_file(filename: Path, debug: bool = False) -> Journal:
+    """ Read a journal file into a Journal object """
+    journal_raw = preprocess_includes(filename)
+    return journal_from_str(journal_raw, debug=debug)
+
+
+def test():
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument("-f", "--file", help="The journal file to read")
+    parser.add_argument("-d", "--debug", action="store_true", help="Print more debug information from lexing and parsing")
+    args = parser.parse_args()
+
+    if args.file is not None:
+        print(journal_from_file(Path(args.file), debug=args.debug))
+
+    else:
+        # Just a test string which includes most of the supported features of the parser
+        data = """
+account Expenses:account one
+account Assets:Cash            ; type:X
+
+commodity APPL
+
+commodity USD
+    note Us dollars
+    format 1000.00 NOK
+    nomarket
+    default
+
+2023-10-14=2023-10-14 * "Groceries"
+    ; entry comment
+    Expenses:account one  $50.00  ; post comment
+    Assets:Cash           -50 NOK
+    Assets:Cash           50 NOK
+    ; entry comment
+
+; global comment
+
+comment
+this is a block comment
+end comment
+
+2023/10/14 ! papers
+    Expenses:account one  $50.00
+    Assets:Cash          
+    """
+        print(journal_from_str(data, debug=args.debug))
+