diff options
author | jakobst1n <jakob.stendahl@outlook.com> | 2023-10-20 19:35:32 +0200 |
---|---|---|
committer | jakobst1n <jakob.stendahl@outlook.com> | 2023-10-20 19:35:32 +0200 |
commit | 81f0f8de331b382caad8e82348a3ccbac5bb150e (patch) | |
tree | fdaaeab78ba7cc55336fdbdfae258a15cf5b24f1 /src/journal_lib/parse/lexers/l_ledger.py | |
parent | 3bb38fcbbc9703ba22429441604d66f577fc6010 (diff) | |
download | journal-lib-81f0f8de331b382caad8e82348a3ccbac5bb150e.tar.gz journal-lib-81f0f8de331b382caad8e82348a3ccbac5bb150e.zip |
Run black, and do some changes for enabling better syntax expandability
Diffstat (limited to 'src/journal_lib/parse/lexers/l_ledger.py')
-rw-r--r-- | src/journal_lib/parse/lexers/l_ledger.py | 136 |
1 files changed, 75 insertions, 61 deletions
diff --git a/src/journal_lib/parse/lexers/l_ledger.py b/src/journal_lib/parse/lexers/l_ledger.py index 6f63626..2ab51bd 100644 --- a/src/journal_lib/parse/lexers/l_ledger.py +++ b/src/journal_lib/parse/lexers/l_ledger.py @@ -1,10 +1,12 @@ from journal_lib.parse.lexwrapper import LexWrapper + class JournalLexer(LexWrapper): states = ( ('sHEADER', 'exclusive'), # Entry header parsing state ('sHEADEREFF', 'exclusive'), # Entry header effective date parsing state ('sENTRY', 'exclusive'), # Entry parsing state + ('sENTRYCONTENT', 'exclusive'), # Entry parsing state ('sBLOCKCOMMENT', 'exclusive'), # Block comment parsing state ('sACCOUNT', 'exclusive'), # Account definition parsing state ('sCOMMODITY', 'exclusive'), # Commodity definition parsing state @@ -15,10 +17,15 @@ class JournalLexer(LexWrapper): "commodity": 'KW_COMMODITY' } + kw_state_map = { + "KW_ACCOUNT": "sACCOUNT", + "KW_COMMODITY": "sCOMMODITY", + } + tokens = ( 'TEXT', 'AMOUNT', - 'CURRENCY', + 'COMMODITY', 'COMMENT', 'INLINE_COMMENT', 'DATE', @@ -30,46 +37,47 @@ class JournalLexer(LexWrapper): 'COMMODITY_DEFAULT', ) + tuple(reserved.values()) - t_ANY_ignore = ' \t' + t_ANY_ignore = " \t" - literals = '\n' + literals = "\n" # Rules for the 'initial' state def t_INITIAL_DATE(self, t): - r'\d{4}(-|\/)\d{2}(-|\/)\d{2}' - self._state_begin('sHEADER', t) + r"\d{4}(-|\/)\d{2}(-|\/)\d{2}" + self._state_begin("sHEADER", t) return t def t_INITIAL_eof(self, t): pass def t_INITIAL_COMMENT(self, t): - r'(;|\#|\%|\||\*).+\n' + r"(;|\#|\%|\||\*).+\n" + t.value = t.value.lstrip() + if t.value[0] in [";", "#", "%", "|", "*"]: + t.value = t.value[1:] pass def t_INITIAL_BLOCKCOMMENT(self, t): - r'comment' - self._state_begin('sBLOCKCOMMENT', t) + r"comment" + self._state_begin("sBLOCKCOMMENT", t) def t_INITIAL_KEYWORD(self, t): - r'[a-zA-Z_][a-zA-Z_0-9]*' - t.type = self.reserved.get(t.value,'KW') - if t.type == "KW_ACCOUNT": - self._state_begin('sACCOUNT', t) - if t.type == "KW_COMMODITY": - self._state_begin('sCOMMODITY', t) + r"[a-zA-Z_][a-zA-Z_0-9]*" + t.type = self.reserved.get(t.value, "KW") + if (new_state := self.kw_state_map.get(t.type)) is not None: + self._state_begin(new_state, t) return t # Rules for the 'sBLOCKCOMMENT' state def t_sBLOCKCOMMENT_end(self, t): - r'end\scomment' - t.lexer.lineno += t.value.count('\n') - self._state_begin('INITIAL', t) + r"end\scomment" + t.lexer.lineno += t.value.count("\n") + self._state_begin("INITIAL", t) def t_sBLOCKCOMMENT_content(self, t): - r'.+?\n' + r".+?\n" def t_sBLOCKCOMMENT_error(self, t): r.lexer.skip(1) @@ -84,17 +92,20 @@ class JournalLexer(LexWrapper): return t def t_sACCOUNT_COMMENT(self, t): - r'(;)[^\n]*' + r"(;)[^\n]*" + t.value.lstrip() + if t.value[0] == ";": + t.value = t.value[1:] return t def t_sACCOUNT_newline(self, t): - r'\n' - self._state_begin('INITIAL', t) + r"\n" + self._state_begin("INITIAL", t) # Rules for the 'sCOMMODITY' state def t_sCOMMODITY_KW(self, t): - r'(note|format|nomarket|default)' + r"(note|format|nomarket|default)" if t.value == "note": t.type = "COMMODITY_NOTE" elif t.value == "format": @@ -107,62 +118,69 @@ class JournalLexer(LexWrapper): return t def t_sCOMMODITY_TEXT(self, t): - r'[^\n]+' + r"[^\n]+" return t def t_sCOMMODITY_newline(self, t): - r'\n(?=(\s*\n|\s*$|[^\s]))' - self._state_begin('INITIAL', t) + r"\n(?=(\s*\n|\s*$|[^\s]))" + self._state_begin("INITIAL", t) # Rules for the 'sheader' state def t_sHEADER_ENTRY_STATUS(self, t): - r'(\*|!)' + r"(\*|!)" return t def t_sHEADER_ENTRY_EFFECTIVE_DATE_SEPARATOR(self, t): - r'=' - self._state_begin('sHEADEREFF', t) + r"=" + self._state_begin("sHEADEREFF", t) return t def t_sHEADER_TEXT(self, t): - r'[^\n]+' - if ((t.value.startswith('"') and t.value.endswith('"')) - or (t.value.startswith("'") and t.value.endswith("'"))): - t.value = t.value[1:-1] + r"[^\n]+" + if (t.value.startswith('"') and t.value.endswith('"')) or ( + t.value.startswith("'") and t.value.endswith("'") + ): + t.value = t.value[1:-1] return t def t_sHEADER_newline(self, t): - r'\n' - self._state_begin('sENTRY', t) + r"\n" + self._state_begin("sENTRY", t) # Rules for the 'sheader_effective_date' state def t_sHEADEREFF_DATE(self, t): - r'\d{4}(-|\/)\d{2}(-|\/)\d{2}' - self._state_begin('sHEADER', t) + r"\d{4}(-|\/)\d{2}(-|\/)\d{2}" + self._state_begin("sHEADER", t) return t # Rules for the 'sentry' state - def t_sENTRY_DATE(self, t): - r'\d{4}(-|\/)\d{2}(-|\/)\d{2}' + def t_sENTRY_TEXT(self, t): + r"[^\n;]+?(?=\s{2,}|$|;)" + if t.value.startswith('"') and t.value.endswith('"'): + t.value = t.value[1:-1] + t.value = t.value.rstrip() + self._state_begin("sENTRYCONTENT") return t - def t_sENTRY_CURRENCY(self, t): - r'\$|NOK' + # Rules for the 'sENTRYCONTENT' state + + def t_sENTRYCONTENT_COMMODITY(self, t): + r"\$|NOK" return t - def t_sENTRY_AMOUNT(self, t): - r'(-)?(\d|\,)+(\.\d{2})?' + def t_sENTRYCONTENT_AMOUNT(self, t): + r"(-)?(\d|\,)+(\.\d{2})?" return t - def t_sENTRY_COMMENT(self, t): - r';[^\n]*' + def t_sENTRY_sENTRYCONTENT_COMMENT(self, t): + r";[^\n]*" # Check if the comment is at the start of a line (considering whitespaces) - line_start = t.lexer.lexdata.rfind('\n', 0, t.lexpos) + 1 - pre_comment = t.lexer.lexdata[line_start:t.lexpos] - + line_start = t.lexer.lexdata.rfind("\n", 0, t.lexpos) + 1 + pre_comment = t.lexer.lexdata[line_start : t.lexpos] + # If the comment is at the start of a line, it's a standalone comment if pre_comment.isspace() or pre_comment == "": t.type = "COMMENT" @@ -170,27 +188,23 @@ class JournalLexer(LexWrapper): t.type = "INLINE_COMMENT" return t - def t_sENTRY_TEXT(self, t): - r'[^\n;]+?(?=\s{2,}|$|;)' - if t.value.startswith('"') and t.value.endswith('"'): - t.value = t.value[1:-1] - t.value = t.value.rstrip() - return t - - def t_sENTRY_newline(self, t): - r'\n\n' - self._state_begin('INITIAL', t) + def t_sENTRYCONTENT_newline(self, t): + r"\n" + self._state_begin("sENTRY", t) - def t_sENTRY_eof(self, t): - self._state_begin('INITIAL', t) + def t_sENTRY_sENTRYCONTENT_double_newline(self, t): + r"\n\n" + self._state_begin("INITIAL", t) # Common rules + def t_ANY_eof(self, t): + self._state_begin("INITIAL", t) + def t_ANY_newline(self, t): - r'\n+' + r"\n+" t.lexer.lineno += len(t.value) def t_ANY_error(self, t): self._hl_token(t) t.lexer.skip(1) - |