about summary refs log tree commit diff
path: root/src/journal_lib/parse/lexers/l_ledger.py
diff options
context:
space:
mode:
author jakobst1n <jakob.stendahl@outlook.com> 2023-10-20 19:35:32 +0200
committer jakobst1n <jakob.stendahl@outlook.com> 2023-10-20 19:35:32 +0200
commit 81f0f8de331b382caad8e82348a3ccbac5bb150e (patch)
tree fdaaeab78ba7cc55336fdbdfae258a15cf5b24f1 /src/journal_lib/parse/lexers/l_ledger.py
parent 3bb38fcbbc9703ba22429441604d66f577fc6010 (diff)
download journal-lib-81f0f8de331b382caad8e82348a3ccbac5bb150e.tar.gz
journal-lib-81f0f8de331b382caad8e82348a3ccbac5bb150e.zip
Run black, and do some changes for enabling better syntax expandability
Diffstat (limited to 'src/journal_lib/parse/lexers/l_ledger.py')
-rw-r--r-- src/journal_lib/parse/lexers/l_ledger.py 136
1 files changed, 75 insertions, 61 deletions
diff --git a/src/journal_lib/parse/lexers/l_ledger.py b/src/journal_lib/parse/lexers/l_ledger.py
index 6f63626..2ab51bd 100644
--- a/src/journal_lib/parse/lexers/l_ledger.py
+++ b/src/journal_lib/parse/lexers/l_ledger.py
@@ -1,10 +1,12 @@
from journal_lib.parse.lexwrapper import LexWrapper
+
class JournalLexer(LexWrapper):
states = (
('sHEADER', 'exclusive'), # Entry header parsing state
('sHEADEREFF', 'exclusive'), # Entry header effective date parsing state
('sENTRY', 'exclusive'), # Entry parsing state
+ ('sENTRYCONTENT', 'exclusive'), # Entry parsing state
('sBLOCKCOMMENT', 'exclusive'), # Block comment parsing state
('sACCOUNT', 'exclusive'), # Account definition parsing state
('sCOMMODITY', 'exclusive'), # Commodity definition parsing state
@@ -15,10 +17,15 @@ class JournalLexer(LexWrapper):
"commodity": 'KW_COMMODITY'
}
+ kw_state_map = {
+ "KW_ACCOUNT": "sACCOUNT",
+ "KW_COMMODITY": "sCOMMODITY",
+ }
+
tokens = (
'TEXT',
'AMOUNT',
- 'CURRENCY',
+ 'COMMODITY',
'COMMENT',
'INLINE_COMMENT',
'DATE',
@@ -30,46 +37,47 @@ class JournalLexer(LexWrapper):
'COMMODITY_DEFAULT',
) + tuple(reserved.values())
- t_ANY_ignore = ' \t'
+ t_ANY_ignore = " \t"
- literals = '\n'
+ literals = "\n"
# Rules for the 'initial' state
def t_INITIAL_DATE(self, t):
- r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
- self._state_begin('sHEADER', t)
+ r"\d{4}(-|\/)\d{2}(-|\/)\d{2}"
+ self._state_begin("sHEADER", t)
return t
def t_INITIAL_eof(self, t):
pass
def t_INITIAL_COMMENT(self, t):
- r'(;|\#|\%|\||\*).+\n'
+ r"(;|\#|\%|\||\*).+\n"
+ t.value = t.value.lstrip()
+ if t.value[0] in [";", "#", "%", "|", "*"]:
+ t.value = t.value[1:]
pass
def t_INITIAL_BLOCKCOMMENT(self, t):
- r'comment'
- self._state_begin('sBLOCKCOMMENT', t)
+ r"comment"
+ self._state_begin("sBLOCKCOMMENT", t)
def t_INITIAL_KEYWORD(self, t):
- r'[a-zA-Z_][a-zA-Z_0-9]*'
- t.type = self.reserved.get(t.value,'KW')
- if t.type == "KW_ACCOUNT":
- self._state_begin('sACCOUNT', t)
- if t.type == "KW_COMMODITY":
- self._state_begin('sCOMMODITY', t)
+ r"[a-zA-Z_][a-zA-Z_0-9]*"
+ t.type = self.reserved.get(t.value, "KW")
+ if (new_state := self.kw_state_map.get(t.type)) is not None:
+ self._state_begin(new_state, t)
return t
# Rules for the 'sBLOCKCOMMENT' state
def t_sBLOCKCOMMENT_end(self, t):
- r'end\scomment'
- t.lexer.lineno += t.value.count('\n')
- self._state_begin('INITIAL', t)
+ r"end\scomment"
+ t.lexer.lineno += t.value.count("\n")
+ self._state_begin("INITIAL", t)
def t_sBLOCKCOMMENT_content(self, t):
- r'.+?\n'
+ r".+?\n"
def t_sBLOCKCOMMENT_error(self, t):
r.lexer.skip(1)
@@ -84,17 +92,20 @@ class JournalLexer(LexWrapper):
return t
def t_sACCOUNT_COMMENT(self, t):
- r'(;)[^\n]*'
+ r"(;)[^\n]*"
+ t.value.lstrip()
+ if t.value[0] == ";":
+ t.value = t.value[1:]
return t
def t_sACCOUNT_newline(self, t):
- r'\n'
- self._state_begin('INITIAL', t)
+ r"\n"
+ self._state_begin("INITIAL", t)
# Rules for the 'sCOMMODITY' state
def t_sCOMMODITY_KW(self, t):
- r'(note|format|nomarket|default)'
+ r"(note|format|nomarket|default)"
if t.value == "note":
t.type = "COMMODITY_NOTE"
elif t.value == "format":
@@ -107,62 +118,69 @@ class JournalLexer(LexWrapper):
return t
def t_sCOMMODITY_TEXT(self, t):
- r'[^\n]+'
+ r"[^\n]+"
return t
def t_sCOMMODITY_newline(self, t):
- r'\n(?=(\s*\n|\s*$|[^\s]))'
- self._state_begin('INITIAL', t)
+ r"\n(?=(\s*\n|\s*$|[^\s]))"
+ self._state_begin("INITIAL", t)
# Rules for the 'sheader' state
def t_sHEADER_ENTRY_STATUS(self, t):
- r'(\*|!)'
+ r"(\*|!)"
return t
def t_sHEADER_ENTRY_EFFECTIVE_DATE_SEPARATOR(self, t):
- r'='
- self._state_begin('sHEADEREFF', t)
+ r"="
+ self._state_begin("sHEADEREFF", t)
return t
def t_sHEADER_TEXT(self, t):
- r'[^\n]+'
- if ((t.value.startswith('"') and t.value.endswith('"'))
- or (t.value.startswith("'") and t.value.endswith("'"))):
- t.value = t.value[1:-1]
+ r"[^\n]+"
+ if (t.value.startswith('"') and t.value.endswith('"')) or (
+ t.value.startswith("'") and t.value.endswith("'")
+ ):
+ t.value = t.value[1:-1]
return t
def t_sHEADER_newline(self, t):
- r'\n'
- self._state_begin('sENTRY', t)
+ r"\n"
+ self._state_begin("sENTRY", t)
# Rules for the 'sheader_effective_date' state
def t_sHEADEREFF_DATE(self, t):
- r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
- self._state_begin('sHEADER', t)
+ r"\d{4}(-|\/)\d{2}(-|\/)\d{2}"
+ self._state_begin("sHEADER", t)
return t
# Rules for the 'sentry' state
- def t_sENTRY_DATE(self, t):
- r'\d{4}(-|\/)\d{2}(-|\/)\d{2}'
+ def t_sENTRY_TEXT(self, t):
+ r"[^\n;]+?(?=\s{2,}|$|;)"
+ if t.value.startswith('"') and t.value.endswith('"'):
+ t.value = t.value[1:-1]
+ t.value = t.value.rstrip()
+ self._state_begin("sENTRYCONTENT")
return t
- def t_sENTRY_CURRENCY(self, t):
- r'\$|NOK'
+ # Rules for the 'sENTRYCONTENT' state
+
+ def t_sENTRYCONTENT_COMMODITY(self, t):
+ r"\$|NOK"
return t
- def t_sENTRY_AMOUNT(self, t):
- r'(-)?(\d|\,)+(\.\d{2})?'
+ def t_sENTRYCONTENT_AMOUNT(self, t):
+ r"(-)?(\d|\,)+(\.\d{2})?"
return t
- def t_sENTRY_COMMENT(self, t):
- r';[^\n]*'
+ def t_sENTRY_sENTRYCONTENT_COMMENT(self, t):
+ r";[^\n]*"
# Check if the comment is at the start of a line (considering whitespaces)
- line_start = t.lexer.lexdata.rfind('\n', 0, t.lexpos) + 1
- pre_comment = t.lexer.lexdata[line_start:t.lexpos]
-
+ line_start = t.lexer.lexdata.rfind("\n", 0, t.lexpos) + 1
+ pre_comment = t.lexer.lexdata[line_start : t.lexpos]
+
# If the comment is at the start of a line, it's a standalone comment
if pre_comment.isspace() or pre_comment == "":
t.type = "COMMENT"
@@ -170,27 +188,23 @@ class JournalLexer(LexWrapper):
t.type = "INLINE_COMMENT"
return t
- def t_sENTRY_TEXT(self, t):
- r'[^\n;]+?(?=\s{2,}|$|;)'
- if t.value.startswith('"') and t.value.endswith('"'):
- t.value = t.value[1:-1]
- t.value = t.value.rstrip()
- return t
-
- def t_sENTRY_newline(self, t):
- r'\n\n'
- self._state_begin('INITIAL', t)
+ def t_sENTRYCONTENT_newline(self, t):
+ r"\n"
+ self._state_begin("sENTRY", t)
- def t_sENTRY_eof(self, t):
- self._state_begin('INITIAL', t)
+ def t_sENTRY_sENTRYCONTENT_double_newline(self, t):
+ r"\n\n"
+ self._state_begin("INITIAL", t)
# Common rules
+ def t_ANY_eof(self, t):
+ self._state_begin("INITIAL", t)
+
def t_ANY_newline(self, t):
- r'\n+'
+ r"\n+"
t.lexer.lineno += len(t.value)
def t_ANY_error(self, t):
self._hl_token(t)
t.lexer.skip(1)
-