diff options
author | jakobst1n <jakob.stendahl@outlook.com> | 2023-10-16 20:54:46 +0200 |
---|---|---|
committer | jakobst1n <jakob.stendahl@outlook.com> | 2023-10-16 20:54:46 +0200 |
commit | 3bb38fcbbc9703ba22429441604d66f577fc6010 (patch) | |
tree | ec90232fb8b6502d1efc6a6c6a748f15bb3fcc0e /src/journal_lib/parse/lexwrapper.py | |
download | journal-lib-3bb38fcbbc9703ba22429441604d66f577fc6010.tar.gz journal-lib-3bb38fcbbc9703ba22429441604d66f577fc6010.zip |
Initial commit
Diffstat (limited to 'src/journal_lib/parse/lexwrapper.py')
-rw-r--r-- | src/journal_lib/parse/lexwrapper.py | 76 |
1 files changed, 76 insertions, 0 deletions
from .ply import lex
import sys


class LexWrapper(object):
    """Thin wrapper around a ply lexer that tracks lexer state changes and can
    emit debug information (state transitions and recognized tokens) to stderr.

    Subclasses are expected to define the ply token rules (``tokens`` etc.);
    this class only wraps the generated lexer.
    """

    def __init__(self, debug: bool = False):
        """Initialize a new JournalLexer.

        Args:
            debug: when True, log every recognized token and every lexer
                state change to stderr via a ply logger.
        """
        # BUG FIX: state_trail was a class attribute in the original, which
        # would be shared (and mutated) across all LexWrapper instances.
        self.state_trail = ["INITIAL"]
        self.build(debug=debug)
        self.debug = debug
        if self.debug:
            self.debuglog = lex.PlyLogger(sys.stderr)
            # Widest token name (+1 for the closing bracket) for aligned output.
            self.max_token_name_length = max(len(x) + 1 for x in self.tokens)

    def _state_begin(self, state: str, t=None):
        """ Convenient wrapper for the lexer.begin, which makes it possible to track state changes. """
        self.lexer.begin(state)
        self.state_trail.append(self.lexer.current_state())

        # Only keep the five most recent states.
        if len(self.state_trail) > 5:
            self.state_trail = self.state_trail[-5:]

        if self.debug:
            d = f"{' ':{self.max_token_name_length+2}}{self.state_trail[-2]} -> {self.state_trail[-1]}"
            if t is not None:
                d += ", recognized [{}] \"{}\"".format(t.type, t.value.replace("\n", "\\n"))
            self.debuglog.info(d)

    def build(self, **kwargs):
        """ Reinitialize the lexer module (this is called on __init__) """
        self.lexer = lex.lex(module=self, **kwargs)

    def input(self, s: str):
        """ Wrapper for the lex input function """
        self.lexer.input(s)

    def token(self):
        """ Wrapper for the lex token function, can print debug information to stdout if debug is enabled """
        tok = self.lexer.token()
        if self.debug and tok:
            self.debuglog.info("[{:<{width}} ({}:{}) \"{}\"".format(
                tok.type + "]",
                tok.lineno,
                tok.lexpos,
                tok.value.replace("\n", "\\n"),
                width=self.max_token_name_length,
            ))
        return tok

    def print_tokens(self, data):
        """ Simple debugging function which will trigger a tokenization of all the data provided """
        self.input(data)
        _debug = self.debug
        self.debug = True
        try:
            # BUG FIX: the original looped forever (`while True` with no
            # break) and called self.lexer.token() directly, bypassing the
            # debug-printing self.token() wrapper so nothing was printed.
            while self.token() is not None:
                pass
        finally:
            # Restore the previous debug setting even if tokenization raises.
            self.debug = _debug

    def _hl_token(self, t):
        """Print an "illegal character" diagnostic for token *t*, showing the
        offending source line with a caret under the bad position."""
        try:
            # Start of the line containing t.lexpos (0 if on the first line).
            linestart = t.lexer.lexdata.rfind("\n", 0, t.lexpos) + 1
            lineend = t.lexer.lexdata.find("\n", t.lexpos)
            # BUG FIX: with no trailing newline, find() returns -1 and the
            # slice below would drop the last character of the line.
            if lineend == -1:
                lineend = len(t.lexer.lexdata)
            markpos = t.lexpos - linestart
            # BUG FIX: +1 so the first line is reported as line 1, not 0.
            lineno = t.lexer.lexdata[0:linestart + 1].count("\n") + 1
            print(f"Illegal character at '{t.value[0]}' on line {lineno}, position {markpos}")
            print(f" {t.lexer.lexdata[linestart:lineend]}")
            print(f" {' ' * markpos}^")
        except Exception as e:
            # BUG FIX: the original referenced the undefined name `p` here,
            # raising a NameError while reporting the error; typo fixed too.
            print(f"Illegal character '{t.value}'")
            print(f"Additionally an error occurred when showing the position of the illegal character\n{e}")

    @property
    def lexdata(self):
        """The raw input string held by the underlying lexer, or None if the
        lexer has not been built yet."""
        if hasattr(self, "lexer"):
            return self.lexer.lexdata
        return None