1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
|
-- luainspect.ast - Lua Abstract Syntax Tree (AST) and token list operations.
--
-- Two main structures are maintained. A Metalua-style AST represents the
-- nested syntactic structure obtained from the parse.
-- A separate linear ordered list of tokens represents the syntactic structure
-- from the lexing, including line information (character positions only not row/columns),
-- comments, and keywords, which is originally built from the lineinfo attributes
-- injected by Metalua into the AST (IMPROVE: it probably would be simpler
-- to obtain this from the lexer directly rather then inferring it from the parsing).
-- During AST manipulations, the lineinfo maintained in the AST is ignored
-- because it was found more difficult to maintain and not in the optimal format.
--
-- The contained code deals with
-- - Building the AST from source.
-- - Building the tokenlist from the AST lineinfo.
-- - Querying the AST+tokenlist.
-- - Modifying the AST+tokenlist (including incremental parsing source -> AST)
-- - Annotating the AST with navigational info (e.g. parent links) to assist queries.
-- - Dumping the tokenlist for debugging.
--
-- (c) 2010 David Manura, MIT License.
--! require 'luainspect.typecheck' (context)
local mlc = require 'metalua.compiler'.new()
local M = {}
--[=TESTSUITE
-- utilities
local ops = {}
ops['=='] = function(a,b) return a == b end
local function check(opname, a, b)
local op = assert(ops[opname])
if not op(a,b) then
error("fail == " .. tostring(a) .. " " .. tostring(b))
end
end
--]=]
-- CATEGORY: debug
local function DEBUG(...)
if LUAINSPECT_DEBUG then
print('DEBUG:', ...)
end
end
-- Converts character position to row,column position in string src.
-- Add values are 1-indexed.
function M.pos_to_linecol(pos, src)
local linenum = 1
local lasteolpos = 0
for eolpos in src:gmatch"()\n" do
if eolpos > pos then break end
linenum = linenum + 1
lasteolpos = eolpos
end
local colnum = pos - lasteolpos
return linenum, colnum
end
-- Removes any sheband ("#!") line from Lua source string.
-- CATEGORY: Lua parsing
function M.remove_shebang(src)
local shebang = src:match("^#![^\r\n]*")
return shebang and (" "):rep(#shebang) .. src:sub(#shebang+1) or src
end
-- Custom version of loadstring that parses out line number info
-- CATEGORY: Lua parsing
function M.loadstring(src)
local f, err = loadstring(src, "")
if f then
return f
else
err = err:gsub('^%[string ""%]:', "")
local linenum = assert(err:match("(%d+):"))
local colnum = 0
local linenum2 = err:match("^%d+: '[^']+' expected %(to close '[^']+' at line (%d+)")
return nil, err, linenum, colnum, linenum2
end
end
-- helper for ast_from_string. Raises on error.
-- FIX? filename currently ignored in Metalua
-- CATEGORY: Lua parsing
local function ast_from_string_helper(src, filename)
return mlc:src_to_ast(src, filename)
end
-- Counts number of lines in text.
-- Warning: the decision of whether to count a trailing new-line in a file
-- or an empty file as a line is a little subjective. This function currently
-- defines the line count as 1 plus the number of new line characters.
-- CATEGORY: utility/string
local function linecount(text)
local n = 1
for _ in text:gmatch'\n' do
n = n + 1
end
return n
end
-- Converts Lua source string to Lua AST (via mlp/gg).
-- CATEGORY: Lua parsing
function M.ast_from_string(src, filename)
local ok, ast = pcall(ast_from_string_helper, src, filename)
if not ok then
local err = ast
err = err:match('[^\n]*')
err = err:gsub("^.-:%s*line", "line")
-- mlp.chunk prepending this is undesirable. error(msg,0) would be better in gg.lua. Reported.
-- TODO-Metalua: remove when fixed in Metalua.
local linenum, colnum = err:match("line (%d+), char (%d+)")
if not linenum then
-- Metalua libraries may return "...gg.lua:56: .../mlp_misc.lua:179: End-of-file expected"
-- without the normal line/char numbers given things like "if x then end end". Should be
-- fixed probably with gg.parse_error in _chunk in mlp_misc.lua.
-- TODO-Metalua: remove when fixed in Metalua.
linenum = linecount(src)
colnum = 1
end
local linenum2 = nil
return nil, err, linenum, colnum, linenum2
else
return ast
end
end
-- Simple comment parser. Returns Metalua-style comment.
-- CATEGORY: Lua lexing
local function quick_parse_comment(src)
local s = src:match"^%-%-([^\n]*)()\n$"
if s then return {s, 1, #src, 'short'} end
local _, s = src:match(lexer.lexer.patterns.long_comment .. '\r?\n?$')
if s then return {s, 1, #src, 'long'} end
return nil
end
--FIX:check new-line correctness
--note: currently requiring \n at end of single line comment to avoid
-- incremental compilation with `--x\nf()` and removing \n from still
-- recognizing as comment `--x`.
-- currently allowing \r\n at end of long comment since Metalua includes
-- it in lineinfo of long comment (FIX:Metalua?)
-- Gets length of longest prefix string in both provided strings.
-- Returns max n such that text1:sub(1,n) == text2:sub(1,n) and n <= max(#text1,#text2)
-- CATEGORY: string utility
local function longest_prefix(text1, text2)
local nmin = 0
local nmax = math.min(#text1, #text2)
while nmax > nmin do
local nmid = math.ceil((nmin+nmax)/2)
if text1:sub(1,nmid) == text2:sub(1,nmid) then
nmin = nmid
else
nmax = nmid-1
end
end
return nmin
end
-- Gets length of longest postfix string in both provided strings.
-- Returns max n such that text1:sub(-n) == text2:sub(-n) and n <= max(#text1,#text2)
-- CATEGORY: string utility
local function longest_postfix(text1, text2)
local nmin = 0
local nmax = math.min(#text1, #text2)
while nmax > nmin do
local nmid = math.ceil((nmin+nmax)/2)
if text1:sub(-nmid) == text2:sub(-nmid) then --[*]
nmin = nmid
else
nmax = nmid-1
end
end
return nmin
end -- differs from longest_prefix only on line [*]
-- Determines AST node that must be re-evaluated upon changing code string from
-- `src` to `bsrc`, given previous top_ast/tokenlist/src.
-- Note: decorates top_ast as side-effect.
-- If preserve is true, then does not expand AST match even if replacement is invalid.
-- CATEGORY: AST/tokenlist manipulation
function M.invalidated_code(top_ast, tokenlist, src, bsrc, preserve)
-- Converts posiiton range in src to position range in bsrc.
local function range_transform(src_fpos, src_lpos)
local src_nlpos = #src - src_lpos
local bsrc_fpos = src_fpos
local bsrc_lpos = #bsrc - src_nlpos
return bsrc_fpos, bsrc_lpos
end
if src == bsrc then return end -- up-to-date
-- Find range of positions in src that differences correspond to.
-- Note: for zero byte range, src_pos2 = src_pos1 - 1.
local npre = longest_prefix(src, bsrc)
local npost = math.min(#src-npre, longest_postfix(src, bsrc))
-- note: min avoids overlap ambiguity
local src_fpos, src_lpos = 1 + npre, #src - npost
-- Find smallest AST node containing src range above. May also
-- be contained in (smaller) comment or whitespace.
local match_ast, match_comment, iswhitespace =
M.smallest_ast_containing_range(top_ast, tokenlist, src_fpos, src_lpos)
DEBUG('invalidate-smallest:', match_ast and (match_ast.tag or 'notag'), match_comment, iswhitespace)
-- Determine which (ast, comment, or whitespace) to match, and get its pos range in src and bsrc.
local srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
if iswhitespace then
mast, mtype = nil, 'whitespace'
srcm_fpos, srcm_lpos = src_fpos, src_lpos
elseif match_comment then
mast, mtype = match_comment, 'comment'
srcm_fpos, srcm_lpos = match_comment.fpos, match_comment.lpos
else
mast, mtype = match_ast, 'ast'
repeat
srcm_fpos, srcm_lpos = M.ast_pos_range(mast, tokenlist)
if not srcm_fpos then
if mast == top_ast then
srcm_fpos, srcm_lpos = 1, #src
break
else
M.ensure_parents_marked(top_ast)
mast = mast.parent
end
end
until srcm_fpos
end
bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)
-- Never expand match if preserve specified.
if preserve then
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
end
-- Determine if replacement could break parent nodes.
local isreplacesafe
if mtype == 'whitespace' then
if bsrc:sub(bsrcm_fpos, bsrcm_lpos):match'^%s*$' then -- replaced with whitespace
if bsrc:sub(bsrcm_fpos-1, bsrcm_lpos+1):match'%s' then -- not eliminating whitespace
isreplacesafe = true
end
end
elseif mtype == 'comment' then
local m2src = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
DEBUG('invalidate-comment[' .. m2src .. ']')
if quick_parse_comment(m2src) then -- replaced with comment
isreplacesafe = true
end
end
if isreplacesafe then -- return on safe replacement
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
end
-- Find smallest containing statement block that will compile (or top_ast).
while 1 do
match_ast = M.get_containing_statementblock(match_ast, top_ast)
if match_ast == top_ast then
return 1,#src, 1, #bsrc, match_ast, 'statblock'
-- entire AST invalidated
end
local srcm_fpos, srcm_lpos = M.ast_pos_range(match_ast, tokenlist)
local bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)
local msrc = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
DEBUG('invalidate-statblock:', match_ast and match_ast.tag, '[' .. msrc .. ']')
if loadstring(msrc) then -- compiled
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, match_ast, 'statblock'
end
M.ensure_parents_marked(top_ast)
match_ast = match_ast.parent
end
end
-- Walks AST `ast` in arbitrary order, visiting each node `n`, executing `fdown(n)` (if specified)
-- when doing down and `fup(n)` (if specified) when going if.
-- CATEGORY: AST walk
function M.walk(ast, fdown, fup)
assert(type(ast) == 'table')
if fdown then fdown(ast) end
for _,bast in ipairs(ast) do
if type(bast) == 'table' then
M.walk(bast, fdown, fup)
end
end
if fup then fup(ast) end
end
-- Replaces contents of table t1 with contents of table t2.
-- Does not change metatable (if any).
-- This function is useful for swapping one AST node with another
-- while preserving any references to the node.
-- CATEGORY: table utility
function M.switchtable(t1, t2)
for k in pairs(t1) do t1[k] = nil end
for k in pairs(t2) do t1[k] = t2[k] end
end
-- Inserts all elements in list bt at index i in list t.
-- CATEGORY: table utility
local function tinsertlist(t, i, bt)
local oldtlen, delta = #t, i - 1
for ti = #t + 1, #t + #bt do t[ti] = false end -- preallocate (avoid holes)
for ti = oldtlen, i, -1 do t[ti + #bt] = t[ti] end -- shift
for bi = 1, #bt do t[bi + delta] = bt[bi] end -- fill
end
--[=[TESTSUITE:
local function _tinsertlist(t, i, bt)
for bi=#bt,1,-1 do table.insert(t, i, bt[bi]) end
end -- equivalent but MUCH less efficient for large tables
local function _tinsertlist(t, i, bt)
for bi=1,#bt do table.insert(t, i+bi-1, bt[bi]) end
end -- equivalent but MUCH less efficient for large tables
local t = {}; tinsertlist(t, 1, {}); assert(table.concat(t)=='')
local t = {}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='23')
local t = {4}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='234')
local t = {2}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='234')
local t = {4,5}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='2345')
local t = {2,5}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='2345')
local t = {2,3}; tinsertlist(t, 3, {4,5}); assert(table.concat(t)=='2345')
print 'DONE'
--]=]
-- Gets list of keyword positions related to node ast in source src
-- note: ast must be visible, i.e. have lineinfo (e.g. unlike `Id "self" definition).
-- Note: includes operators.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: tokenlist build
function M.get_keywords(ast, src)
local list = {}
if not ast.lineinfo then return list end
-- examine space between each pair of children i and j.
-- special cases: 0 is before first child and #ast+1 is after last child
-- Put children in lexical order.
-- Some binary operations have arguments reversed from lexical order.
-- For example, `a > b` becomes `Op{'lt', `Id 'b', `Id 'a'}
local oast =
(ast.tag == 'Op' and #ast == 3 and tostring(ast[2].lineinfo.first):match('|L(%d+)') > tostring(ast[3].lineinfo.first):match('|L(%d+)'))
and {ast[1], ast[3], ast[2]} or ast
local i = 0
while i <= #ast do
-- j is node following i that has lineinfo
local j = i+1; while j < #ast+1 and not oast[j].lineinfo do j=j+1 end
-- Get position range [fpos,lpos] between subsequent children.
local fpos
if i == 0 then -- before first child
fpos = tonumber(tostring(ast.lineinfo.first):match('|L(%d+)'))
else
local last = oast[i].lineinfo.last; local c = last.comments
fpos = (c and #c > 0 and c[#c][3] or tostring(last):match('|L(%d+)')) + 1
end
local lpos
if j == #ast+1 then -- after last child
lpos = tonumber(tostring(ast.lineinfo.last):match('|L(%d+)'))
else
local first = oast[j].lineinfo.first; local c = first.comments
lpos = (c and #c > 0 and c[1][2] or tostring(first):match('|L(%d+)')) - 1
end
-- Find keyword in range.
local spos = fpos
repeat
local mfpos, tok, mlppos = src:match("^%s*()(%a+)()", spos)
if not mfpos then
mfpos, tok, mlppos = src:match("^%s*()(%p+)()", spos)
end
if mfpos then
local mlpos = mlppos-1
if mlpos > lpos then mlpos = lpos end
if mlpos >= mfpos then
list[#list+1] = mfpos
list[#list+1] = mlpos
end
end
spos = mlppos
until not spos or spos > lpos
-- note: finds single keyword. in `local function` returns only `local`
--DEBUG(i,j ,'test[' .. src:sub(fpos, lpos) .. ']')
i = j -- next
--DESIGN:Lua: comment: string.match accepts a start position but not a stop position
end
return list
end
-- Q:Metalua: does ast.lineinfo[loc].comments imply #ast.lineinfo[loc].comments > 0 ?
-- Generates ordered list of tokens in top_ast/src.
-- Note: currently ignores operators and parens.
-- Note: Modifies ast.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: AST/tokenlist query
local isterminal = {Nil=true, Dots=true, True=true, False=true, Number=true, String=true,
Dots=true, Id=true}
local function compare_tokens_(atoken, btoken) return atoken.fpos < btoken.fpos end
function M.ast_to_tokenlist(top_ast, src)
local tokens = {} -- {nbytes=#src}
local isseen = {}
M.walk(top_ast, function(ast)
if isterminal[ast.tag] then -- Extract terminal
local token = ast
if ast.lineinfo then
token.fpos = tonumber(tostring(ast.lineinfo.first):match('|L(%d+)'))
token.lpos = tonumber(tostring(ast.lineinfo.last):match('|L(%d+)'))
token.ast = ast
table.insert(tokens, token)
end
else -- Extract non-terminal
local keywordposlist = M.get_keywords(ast, src)
for i=1,#keywordposlist,2 do
local fpos, lpos = keywordposlist[i], keywordposlist[i+1]
local toksrc = src:sub(fpos, lpos)
local token = {tag='Keyword', fpos=fpos, lpos=lpos, ast=ast, toksrc}
table.insert(tokens, token)
end
end
-- Extract comments
for i=1,2 do
local comments = ast.lineinfo and ast.lineinfo[i==1 and 'first' or 'last'].comments
if comments then for _, comment in ipairs(comments) do
if not isseen[comment] then
comment.tag = 'Comment'
local token = comment
token.fpos = tonumber(tostring(comment.lineinfo.first):match('|L(%d+)'))
token.lpos = tonumber(tostring(comment.lineinfo.last):match('|L(%d+)'))
token.ast = comment
table.insert(tokens, token)
isseen[comment] = true
end
end end
end
end, nil)
table.sort(tokens, compare_tokens_)
return tokens
end
-- Gets tokenlist range [fidx,lidx] covered by ast. Returns nil,nil if not found.
--FIX:PERFORMANCE:this is slow on large files.
-- CATEGORY: AST/tokenlist query
function M.ast_idx_range_in_tokenlist(tokenlist, ast)
-- Get list of primary nodes under ast.
local isold = {}; M.walk(ast, function(ast) isold[ast] = true end)
-- Get range.
local fidx, lidx
for idx=1,#tokenlist do
local token = tokenlist[idx]
if isold[token.ast] then
lidx = idx
if not fidx then fidx = idx end
end
end
return fidx, lidx
end
-- Gets index range in tokenlist overlapped by character position range [fpos, lpos].
-- For example, `do ff() end` with range ` ff() ` would match tokens `ff()`.
-- Tokens partly inside range are counted, so range `f()` would match tokens `ff()`.
-- If lidx = fidx - 1, then position range is whitespace between tokens lidx (on left)
-- and fidx (on right), and this may include token pseudoindices 0 (start of file) and
-- #tokenlist+1 (end of file).
-- Note: lpos == fpos - 1 indicates zero-width range between chars lpos and fpos.
-- CATEGORY: tokenlist query
function M.tokenlist_idx_range_over_pos_range(tokenlist, fpos, lpos)
-- Find first/last indices of tokens overlapped (even partly) by position range.
local fidx, lidx
for idx=1,#tokenlist do
local token = tokenlist[idx]
--if (token.fpos >= fpos and token.fpos <= lpos) or (token.lpos >= fpos and token.lpos <= lpos) then -- token overlaps range
if fpos <= token.lpos and lpos >= token.fpos then -- range overlaps token (even partially)
if not fidx then fidx = idx end
lidx = idx
end
end
if not fidx then -- on fail, check between tokens
for idx=1,#tokenlist+1 do -- between idx-1 and idx
local tokfpos, toklpos = tokenlist[idx-1] and tokenlist[idx-1].lpos, tokenlist[idx] and tokenlist[idx].fpos
if (not tokfpos or fpos > tokfpos) and (not toklpos or lpos < toklpos) then -- range between tokens
return idx, idx-1
end
end
end
return fidx, lidx
end
--[=[TESTSUITE
local function test(...)
return table.concat({M.tokenlist_idx_range_over_pos_range(...)}, ',')
end
check('==', test({}, 2, 2), "1,0") -- no tokens
check('==', test({{tag='Id', fpos=1, lpos=1}}, 2, 2), "2,1") -- right of one token
check('==', test({{tag='Id', fpos=3, lpos=3}}, 2, 2), "1,0") -- left of one token
check('==', test({{tag='Id', fpos=3, lpos=4}}, 2, 3), "1,1") -- left partial overlap one token
check('==', test({{tag='Id', fpos=3, lpos=4}}, 4, 5), "1,1") -- right partial overlap one token
check('==', test({{tag='Id', fpos=3, lpos=6}}, 4, 5), "1,1") -- partial inner overlap one token
check('==', test({{tag='Id', fpos=3, lpos=6}}, 3, 6), "1,1") -- exact overlap one token
check('==', test({{tag='Id', fpos=4, lpos=5}}, 3, 6), "1,1") -- extra overlap one token
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 4), "2,1") -- between tokens, " " exact
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 3), "2,1") -- between tokens, "" on left
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 5, 4), "2,1") -- between tokens, "" on right
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=4, lpos=5}}, 4, 3), "2,1") -- between tokens, "" exact
--]=]
-- Removes tokens in tokenlist covered by ast.
-- CATEGORY: tokenlist manipulation
local function remove_ast_in_tokenlist(tokenlist, ast)
local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
if fidx then -- note: fidx implies lidx
for idx=lidx,fidx,-1 do table.remove(tokenlist, idx) end
end
end
-- Inserts tokens from btokenlist into tokenlist. Preserves sort.
-- CATEGORY: tokenlist manipulation
local function insert_tokenlist(tokenlist, btokenlist)
local ftoken = btokenlist[1]
if ftoken then
-- Get index in tokenlist in which to insert tokens in btokenlist.
local fidx
for idx=1,#tokenlist do
if tokenlist[idx].fpos > ftoken.fpos then fidx = idx; break end
end
fidx = fidx or #tokenlist + 1 -- else append
-- Insert tokens.
tinsertlist(tokenlist, fidx, btokenlist)
end
end
-- Get character position range covered by ast in tokenlist. Returns nil,nil on not found.
-- CATEGORY: AST/tokenlist query
function M.ast_pos_range(ast, tokenlist) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
if fidx then
return tokenlist[fidx].fpos, tokenlist[lidx].lpos
else
return nil, nil
end
end
-- Gets string representation of AST node. nil if none.
-- IMPROVE: what if node is empty block?
-- CATEGORY: AST/tokenlist query
function M.ast_to_text(ast, tokenlist, src) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
local fpos, lpos = M.ast_pos_range(ast, tokenlist)
if fpos then
return src:sub(fpos, lpos)
else
return nil
end
end
-- Gets smallest AST node in top_ast/tokenlist/src
-- completely containing position range [pos1, pos2].
-- careful: "function" is not part of the `Function node.
-- If range is inside comment, returns comment also.
-- If range is inside whitespace, then returns true in third return value.
-- CATEGORY: AST/tokenlist query
function M.smallest_ast_containing_range(top_ast, tokenlist, pos1, pos2)
local f0idx, l0idx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos1, pos2)
-- Find enclosing AST.
M.ensure_parents_marked(top_ast)
local fidx, lidx = f0idx, l0idx
while tokenlist[fidx] and not tokenlist[fidx].ast.parent do fidx = fidx - 1 end
while tokenlist[lidx] and not tokenlist[lidx].ast.parent do lidx = lidx + 1 end
-- DEBUG(fidx, lidx, f0idx, l0idx, #tokenlist, pos1, pos2, tokenlist[fidx], tokenlist[lidx])
local ast = not (tokenlist[fidx] and tokenlist[lidx]) and top_ast or
M.common_ast_parent(tokenlist[fidx].ast, tokenlist[lidx].ast, top_ast)
-- DEBUG('m2', tokenlist[fidx], tokenlist[lidx], top_ast, ast, ast and ast.tag)
if l0idx == f0idx - 1 then -- whitespace
return ast, nil, true
elseif l0idx == f0idx and tokenlist[l0idx].tag == 'Comment' then
return ast, tokenlist[l0idx], nil
else
return ast, nil, nil
end
end
--IMPROVE: handle string edits and maybe others
-- Gets smallest statement block containing position pos or
-- nearest statement block before pos, whichever is smaller, given ast/tokenlist.
function M.current_statementblock(ast, tokenlist, pos)
local fidx,lidx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos, pos)
if fidx > lidx then fidx = lidx end -- use nearest backward
-- Find closest AST node backward
while fidx >= 1 and tokenlist[fidx].tag == 'Comment' do fidx=fidx-1 end
if fidx < 1 then return ast, false end
local mast = tokenlist[fidx].ast
if not mast then return ast, false end
mast = M.get_containing_statementblock(mast, ast)
local isafter = false
if mast.tag2 ~= 'Block' then
local mfidx,mlidx = M.ast_idx_range_in_tokenlist(tokenlist, mast)
if pos > mlidx then
isafter = true
end
end
return mast, isafter
end
-- Gets index of bast in ast (nil if not found).
-- CATEGORY: AST query
function M.ast_idx(ast, bast)
for idx=1,#ast do
if ast[idx] == bast then return idx end
end
return nil
end
-- Gets parent of ast and index of ast in parent.
-- Root node top_ast must also be provided. Returns nil, nil if ast is root.
-- Note: may call mark_parents.
-- CATEGORY: AST query
function M.ast_parent_idx(top_ast, ast)
if ast == top_ast then return nil, nil end
M.ensure_parents_marked(top_ast); assert(ast.parent)
local idx = M.ast_idx(ast.parent, ast)
return ast.parent, idx
end
-- Gets common parent of aast and bast. Always returns value.
-- Must provide root top_ast too.
-- CATEGORY: AST query
function M.common_ast_parent(aast, bast, top_ast)
M.ensure_parents_marked(top_ast)
local isparent = {}
local tast = bast; repeat isparent[tast] = true; tast = tast.parent until not tast
local uast = aast; repeat if isparent[uast] then return uast end; uast = uast.parent until not uast
assert(false)
end
-- Replaces old_ast with new_ast/new_tokenlist in top_ast/tokenlist.
-- Note: assumes new_ast is a block. assumes old_ast is a statement or block.
-- CATEGORY: AST/tokenlist
function M.replace_statements(top_ast, tokenlist, old_ast, new_ast, new_tokenlist)
remove_ast_in_tokenlist(tokenlist, old_ast)
insert_tokenlist(tokenlist, new_tokenlist)
if old_ast == top_ast then -- special case: no parent
M.switchtable(old_ast, new_ast) -- note: safe since block is not in tokenlist.
else
local parent_ast, idx = M.ast_parent_idx(top_ast, old_ast)
table.remove(parent_ast, idx)
tinsertlist(parent_ast, idx, new_ast)
end
-- fixup annotations
for _,bast in ipairs(new_ast) do
if top_ast.tag2 then M.mark_tag2(bast, bast.tag == 'Do' and 'StatBlock' or 'Block') end
if old_ast.parent then M.mark_parents(bast, old_ast.parent) end
end
end
-- Adjusts lineinfo in tokenlist.
-- All char positions starting at pos1 are shifted by delta number of chars.
-- CATEGORY: tokenlist
function M.adjust_lineinfo(tokenlist, pos1, delta)
for _,token in ipairs(tokenlist) do
if token.fpos >= pos1 then
token.fpos = token.fpos + delta
end
if token.lpos >= pos1 then
token.lpos = token.lpos + delta
end
end
--tokenlist.nbytes = tokenlist.nbytes + delta
end
-- For each node n in ast, sets n.parent to parent node of n.
-- Assumes ast.parent will be parent_ast (may be nil)
-- CATEGORY: AST query
function M.mark_parents(ast, parent_ast)
ast.parent = parent_ast
for _,ast2 in ipairs(ast) do
if type(ast2) == 'table' then
M.mark_parents(ast2, ast)
end
end
end
-- Calls mark_parents(ast) if ast not marked.
-- CATEGORY: AST query
function M.ensure_parents_marked(ast)
if ast[1] and not ast[1].parent then M.mark_parents(ast) end
end
-- For each node n in ast, sets n.tag2 to context string:
-- 'Block' - node is block
-- 'Stat' - node is statement
-- 'StatBlock' - node is statement and block (i.e. `Do)
-- 'Exp' - node is expression
-- 'Explist' - node is expression list (or identifier list)
-- 'Pair' - node is key-value pair in table constructor
-- note: ast.tag2 will be set to context.
-- CATEGORY: AST query
local iscertainstat = {Do=true, Set=true, While=true, Repeat=true, If=true,
Fornum=true, Forin=true, Local=true, Localrec=true, Return=true, Break=true}
function M.mark_tag2(ast, context)
context = context or 'Block'
ast.tag2 = context
for i,bast in ipairs(ast) do
if type(bast) == 'table' then
local nextcontext
if bast.tag == 'Do' then
nextcontext = 'StatBlock'
elseif iscertainstat[bast.tag] then
nextcontext = 'Stat'
elseif bast.tag == 'Call' or bast.tag == 'Invoke' then
nextcontext = context == 'Block' and 'Stat' or 'Exp'
--DESIGN:Metalua: these calls actually contain expression lists,
-- but the expression list is not represented as a complete node
-- by Metalua (as blocks are in `Do statements)
elseif bast.tag == 'Pair' then
nextcontext = 'Pair'
elseif not bast.tag then
if ast.tag == 'Set' or ast.tag == 'Local' or ast.tag == 'Localrec'
or ast.tag == 'Forin' and i <= 2
or ast.tag == 'Function' and i == 1
then
nextcontext = 'Explist'
else
nextcontext = 'Block'
end
else
nextcontext = 'Exp'
end
M.mark_tag2(bast, nextcontext)
end
end
end
-- Gets smallest statement or block containing or being `ast`.
-- The AST root node `top_ast` must also be provided.
-- Note: may decorate AST as side-effect (mark_tag2/mark_parents).
-- top_ast is assumed a block, so this is always successful.
-- CATEGORY: AST query
function M.get_containing_statementblock(ast, top_ast)
if not top_ast.tag2 then M.mark_tag2(top_ast) end
if ast.tag2 == 'Stat' or ast.tag2 == 'StatBlock' or ast.tag2 == 'Block' then
return ast
else
M.ensure_parents_marked(top_ast)
return M.get_containing_statementblock(ast.parent, top_ast)
end
end
-- Finds smallest statement, block, or comment AST in ast/tokenlist containing position
-- range [fpos, lpos]. If allowexpand is true (default nil) and located AST
-- coincides with position range, then next containing statement is used
-- instead (this allows multiple calls to further expand the statement selection).
-- CATEGORY: AST query
function M.select_statementblockcomment(ast, tokenlist, fpos, lpos, allowexpand)
--IMPROVE: rename ast to top_ast
local match_ast, comment_ast = M.smallest_ast_containing_range(ast, tokenlist, fpos, lpos)
local select_ast = comment_ast or M.get_containing_statementblock(match_ast, ast)
local nfpos, nlpos = M.ast_pos_range(select_ast, tokenlist)
--DEBUG('s', nfpos, nlpos, fpos, lpos, match_ast.tag, select_ast.tag)
if allowexpand and fpos == nfpos and lpos == nlpos then
if comment_ast then
-- Select enclosing statement.
select_ast = match_ast
nfpos, nlpos = M.ast_pos_range(select_ast, tokenlist)
else
-- note: multiple times may be needed to expand selection. For example, in
-- `for x=1,2 do f() end` both the statement `f()` and block `f()` have
-- the same position range.
M.ensure_parents_marked(ast)
while select_ast.parent and fpos == nfpos and lpos == nlpos do
select_ast = M.get_containing_statementblock(select_ast.parent, ast)
nfpos, nlpos = M.ast_pos_range(select_ast, tokenlist)
end
end
end
return nfpos, nlpos
end
-- Converts tokenlist to string representation for debugging.
-- CATEGORY: tokenlist debug
function M.dump_tokenlist(tokenlist)
local ts = {}
for i,token in ipairs(tokenlist) do
ts[#ts+1] = 'tok.' .. i .. ': [' .. token.fpos .. ',' .. token.lpos .. '] '
.. tostring(token[1]) .. ' ' .. tostring(token.ast.tag)
end
return table.concat(ts, '\n') -- .. 'nbytes=' .. tokenlist.nbytes .. '\n'
end
--FIX:Q: does this handle Unicode ok?
--FIX?:Metalua: fails on string with escape sequence '\/'. The Reference Manual
-- doesn't say this sequence is valid though.
--FIX:Metalua: In `local --[[x]] function --[[y]] f() end`,
-- 'x' comment omitted from AST.
--FIX:Metalua: `do --[[x]] end` doesn't generate comments in AST.
-- `if x then --[[x]] end` and `while 1 do --[[x]] end` generates
-- comments in first/last of block
--FIX:Metalua: `--[[x]] f() --[[y]]` returns lineinfo around `f()`.
-- `--[[x]] --[[y]]` returns lineinfo around everything.
--FIX:Metalua: `while 1 do --[[x]] --[[y]] end` returns first > last
-- lineinfo for contained block
--FIX:Metalua: search for "PATCHED:LuaInspect" in the metalualib folder.
--FIX?:Metalua: loadstring parses "--x" but metalua omits the comment in the AST
--FIX?:Metalua: `local x` is generating `Local{{`Id{x}}, {}}`, which
-- has no lineinfo on {}. This is contrary to the Metalua
-- spec: `Local{ {ident+} {expr+}? }.
-- Other things like `self` also generate no lineinfo.
-- The ast2.lineinfo above avoids this.
--FIX:Metalua: Metalua shouldn't overwrite ipairs/pairs. Note: Metalua version
-- doesn't set errorlevel correctly.
--Q:Metalua: Why does `return --[[y]] z --[[x]]` have
-- lineinfo.first.comments, lineinfo.last.comments,
-- plus lineinfo.comments (which is the same as lineinfo.first.comments) ?
--CAUTION:Metalua: `do f() end` returns lineinfo around `do f() end`, while
-- `while 1 do f() end` returns lineinfo around `f()` for inner block.
--CAUTION:Metalua: The lineinfo on Metalua comments is inconsistent with other
-- nodes
--CAUTION:Metalua: lineinfo of table in `f{}` is [3,2], of `f{ x,y }` it's [4,6].
-- This is inconsistent with `x={}` which is [3,4] and `f""` which is [1,2]
-- for the string.
--CAUTION:Metalua: only the `function()` form of `Function includes `function`
-- in lineinfo. 'function' is part of `Localrec and `Set in syntactic sugar form.
--[=[TESTSUITE
-- test longest_prefix/longest_postfix
local function pr(text1, text2)
local lastv
local function same(v)
assert(not lastv or v == lastv); lastv = v; return v
end
local function test1(text1, text2) -- test prefix/postfix
same(longest_prefix(text1, text2))
same(longest_postfix(text1:reverse(), text2:reverse()))
end
local function test2(text1, text2) -- test swap
test1(text1, text2)
test1(text2, text1)
end
for _,extra in ipairs{"", "x", "xy", "xyz"} do -- test extra chars
test2(text1, text2..extra)
test2(text2, text1..extra)
end
return lastv
end
check('==', pr("",""), 0)
check('==', pr("a",""), 0)
check('==', pr("a","a"), 1)
check('==', pr("ab",""), 0)
check('==', pr("ab","a"), 1)
check('==', pr("ab","ab"), 2)
check('==', pr("abcdefg","abcdefgh"), 7)
--]=]
--[=[TESTSUITE
print 'DONE'
--]=]
return M
|