diff --git a/src/pallene/Lexer.lua b/src/pallene/Lexer.lua index b1c1b3a6..7681b911 100644 --- a/src/pallene/Lexer.lua +++ b/src/pallene/Lexer.lua @@ -26,12 +26,14 @@ local newline = P"\n\r" + P"\r\n" + P"\n" + P"\r" -- See inclinenumber in llex.c local find_newline = (1 - newline)^0 * newline local comment_line = RE"[^\n\r]*" * newline^-1 - local longstring_open = P("[") * P("=")^0 * P("[") +local longstring_content = RE".[^]]*" + +local hex_number = RE"[0-9A-Fa-f]" local string_delimiter = RE"[\"\']" local string_content = RE"[^\n\r]" -local string_hex_number = RE"[0-9A-Fa-f][0-9A-Fa-f]?" +local string_hex_number = RE"[0-9A-Fa-f][0-9A-Fa-f]" local string_dec_number = RE"[0-9][0-9]?[0-9]?" local string_u_number = RE"[0-9A-Fa-f]+" @@ -75,14 +77,15 @@ function Lexer:init(file_name, input) self.pos = 1 -- Absolute position in the input self.line = 1 -- Line number for error messages self.col = 1 -- Column number for error messages - self.matched = false -- Last matched substring + self.old_pos = false -- Absolute position of last matched substring end function Lexer:loc() return Location.new(self.file_name, self.line, self.col, self.pos) end --- If the given pattern matches, move the lexer forward and set self.matched. +-- If the given pattern matches, move the lexer forward. +-- If it doesn't match, unset old_pos. -- The pattern can be either an LPEG pattern or a literal string. local pattern_cache = {} function Lexer:try(pat) @@ -92,25 +95,33 @@ function Lexer:try(pat) end assert(lpeg.type(pat) == "pattern") + local old_pos = self.pos local new_pos = pat:match(self.input, self.pos) if new_pos then - self.matched = string.sub(self.input, self.pos, new_pos - 1) - local i = 1 + local i = old_pos while true do - local j = find_newline:match(self.matched, i) - if not j then break end + local j = find_newline:match(self.input, i) + if not j or j > new_pos then break end self.line = self.line + 1 self.col = 1 i = j end - self.col = self.col + #self.matched - i + 1 + self.old_pos = old_pos self.pos = new_pos + self.col = self.col + (new_pos - i) return true else + self.old_pos = false return false end end +-- The substring for the last thing found by Lexer:try() +function Lexer:matched() + assert(self.old_pos) + return string.sub(self.input, self.old_pos, self.pos - 1) +end + function Lexer:read_short_string(delimiter) local parts = {} while not self:try(delimiter) do @@ -119,7 +130,7 @@ function Lexer:read_short_string(delimiter) then table.insert(parts, "\n") elseif self:try(string_dec_number) then - local n = assert(tonumber(self.matched, 10)) + local n = assert(tonumber(self:matched(), 10)) if n < 256 then table.insert(parts, string.char(n)) else @@ -127,10 +138,11 @@ function Lexer:read_short_string(delimiter) end elseif self:try("x") then - if self:try(string_hex_number) and #self.matched == 2 then - local n = assert(tonumber(self.matched, 16)) + if self:try(string_hex_number) then + local n = assert(tonumber(self:matched(), 16)) table.insert(parts, string.char(n)) else + self:try(hex_number) -- possibly advance error location return false, "hexadecimal digit expected" end @@ -141,8 +153,9 @@ function Lexer:read_short_string(delimiter) if not self:try(string_u_number) then return false, "hexadecimal digit expected" end - local n = tonumber(self.matched, 16) - if #self.matched > 8 or n >= 0x7fffffff then + local s = self:matched() + local n = tonumber(s, 16) + if #s > 8 or n >= 0x7fffffff then return false, "UTF-8 value too large" end if not self:try("}") then @@ -155,18 +168,19 @@ function Lexer:read_short_string(delimiter) self:try(space) elseif self:try(one_char) then - local s = string_escapes[self.matched] - if s then - table.insert(parts, s) + local s = self:matched() + local c = string_escapes[s] + if c then + table.insert(parts, c) else - return false, string.format("invalid escape sequence '\\%s'", self.matched) + return false, string.format("invalid escape sequence '\\%s'", s) end else return false, "unfinished string" end elseif self:try(string_content) then - table.insert(parts, self.matched) + table.insert(parts, self:matched()) else return false, "unfinished string" @@ -180,15 +194,18 @@ function Lexer:read_long_string(delimiter_size, what) local close = "]" .. string.rep("=", delimiter_size) .. "]" self:try(newline) - local parts = {} + local first_pos = self.pos + local last_pos = self.pos + while not self:try(close) do - if self:try(one_char) then - table.insert(parts, self.matched) + if self:try(longstring_content) then + last_pos = self.pos else return false, string.format("unfinished %s (starting at line %d)", what, firstline) end end - return table.concat(parts) + + return string.sub(self.input, first_pos, last_pos - 1) end function Lexer:_next() @@ -197,46 +214,51 @@ function Lexer:_next() elseif self:try("--") then if self:try(longstring_open) then - local s, err = self:read_long_string(#self.matched-2, "long comment") + local len = self.pos - self.old_pos - 2 + local s, err = self:read_long_string(len, "long comment") if not s then return false, err end return "COMMENT", s else self:try(comment_line) - return "COMMENT", self.matched + return "COMMENT", self:matched() end elseif self:try(string_delimiter) then - local s, err = self:read_short_string(self.matched) + local s, err = self:read_short_string(self:matched()) if not s then return false, err end return "STRING", s elseif self:try(longstring_open) then - local s, err = self:read_long_string(#self.matched-2, "long string") + local len = self.pos - self.old_pos - 2 + local s, err = self:read_long_string(len, "long string") if not s then return false, err end return "STRING", s elseif self:try(possible_number) then - local n = tonumber(self.matched) + local s = self:matched() + local n = tonumber(s) if n then return "NUMBER", n else - return false, string.format("malformed number near '%s'", self.matched) + return false, string.format("malformed number near '%s'", s) end elseif self:try(symbol) then -- Must try this after numbers, because of '.' - return self.matched + return self:matched() elseif self:try(identifier) then - if is_keyword[self.matched] then - return self.matched + local name = self:matched() + if is_keyword[name] then + return name else - return "NAME", self.matched + return "NAME", name end elseif self:try(one_char) then - local what = (string.match(self.matched, "%g") - and string.format("'%s'", self.matched) - or string.format("<\\%d>", string.byte(self.matched))) + local c = self:matched() + local what = (string.match(c, "%g") + and string.format("'%s'", c) + or string.format("<\\%d>", string.byte(c))) return false, string.format("unexpected symbol near %s", what) else diff --git a/src/pallene/parser.lua b/src/pallene/parser.lua index 3b19c5f8..5a1dc246 100644 --- a/src/pallene/parser.lua +++ b/src/pallene/parser.lua @@ -51,6 +51,7 @@ function Parser:advance() tok, err = self.lexer:next() if not tok then self:syntax_error(self.lexer:loc(), "%s", err) + self:abort_parsing() end if tok.name == "COMMENT" then table.insert(self.comment_regions, { tok.loc.pos, tok.end_pos })