I wrote some code to parse Lua 5.2 strings in Lua 5.1, using patterns. It works perfectly as far as I tested.
local squote = "'" local dquote = '"' -- escape "sequences" local escapeSequences = { a = '\a', b = '\b', f = '\f', r = '\r', n = '\n', t = '\t', v = '\v', ['"'] = '"', ["'"] = "'", ['\\'] = '\\' } local pads = { z = "\\z", x = "\\x", ['0'] = '\\0', ['1'] = '\\1', ['2'] = '\\2', ['3'] = '\\3', ['4'] = '\\4', ['5'] = '\\5', ['6'] = '\\6', ['7'] = '\\7', ['8'] = '\\8', ['9'] = '\\9' } setmetatable(pads, { __index = function(t,k) return "\\v" .. k .. "/" end }) -- Parse a string like it's a Lua 5.2 string. local function parseString52(s) -- "validate" string local startChar = string.sub(s,1,1) assert(startChar==squote or startChar==dquote) assert(string.sub(s, -1, -1) == startChar) -- remove quotes local str = string.sub(s, 2, -2) -- TODO check for unescaped quotes -- replace "normal" escapes with a padded escape str = string.gsub(str, "\\(.)", function(c) -- swap startChar with some invalid escape if c == startChar then c = "m" -- swap the invalid escape with startChar elseif c == "m" then c = startChar end return pads[c] end) -- check for a padded escape for startChar - remember this is actually our invalid escape assert(not string.find(str, "\\v" .. startChar .. "/"), "invalid escape sequence near '\\m'") -- then check for non-escaped startChar assert(not string.find(str, startChar), "unfinished string") -- pad 1-digit numerical escapes str = string.gsub(str, "\\([0-9])[^0-9]", "\\00%1") -- pad 2-digit numerical escapes str = string.gsub(str, "\\([0-9][0-9])[^0-9]", "\\0%1") local t = {} local i = 1 local last = 1 -- split on \z for from,to in function(x,y) return string.find(x, "\\z", y+1) end, str, 0 do t[i] = string.sub(str, last, from - 1) last = to+1 i = i + 1 end t[i] = string.sub(str, last, #str) -- parse results local nt = {} for x,y in ipairs(t) do nt[x] = string.gsub(y, "\\(([vx0-9])((.).))", function(a,b,c,d) if b == "v" then return escapeSequences[d] or (d == "m" and startChar or assert(false, "invalid escape sequence near '\\" .. d .. "'")) elseif b == "x" then local n = tonumber(c, 16) assert(n, "hexadecimal digit expected near '\\x" .. c .. "'") return string.char(n) else local n = tonumber(a) assert(n < 256, "decimal escape too large near '\\" .. a .. "'") return string.char(n) end end) if x > 1 then -- handle \z nt[x] = string.gsub(nt[x], "^[%s]*", "") end end -- merge return table.concat(nt, "") end return { parse52 = parseString52, }
(For test cases see here, I decided to keep them separate so to not bloat up the post)
Things I'm not too sure about:
- Should I use
if not (condition) then error(msg) end
instead ofassert(condition, msg)
? (Usually assertions are reserved for tests, but I'm not sure if this holds in Lua? I'm also not sure ifassert()
calls are JITted by LuaJIT.)- Should I
return nil, msg
instead of erroring?
- Should I
- Should I handle long strings?
- Should I error if the unparsed string literal is smaller than the passed buffer? (That is, if you pass in something like
[["<unparsed string literal>"<rest of buffer>]]
) Or should I find the end of the string myself?- If so, should I return the end position?
- Should I take an optional start position?
- Can this be made faster?
__index
for the cases you need to handle specially).\$\endgroup\$