[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: XML parser with DOM-like API
- From: Xavier Wang <weasley.wx@...>
- Date: Mon, 10 Oct 2011 13:35:22 +0800
2011/10/10 Philippe Lhoste <PhiLho@gmx.net>:
> On 09/10/2011 23:25, Tarmo Pikaro wrote:
>>
>> Why do you need XML ?
>> I would propose to use JSON if you're not bound to any existing XML file
>> format nonsense.
>
> This is questioned regularly in this list. I can go a step further, and
> question "why Json?". After all, its format is made to be easily parsed by
> JavaScript, not by Lua. You can as well use Lua table format as new format.
>
> But in both cases, there are tons of XML and Json data to be parsed,
> produced by 3rd party libraries. From XHTML or RSS data to StackOverflow
> stats or Twitter data.
>
> Beside, OP wrote: "I've got some legacy code", so he is clearly in the
> "can't do without it" camp.
By his own account, this legacy code is not written in Lua, so I advise
simply using pure Lua to parse the XML (that is very easy) and using Lua's
table format to process it; you will find it very convenient.
This is a very simple parser:
-- Replacement text for the five predefined XML entities, keyed by the
-- entity name (the text between '&' and ';').
local escapes = {
  lt   = '<',
  gt   = '>',
  amp  = '&',
  apos = '\'',
  quot = '"',
}
--- Resolve the body of one entity or character reference.
-- Used as the replacement callback of string.gsub: returning nil makes
-- gsub keep the original "&...;" text unchanged.
-- @param s the text between '&' and ';' (e.g. "amp", "#65", "#x41")
-- @return the replacement string, or nil when the reference is not
--         recognised or cannot be represented as a single byte
local function helper(s)
  -- Numeric character reference: decimal ("#65") or hexadecimal ("#x41").
  local code
  local dec = string.match(s, '^#(%d+)$')
  if dec then
    code = tonumber(dec)
  else
    local hex = string.match(s, '^#x(%x+)$')
    if hex then
      code = tonumber(hex, 16)
    end
  end
  if dec or code then
    -- string.char raises an error outside 0..255, which would abort the
    -- whole parse; leave such references untouched instead.
    if code and code <= 255 then
      return string.char(code)
    end
    return nil
  end
  -- Named entity; unknown names yield nil and are therefore left as-is.
  return escapes[s]
end
--- Replace every "&name;" / "&#num;" reference in s using helper.
-- References for which helper returns nil are kept verbatim.
-- @param s input string
-- @return s with the recognised references substituted
local function strip_escapes(s)
  -- (disabled in the original: s = string.gsub(s, '&', '&'))
  -- The parentheses drop gsub's second result (the substitution count).
  return (string.gsub(s, '&(#?[%a%d]+);', helper))
end
--- Parse the attribute part of a tag into a table.
-- Recognises name="value" and name='value' pairs (whitespace around '='
-- is allowed). Names may contain letters, digits, '_', ':', '.' and '-',
-- so namespaced or hyphenated attributes such as xml:lang or data-id are
-- stored under their full name rather than a truncated suffix.
-- @param s the text following the tag name inside the tag
-- @return table mapping attribute name to its unescaped value
local function parseargs(s)
  local attrs = {}
  -- %2 back-references the opening quote so the value ends at the
  -- matching quote character.
  for name, _, value in string.gmatch(s, "([%w_:%.%-]+)%s*=%s*([\"'])(.-)%2") do
    attrs[name] = strip_escapes(value)
  end
  return attrs
end
--- Parse an XML string into nested Lua tables.
-- Every element becomes a table holding its attributes as string keys
-- (as produced by parseargs), its name in the field `tagName`, and its
-- child elements in the array part, in document order. Text between
-- tags is ignored unless the commented-out sections below are enabled.
-- @param s XML source string
-- @return the table of the first top-level element, or nil if s
--         contains no tags
-- Raises an error for a stray closing tag, a mismatched closing tag, or
-- an element left unclosed at the end of the input.
local function xmlToTable(s)
  local i = 1
  local top = {}
  local stack = {top}  -- stack[1] is a synthetic root collecting top-level elements
  while true do
    local tb, te, close, tag, xarg, empty = string.find(s,
      "<(%/?)(%w+)(.-)(%/?)>", i)
    if not tb then break end
    -- uncomment below to support free text in XML
    --local text = string.sub(s, i, tb - 1)
    --if not string.match(text, "^%s*$") then
    --table.insert(top, strip_escapes(text))
    --end
    if empty == "/" then -- empty element tag
      local elem = parseargs(xarg)
      elem.tagName = tag
      table.insert(top, elem)
    elseif close == "" then -- start tag
      top = parseargs(xarg)
      top.tagName = tag
      table.insert(stack, top) -- new level
    else -- end tag
      -- Only the synthetic root is left: there is no open element to close.
      if #stack < 2 then
        error("nothing to close with "..tag)
      end
      local toclose = table.remove(stack) -- remove top
      top = stack[#stack]
      if toclose.tagName ~= tag then
        error("trying to close "..toclose.tagName.." with "..tag)
      end
      table.insert(top, toclose)
    end
    i = te + 1
  end
  -- uncomment below to support free text in XML
  --local text = string.sub(s, i)
  --if not string.match(text, "^%s*$") then
  --table.insert(top, strip_escapes(text))
  --end
  if #stack > 1 then
    error("unclosed "..stack[#stack].tagName)
  end
  return stack[1][1]
end
>
> --
> Philippe Lhoste
> -- (near) Paris -- France
> -- http://Phi.Lho.free.fr
> -- -- -- -- -- -- -- -- -- -- -- -- -- --
>
>
>