lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


2011/10/10 Philippe Lhoste <PhiLho@gmx.net>:
> On 09/10/2011 23:25, Tarmo Pikaro wrote:
>>
>> Why do you need XML ?
>> I would propose to use JSON if you're not bound to any existing XML file
>> format nonsense.
>
> This is questioned regularly in this list. I can go a step further, and
> question "why Json?". After all, its format is made to be easily parsed by
> JavaScript, not by Lua. You can as well use Lua table format as new format.
>
> But in both cases, there are tons of XML and Json data to be parsed,
> produced by 3rd party libraries. From XHTML or RSS data to StackOverflow
> stats or Twitter data.
>
> Beside, OP wrote: "I've got some legacy code", so he is clearly in the
> "can't do without it" camp.

By his account, this legacy code is not written in Lua, so I advise just
using pure Lua to parse the XML (that is very easy) and Lua's table
format to process it; you will find it very convenient.

Here is a very simple parser:

-- Named XML entities (the text between '&' and ';') mapped to the literal
-- character each one stands for.
local escapes = {
    lt = '<',
    gt = '>',
    amp = '&',
    apos = "'",
    quot = '"',
}

--- Resolve the body of one "&...;" reference (the text between '&' and ';').
-- Decimal ("#65") and hexadecimal ("#x41") character references that fit in
-- a single byte are turned into that byte; anything else is looked up in the
-- `escapes` table of named entities.
-- @param s reference body, without the surrounding '&' and ';'
-- @return the replacement string, or nil when the reference cannot be
--         resolved (string.gsub then keeps the original text unchanged)
local function helper(s)
    local dec = string.match(s, '^#(%d+)$')
    if dec then
        local code = tonumber(dec)
        -- string.char raises an error above 255; leave such references
        -- untouched instead of aborting the whole parse.
        if code <= 255 then
            return string.char(code)
        end
        return nil
    end

    -- After optional leading zeros at most two hex digits are accepted, so
    -- the value is guaranteed to be in 0..255 without a separate range check.
    local hex = string.match(s, '^#[xX]0*(%x%x?)$')
    if hex then
        return string.char(tonumber(hex, 16))
    end

    return escapes[s]
end

-- Expand the "&name;" and "&#NNN;" references found in s.
-- Each reference body is handed to helper; when helper returns nil the
-- reference is left in the text as it was.
-- Returns the resulting string only (the gsub match count is discarded).
local function strip_escapes(s)
    local expanded = string.gsub(s, '&(#?[%a%d]+);', helper)
    return expanded
end

-- Collect the attributes found in the text of a tag.
-- Recognises name="value" and name='value' pairs (whitespace allowed around
-- the '='); the closing quote must be the same character as the opening one.
-- Names and values are both passed through strip_escapes.
-- Returns a table mapping attribute name -> attribute value.
local function parseargs(s)
    local attrs = {}
    for name, _, value in string.gmatch(s, "([%w_]+)%s*=%s*([\"'])(.-)%2") do
        attrs[strip_escapes(name)] = strip_escapes(value)
    end
    return attrs
end

--- Parse an XML string into nested Lua tables.
-- Every element becomes a table holding its attributes as string keys, its
-- tag name under `tagName`, and its child elements in the array part in
-- document order. Text between tags is ignored unless the two commented-out
-- blocks below are enabled.
-- @param s string containing the XML text
-- @return the table of the first top-level element, or nil if s contains
--         no element
-- Raises an error for a closing tag with nothing open, a closing tag that
-- does not match the innermost open element, or an element left unclosed.
local function xmlToTable(s)
    local i = 1
    local top = {}
    -- stack[1] is a synthetic root that collects the top-level elements;
    -- the last entry is always the innermost element still open.
    local stack = {top}

    while true do
        local tb, te, close, tag, xarg, empty =
            string.find(s, "<(%/?)(%w+)(.-)(%/?)>", i)
        if not tb then break end

        -- uncomment below to support free text in XML
        --local text = string.sub(s, i, tb - 1)
        --if not string.match(text, "^%s*$") then
            --table.insert(top, strip_escapes(text))
        --end

        if empty == "/" then  -- empty element tag
            local elem = parseargs(xarg)
            elem.tagName = tag
            table.insert(top, elem)

        elseif close == "" then   -- start tag
            top = parseargs(xarg)
            top.tagName = tag
            table.insert(stack, top)   -- new level

        else  -- end tag
            -- Only the synthetic root is left: there is no open element
            -- that this closing tag could belong to.
            if #stack < 2 then
                error("nothing to close with "..tag)
            end
            local toclose = table.remove(stack)  -- innermost open element
            top = stack[#stack]
            if toclose.tagName ~= tag then
                error("trying to close "..toclose.tagName.." with "..tag)
            end
            table.insert(top, toclose)
        end
        i = te + 1
    end

    -- uncomment below to support free text in XML
    --local text = string.sub(s, i)
    --if not string.match(text, "^%s*$") then
        --table.insert(top, strip_escapes(text))
    --end

    if #stack > 1 then
        error("unclosed "..stack[#stack].tagName)
    end
    return stack[1][1]
end

>
> --
> Philippe Lhoste
> --  (near) Paris -- France
> --  http://Phi.Lho.free.fr
> --  --  --  --  --  --  --  --  --  --  --  --  --  --
>
>
>