String Query

lua-users home
wiki

stringquery is a string pattern matching and transformation library inspired partly by [jQuery].

Description

Primary design qualities are

The resemblance to jQuery is this: we construct a query object containing a set of selections on the thing being studied, then perform a series of chained method calls that refine and alter those selections (all at once, as a set), and finally perform an operation on those selections (e.g. replace them or return them). See the examples in the test suite below.

Status

The design of this library is preliminary and originally intended only as an experiment. Improvements welcome.

Author

DavidManura

test_stringquery.lua

-- test_stringquery.lua
-- test of dmlib.stringquery.

local SQ = require "dmlib.stringquery"
local sq = SQ.sq

-- Equality assertion with a readable failure message.
-- Does nothing when a == b; otherwise raises an error showing both
-- values (via tostring), attributed to the caller (error level 2).
local function asserteq(a,b)
  if a == b then return end
  local message = '[' .. tostring(a) .. '] ~= [' .. tostring(b) .. ']'
  error(message, 2)
end

-- All checks go through asserteq so that a failure reports both the
-- actual and the expected value instead of a bare "assertion failed!".

-- Replace every word with a fixed string.
asserteq(
  sq("this is a test"):match("%w+"):replace('_'),
  '_ _ _ _' )

-- Linkify URLs that appear outside of HTML tags and contain 'user'.
asserteq(
  sq('<p>this is a <a href="/">test</a> http://lua-users.org http://lua.org </p>')
    :match("<[^>]*>")
    :invert()
    :match('http://[^ ]+')
    :filter('user')
    :replace(function(s) return '<a href="' .. s .. '">' .. s .. '</a>' end),
  '<p>this is a <a href="/">test</a> <a href="http://lua-users.org">' ..
  'http://lua-users.org</a> http://lua.org </p>' )

-- Filter with a combined predicate, replace with a function.
asserteq(
  sq("the red book, the green book, and the blue book")
    :match("%w+ book")
    :filter(SQ.any("^green ", "^red"))
    :replace(string.upper),
  'the RED BOOK, the GREEN BOOK, and the blue book' )

-- solution to problem from http://lua-users.org/wiki/FrontierPattern
-- (get_unpacked is not the last argument, so only its first result is used)
asserteq(
  sq("the QUICK BROwn fox")
    :match("%w+")
    :filter("^[A-Z]*$")
    :get_unpacked(),
  'QUICK' )

-- examples in docs
asserteq(
  table.concat(sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get(), ','),
  'A,BETA,G,A' )
asserteq(
  table.concat(sq'this is a test':match'%w+':filter'^....$':get(), ','),
  'this,test' )
asserteq(
  table.concat(sq'123 abc 234':match'%a+':invert():get(), ','),
  '123 , 234' )
asserteq(
  table.concat({sq'this is a test':match'%w+':get_unpacked()}, ','),
  'this,is,a,test' )
asserteq(
  table.concat(sq'hello':get(), ','),
  'hello' )
asserteq(
  SQ.any('%a%d', '%d%a')(' a1 '), true )
asserteq(
  SQ.all('%a%d', '%d%a')(' a1 2b '), true )

print 'DONE'

dmlib/stringquery.lua

-- dmlib/stringquery.lua (dmlib.stringquery)
--
-- String matching/replacing library inspired partly by jquery
--
-- Warning: preliminary design.
--
-- (c) 2009 David Manura, Licensed under the same terms as Lua (MIT license).

-- Module table: the public functions (sq, any, all) are attached to it
-- below and it is returned at the end of the file.
local M = {}


-- Overwrites the array part of dst with the array part of src:
-- entries dst[1], dst[2], ... are removed up to the first nil, then
-- src[1], src[2], ... are copied in up to the first nil.
-- Non-array keys of dst are left untouched.  Returns dst.
local function tioverride(dst, src)
  local pos = 1
  while dst[pos] ~= nil do
    dst[pos] = nil
    pos = pos + 1
  end

  pos = 1
  while src[pos] ~= nil do
    dst[pos] = src[pos]
    pos = pos + 1
  end

  return dst
end

-- Returns array of substrings of s, partitioned
-- by array of ranges (1-based start and end indices).
-- Ranges are processed in the order given; this assumes they are
-- ascending and non-overlapping.
-- Always returns an odd number of substrings (even-indexed
-- substrings are inside the ranges, odd-indexed ones are the
-- text between/around them).
-- Example:
--   partition("abcdefg", {{1,2},{4,5}})
--   --> {'','ab', 'c','de', 'fg'}
local function partition(s, ranges)
  local result = {}
  local i = 1  -- start of the text not yet emitted
  for _,range in ipairs(ranges) do
    -- Index the pair directly rather than calling the global `unpack`,
    -- which no longer exists in Lua 5.2+.
    local ia,ib = range[1], range[2]
    result[#result+1] = s:sub(i,ia-1)   -- text before the range
    result[#result+1] = s:sub(ia,ib)    -- text inside the range
    i = ib+1
  end
  result[#result+1] = s:sub(i)          -- trailing text (possibly '')
  return result
end


-- Helper function.
-- Normalizes a predicate argument: a string is treated as a Lua
-- pattern and wrapped in a function that returns the result of
-- matching its argument against that pattern; any other value
-- (normally a function) is returned unchanged.
local function getarg(o)
  if type(o) ~= 'string' then
    return o
  end
  return function(s) return s:match(o) end
end


-- Metatable shared by all query objects.  __index points back at the
-- metatable itself, so the methods defined on mt below are found when
-- looked up on a query object.
local mt = {}
mt.__index = mt


-- Defines new selections based on matches of
-- pattern inside current selections.
-- Each current selection is searched independently, left to right,
-- for successive matches of pat; the matches become the new
-- selections.  A pattern that matches the empty string yields empty
-- selections (start = end + 1) and the search then advances by one
-- character, so such patterns terminate instead of looping forever.
-- Returns self (for chaining).
-- Example:
--   sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get()
--   --> {'A', 'BETA', 'G', 'A'}
function mt:match(pat)
  local results = {}
  for _,range in ipairs(self) do
    -- Index directly: the global `unpack` does not exist in Lua 5.2+.
    local ia0,ib0 = range[1], range[2]
    local stmp = self.s:sub(ia0,ib0)
    -- An empty match can start no later than one past the end.  The
    -- explicit bound is needed because Lua 5.1 clamps an oversized
    -- init, so find() alone would keep matching there.
    local last = #stmp + 1
    local ia = 1
    while ia <= last do
      local ja,jb = stmp:find(pat, ia)
      if not ja then break end
      -- Translate from substring coordinates back to self.s coordinates.
      results[#results+1] = {ja+ia0-1, jb+ia0-1}
      if jb >= ja then
        ia = jb+1
      else
        ia = ja+1  -- empty match: force progress
      end
    end
  end
  tioverride(self, results)
  return self
end


-- Defines new selections based only on current selections
-- that match object o.  o can be a function (s -> b) that
-- returns whether string s matches (any value other than
-- nil/false counts as a match).
-- Alternately o can be a string pattern, in which case a selection
-- is kept when the pattern matches somewhere inside it.
-- Returns self (for chaining).
-- Example:
--   sq'this is a test':match'%w+':filter'^....$':get()
--   --> {'this', 'test'}
function mt:filter(o)
  local f = getarg(o)

  local result = {}
  for _,range in ipairs(self) do
    -- Index directly: the global `unpack` does not exist in Lua 5.2+.
    local ia,ib = range[1], range[2]
    local si = self.s:sub(ia,ib)
    if f(si) then
      result[#result+1] = {ia,ib}
    end
  end
  tioverride(self, result)
  return self
end


-- Defines new selections that form the inverse (complement)
-- of the current selections: the stretches of the string that
-- lie before, between and after them.  Empty stretches are omitted.
-- Assumes the current selections are in ascending order and do not
-- overlap.
-- Note: for a non-empty string, sq(s):invert():invert() selects the
-- whole string again; for the empty string the selection stays empty.
-- Returns self (for chaining).
-- Example:
--   sq'123 abc 234':match'%a+':invert():get()
--   --> {'123 ', ' 234'}
function mt:invert()
  local result = {}
  local i=1  -- first position not yet accounted for
  for _,range in ipairs(self) do
    -- Index directly: the global `unpack` does not exist in Lua 5.2+.
    local ia,ib = range[1], range[2]
    if ia > i then
      result[#result+1] = {i,ia-1}
    end
    i = ib+1
  end
  -- `<=` (not `<`): a remainder of exactly one character is still a gap.
  if i <= #self.s then
    result[#result+1] = {i,#self.s}
  end
  tioverride(self, result)
  return self
end


-- Builds and returns a new string in which every selection has
-- been replaced; the query object itself is not modified.
-- o is either a function (s1 -> s2) giving the replacement s2 for
-- the selected text s1 (a nil/false result means the empty string),
-- or a string used as the replacement for every selection.
function mt:replace(o)
  local is_literal = type(o) == 'string'

  local pieces = partition(self.s, self)
  -- Even-indexed pieces are the selected substrings.
  for idx = 2, #pieces, 2 do
    local substitute
    if is_literal then
      substitute = o
    else
      substitute = o(pieces[idx])
    end
    pieces[idx] = substitute or ''
  end

  return table.concat(pieces)
end


-- Collects the text of every current selection into a new array,
-- in selection order.
-- Example:
--   sq'this is a test':match'%w+':get()
--   --> {'this', 'is', 'a', 'test'}
function mt:get()
  local pieces = partition(self.s, self)
  local selected = {}
  -- partition() puts the selected substrings at the even indices.
  for idx = 2, #pieces, 2 do
    selected[#selected+1] = pieces[idx]
  end
  return selected
end


-- Returns all string selections as unpacked list
-- (multiple return values) rather than as an array.
-- Example:
--   sq'this is a test':match'%w+':get_unpacked()
--   --> 'this', 'is', 'a', 'test'
function mt:get_unpacked()
  -- `unpack` is a global in Lua 5.1 and lives in `table` from 5.2 on.
  local unpack = table.unpack or unpack
  return unpack(self:get())
end


-- Debugging aid: prints a dump of the query object (its string and
-- selection ranges) using penlight's pretty-printer.
-- Requires penlight 0.6.3, which is loaded on first use.
-- Returns self (for chaining).
function mt:print_dump()
  local pretty = require "pl.pretty"
  print(pretty.write(self))
  return self
end


-- Creates a string query object for string s.
-- The object keeps s in field `s` and its selections as {start, end}
-- pairs in the array part; initially the single selection is the
-- entire string.
-- Example:
--   sq'hello':get() --> {'hello'}
local function sq(s)
  local query = { s = s }
  query[1] = { 1, #s }
  return setmetatable(query, mt)
end
M.sq = sq


-- Combines predicates with logical OR: the returned function
-- yields true as soon as one of the given predicates accepts
-- its argument, and false if none does.
-- Each predicate may be a function or a string pattern.
-- Useful for sq(s):filter.
-- Example:
--   any('%a%d', '%d%a')(' a1 ') --> true
local function any(...)
  local preds = {}
  for idx, arg in ipairs({...}) do
    preds[idx] = getarg(arg)
  end

  return function(s)
    for idx = 1, #preds do
      if preds[idx](s) then
        return true
      end
    end
    return false
  end
end
M.any = any


-- Combines predicates with logical AND: the returned function
-- yields false as soon as one of the given predicates rejects
-- its argument, and true if every one accepts it.
-- Each predicate may be a function or a string pattern.
-- Useful for sq(s):filter.
-- Example:
--   all('%a%d', '%d%a')(' a1 2b ') --> true
local function all(...)
  local preds = {}
  for idx, arg in ipairs({...}) do
    preds[idx] = getarg(arg)
  end

  return function(s)
    for idx = 1, #preds do
      if not preds[idx](s) then
        return false
      end
    end
    return true
  end
end
M.all = all


-- Export the module table (what require "dmlib.stringquery" returns).
return M

See Also


RecentChanges · preferences
edit · history
Last edited October 31, 2009 7:46 pm GMT (diff)