Enhanced File Lines

lua-users home
wiki

Here are functions for portable line-by-line text processing (handling OS differences in '\r' and '\n' characters).

-- (c) 2008 David Manura. Licensed under the same terms as Lua (MIT).

-- file_lines(f [, chunk_size]) is similar to f:lines() for file f.
-- The main difference is that file_lines includes
-- new-line character sequences ("\n", "\r\n", "\r"),
-- if any, at the end of each line.  Embedded "\0" are also handled.
--
-- Parameters:
--   f          - object whose f:read(n) returns a string of up to n
--                characters, or nil once the input is exhausted.
--   chunk_size - optional; how many characters to request per f:read
--                call.  Must be an integer >= 1.  Defaults to 1024.
-- Returns an iterator function: each call returns the next line, and
-- nil when no text is left.
--
-- Caution: The newline behavior can depend on whether f is opened
-- in binary or ASCII mode.
--
local function file_lines(f, chunk_size)
  local CHUNK_SIZE = chunk_size or 1024
  if type(CHUNK_SIZE) ~= 'number' or CHUNK_SIZE < 1
     or CHUNK_SIZE % 1 ~= 0 then
    -- A read of zero characters can keep returning "" without consuming
    -- any input, which would make the loop below spin forever.
    error('chunk_size must be an integer >= 1', 2)
  end
  local buffer = ""
  local pos_beg = 1  -- index in buffer of the first character not yet returned
  return function()
    local pos, chars
    while 1 do
      -- Look for a newline character followed by at least one more
      -- character; requiring the follower keeps a "\r" at the end of the
      -- buffer from being split from a "\n" that is still to be read.
      pos, chars = buffer:match('()([\r\n].)', pos_beg)
      if pos or not f then
        break
      end
      local chunk = f:read(CHUNK_SIZE)
      if chunk then
        -- Discard text already handed out, then append the new data.
        buffer = buffer:sub(pos_beg) .. chunk
        pos_beg = 1
      else
        f = nil  -- input exhausted; never read from it again
      end
    end
    if not pos then
      -- Input is exhausted and no newline-plus-follower remains: the rest
      -- of the buffer (possibly ending in one newline character) is the
      -- final line.
      pos = #buffer
    elseif chars == '\r\n' then
      pos = pos + 1  -- keep the "\n" of a "\r\n" pair with its line
    end
    local line = buffer:sub(pos_beg, pos)
    pos_beg = pos + 1
    if #line > 0 then
      return line
    end
  end
end


--
-- Breaks string s into its lines and returns them as an array.
-- A line ends after "\n", "\r\n" or "\r"; that terminator, when
-- present, stays attached to the end of its line.  Text after the
-- last terminator is returned as a final line only if it is non-empty.
--
local function split_newlines(s)
  local lines = {}
  local start = 1
  -- nl_at: index of the next newline character; nl: that character plus
  -- the one following it (if any), used to recognise a "\r\n" pair.
  local nl_at, nl = s:match('()([\r\n].?)', start)
  while nl_at do
    local stop = nl_at
    if nl == '\r\n' then
      stop = stop + 1
    end
    lines[#lines + 1] = s:sub(start, stop)
    start = stop + 1
    nl_at, nl = s:match('()([\r\n].?)', start)
  end
  -- Whatever follows the last terminator forms an unterminated last line.
  if start <= #s then
    lines[#lines + 1] = s:sub(start)
  end
  return lines
end


--[=[slower implementation
local function split_newlines(s)
  local ts = {}
  local lastc
  s:gsub('([^\r\n]*)([\r\n])', function(a,b)
    if a == '' and lastc == '\r' and b == '\n' then
      ts[#ts] = ts[#ts] .. b
      lastc = nil
    else
      ts[#ts+1] = a .. b
      lastc = b
    end
    return ''
  end)
  local line = s:match('([^\r\n]+)$')
  if line then ts[#ts+1] = line end
  return ts
end
--]=]


-- test suite


-- utility function for test suite.
-- Builds a minimal stand-in for a file handle whose content is string s.
-- Only f:read(n) with a single numeric argument is supported; any other
-- call fails with 'NOT IMPL'.  Each read hands back the next n (or fewer)
-- characters, and nil once nothing is left.
local function mock_file(s)
  local remaining = s
  local function read(_, n, ...)
    local supported = type(n) == 'number' and select('#', ...) == 0
    assert(supported, 'NOT IMPL')
    local head = remaining:sub(1, n)
    remaining = remaining:sub(n + 1)
    if head == '' then
      return nil
    end
    return head
  end
  return { read = read }
end


-- utility function for test suite.
-- Renders a value for use in failure messages.  Strings are quoted with
-- '%q' and each newline character in the quoted form is replaced by the
-- letter n; any other value goes through tostring.
local function mytostring(s)
  if type(s) ~= 'string' then
    return tostring(s)
  end
  -- gsub also returns a match count; the local keeps only the string.
  local quoted = string.format('%q', s):gsub('\n', 'n')
  return quoted
end


-- utility function for test suite.
-- Raises an error of the form <a>~=<b> (both rendered by mytostring) when
-- a and b are not equal; does nothing otherwise.
-- level is optional (default 1) and is counted from the caller's point
-- of view: it is incremented by one before being passed to error, so 1
-- blames the function that called asserteq.
local function asserteq(a, b, level)
  local report_level = 1 + (level or 1)
  if a == b then
    return
  end
  local message = mytostring(a) .. '~=' .. mytostring(b)
  error(message, report_level)
end


-- utility function for test suite (wrap file_lines)
-- Feeds string s through file_lines by way of a mock file and returns
-- all resulting lines joined with '|'.
local function wrap1(s)
  local collected = {}
  local count = 0
  local f = mock_file(s)
  for line in file_lines(f) do
    count = count + 1
    collected[count] = line
  end
  return table.concat(collected, '|')
end


-- utility function for test suite (wrap split_newlines)
-- Runs split_newlines on string s and returns the resulting lines
-- joined with '|'.
local function wrap2(s)
  local lines = split_newlines(s)
  local joined = table.concat(lines, '|')
  return joined
end


local SZ = 1024 -- chunk size

-- test basics
-- Each pair is {text appended to the padding, expected lines joined by '|'}.
local BASIC_CASES = {
  {'',     ''},
  {'\r',   '\r'},
  {'\n',   '\n'},
  {'a',    'a'},
  {'\r\n', '\r\n'},
  {'\n\r', '\n|\r'},
  {'\r\r', '\r|\r'},
  {'\n\n', '\n|\n'},
  {'a\n',  'a\n'},
  {'a\r',  'a\r'},
  {'\na',  '\n|a'},
  {'\ra',  '\r|a'},
}
-- Padding lengths chosen to land the test text on and around a chunk edge.
local PAD_LENGTHS = {0,1,2,SZ-3,SZ-2,SZ-1,SZ,SZ+1,SZ+2,SZ+3}

for _, splitter in ipairs{wrap1, wrap2} do
  for _, pad_len in ipairs(PAD_LENGTHS) do
    local pad = (' '):rep(pad_len)
    for _, case in ipairs(BASIC_CASES) do
      local input, expected = case[1], case[2]
      asserteq(splitter(pad .. input), pad .. expected)
    end
  end
end

-- check that two implementations are equivalent on a lot of data.
-- Every 5-piece combination drawn from cs is appended to a run of spaces
-- whose length sits on or near a multiple of SZ, so that newline
-- sequences land on and around chunk edges, and the output of wrap1 and
-- wrap2 is compared.
local cs = {'', 'a', '\r', '\n', ' '}
for _,i in ipairs{0,1,SZ-3,SZ-2,SZ-1,SZ,SZ+1,SZ+2,SZ+3} do
for j=0,1 do
  local prefix = (' '):rep(i + j * SZ)
  for _,c1 in ipairs(cs) do
  for _,c2 in ipairs(cs) do
  for _,c3 in ipairs(cs) do
  for _,c4 in ipairs(cs) do
  for _,c5 in ipairs(cs) do
    -- The padding must be part of the tested string; without it the
    -- outer two loops would only repeat the same short inputs.
    local s = prefix .. c1 .. c2 .. c3 .. c4 .. c5
    local t1 = wrap1(s)
    local t2 = wrap2(s)
    asserteq(t1, t2)
  end end end end end
end end


print 'DONE'

Note: these functions are used in LuaPatch.

--DavidManura

See Also


RecentChanges · preferences
edit · history
Last edited December 28, 2008 3:41 am GMT (diff)