Enhanced File Lines
-- (c) 2008 David Manura.  Licensed under the same terms as Lua (MIT).

-- Number of bytes requested from the file per read when the caller of
-- file_lines does not supply a chunk size.
local DEFAULT_CHUNK_SIZE = 1024

--- Returns an iterator over the lines of file f, similar to f:lines().
-- The main difference is that file_lines includes the new-line character
-- sequence ("\n", "\r\n" or "\r"), if any, at the end of each line.
-- Embedded "\0" are also handled.
-- Caution: The newline behavior can depend on whether f is opened
-- in binary or ASCII mode.
--
-- @param f           object with a read method; f:read(n) must return a
--                    string of up to n bytes, or nil at end of input.
-- @param chunk_size  optional positive integer: bytes requested per read
--                    (defaults to DEFAULT_CHUNK_SIZE).
-- @return iterator function returning the next line on each call, and
--         nothing once the input is exhausted.
local function file_lines(f, chunk_size)
  if chunk_size == nil then
    chunk_size = DEFAULT_CHUNK_SIZE
  elseif type(chunk_size) ~= 'number' or chunk_size < 1
         or chunk_size % 1 ~= 0 then
    -- A zero-sized read never reaches end of input, so reject it early.
    error('chunk_size must be a positive integer', 2)
  end

  local buffer = ""
  local pos_beg = 1  -- index in buffer where the next line starts

  return function()
    local pos, chars
    while true do
      -- A terminator is only accepted when one more character follows it,
      -- so that a "\r" sitting at the end of the buffer is not taken as a
      -- complete terminator while its "\n" may still be unread.
      pos, chars = buffer:match('()([\r\n].)', pos_beg)
      if pos or not f then
        break
      end
      local chunk = f:read(chunk_size)
      if chunk then
        -- Drop the lines already returned and append the new data.
        buffer = buffer:sub(pos_beg) .. chunk
        pos_beg = 1
      else
        f = nil  -- end of input: stop reading and flush what is buffered
      end
    end

    if not pos then
      -- Input exhausted: whatever is left (possibly ending in a single
      -- newline character) is the last line.
      pos = #buffer
    elseif chars == '\r\n' then
      pos = pos + 1  -- keep both characters of "\r\n" on this line
    end

    local line = buffer:sub(pos_beg, pos)
    pos_beg = pos + 1
    if #line > 0 then
      return line
    end
  end
end

--- Splits string s into an array of lines, returning the result.
-- New-line character sequences ("\n", "\r\n", "\r"),
-- if any, are included at the ends of the lines.
--
-- @param s  string to split.
-- @return array (table) of line strings; empty for the empty string.
local function split_newlines(s)
  local ts = {}
  local posa = 1  -- index in s where the next line starts
  while true do
    local pos, chars = s:match('()([\r\n].?)', posa)
    if not pos then
      -- No terminator left: the remainder, if any, is the last line.
      local line = s:sub(posa)
      if line ~= '' then ts[#ts+1] = line end
      break
    end
    if chars == '\r\n' then pos = pos + 1 end
    ts[#ts+1] = s:sub(posa, pos)
    posa = pos + 1
  end
  return ts
end

--[=[slower implementation
local function split_newlines(s)
  local ts = {}
  local lastc
  s:gsub('([^\r\n]*)([\r\n])', function(a,b)
    if a == '' and lastc == '\r' and b == '\n' then
      ts[#ts] = ts[#ts] .. b
      lastc = nil
    else
      ts[#ts+1] = a .. b
      lastc = b
    end
    return ''
  end)
  local line = s:match('([^\r\n]+)$')
  if line then ts[#ts+1] = line end
  return ts
end
--]=]

-- test suite

-- utility function for test suite.
-- Create mock file for string s.  Only f:read(n) with a numeric n is
-- implemented; it consumes and returns up to n characters of s, or nil
-- once s is used up.
local function mock_file(s)
  local f = {}
  function f:read(n, ...)
    assert(type(n)=='number' and select('#', ...) == 0, 'NOT IMPL')
    local chunk = s:sub(1,n)
    s = s:sub(n+1)
    return chunk ~= '' and chunk or nil
  end
  return f
end

-- utility function for test suite.
-- Formats a value for error messages; strings are quoted with %q and the
-- line break that %q emits after a backslash is replaced by the letter n.
local function mytostring(s)
  return type(s) == 'string' and string.format('%q', s):gsub('\n','n')
         or tostring(s)
end

-- utility function for test suite.
-- Raises an error unless a == b.  level selects the stack level blamed
-- for the failure, counted from the caller of asserteq (default 1).
local function asserteq(a,b,level)
  level = (level or 1) + 1
  if a ~= b then
    error(mytostring(a) .. '~=' .. mytostring(b), level)
  end
end

-- utility function for test suite (wrap file_lines).
-- Reads s through a mock file and joins the resulting lines with '|'.
-- chunk_size is optional and is passed through to file_lines.
local function wrap1(s, chunk_size)
  local f = mock_file(s)
  local ts = {}
  for line in file_lines(f, chunk_size) do ts[#ts+1] = line end
  return table.concat(ts, '|')
end

-- utility function for test suite (wrap split_newlines).
-- Splits s and joins the resulting lines with '|'.
local function wrap2(s)
  return table.concat(split_newlines(s), '|')
end

local SZ = DEFAULT_CHUNK_SIZE  -- chunk size used by the basic tests

-- test basics
for _,f in ipairs{wrap1, wrap2} do
for _,i in ipairs{0,1,2,SZ-3,SZ-2,SZ-1,SZ,SZ+1,SZ+2,SZ+3} do
  -- Padding places the interesting characters around a chunk boundary.
  local s = (' '):rep(i)
  local function test(a, b) asserteq(f(s .. a), s .. b) end
  test('', '')
  test('\r', '\r')
  test('\n', '\n')
  test('a', 'a')
  test('\r\n', '\r\n')
  test('\n\r', '\n|\r')
  test('\r\r', '\r|\r')
  test('\n\n', '\n|\n')
  test('a\n', 'a\n')
  test('a\r', 'a\r')
  test('\na', '\n|a')
  test('\ra', '\r|a')
end end

-- check that two implementations are equivalent on a lot of data.
local cs = {'', 'a', '\r', '\n', ' '}
local suffixes = {}
for _,c1 in ipairs(cs) do
for _,c2 in ipairs(cs) do
for _,c3 in ipairs(cs) do
for _,c4 in ipairs(cs) do
for _,c5 in ipairs(cs) do
  suffixes[#suffixes+1] = c1 .. c2 .. c3 .. c4 .. c5
end end end end end

-- Unpadded, with the default chunk size.
for _,suffix in ipairs(suffixes) do
  asserteq(wrap1(suffix), wrap2(suffix))
end

-- Padded so that every suffix straddles a chunk boundary.  A small chunk
-- size keeps the padded strings short, so the exhaustive run stays fast.
local SMALL = 8
for _,i in ipairs{0,1,SMALL-3,SMALL-2,SMALL-1,SMALL,
                  SMALL+1,SMALL+2,SMALL+3} do
for j=0,1 do
  local pad = (' '):rep(i + j * SMALL)
  for _,suffix in ipairs(suffixes) do
    local s = pad .. suffix
    asserteq(wrap1(s, SMALL), wrap2(s))
  end
end end

print 'DONE'
Note: these functions are used in LuaPatch.