lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Hi all,

Inspired by the "continue" discussion I coded up an iterator maker whose 
iterator iterates only over relevant lines of a file. This iterator maker 
takes exactly one argument, a table (tab). 

By default all nonblank lines are relevant, but that can be changed with a 
callback (tab.is_relevant()). There's also a callback called tab.tweak() that 
can do additional types of work on relevant lines. When constructing the 
table, the one element it MUST have is tab.file, which can be either a 
filename or a handle open for read.

I'm attaching the package, called relevantlines.lua, as well as a test data 
file called test.txt and a test-jig Lua file, called testrel.lua, to exercise 
relevantlines.lua.

I hope you enjoy it, and please be gentle when you tell me all the things I 
did wrong with it. :-)

Thanks

SteveT

Steve Litt
Recession Relief Package
http://www.recession-relief.US
Twitter: http://www.twitter.com/stevelitt

#!/usr/bin/lua

-- Test jig for relevantlines.lua.
--
-- Runs four passes and prints every delivered line as "k=<number>, v=<text>":
--   1. test.txt with the default relevance test (non-blank lines only)
--   2. test.txt with every line treated as relevant
--   3. test.txt skipping blank lines and '#' comment lines, upper-casing the rest
--   4. standard input with the pass-3 callbacks, numbering from 1001
--
-- The same table is reused for every pass, so this_line_number is reset
-- explicitly before each one; the iterator leaves it advanced otherwise.

local sf = string.format

require("relevantlines")

local relevant_lines = relevantlines.relevant_lines

local SEPARATOR = "====================="

-- Run one full pass over settings.file and print each delivered pair.
-- The iterator returns settings.this_line_number and settings.this_line_text,
-- so the loop variables carry exactly the values stored in the table.
local function dump(settings)
	for k, v in relevant_lines(settings) do
		print(sf("k=%d, v=%s", k, v))
	end
end

-- Pass 1: defaults.
local tab = {file = "test.txt"}
dump(tab)

-- Pass 2: every line is relevant, blank ones included.
tab.this_line_number = 0
tab.is_relevant = function() return true end
print(SEPARATOR)
dump(tab)

-- Pass 3: skip blank lines and '#' comments; upper-case what is left.
tab.this_line_number = 0
tab.is_relevant = function()
	return string.match(tab.this_line_text, "%S") and
		not string.match(tab.this_line_text, "^%s*#")
end
tab.tweak = function() tab.this_line_text = string.upper(tab.this_line_text) end
print(SEPARATOR)
dump(tab)

-- Pass 4: same callbacks, but read an already-open handle (stdin) and start
-- the numbering at 1001.
tab.this_line_number = 1000
tab.file = io.stdin
print(SEPARATOR)
dump(tab)

one
two

#    three
#four



    #five
        six
         
seven
-- relevantlines.lua Copyright (C) 2011 by Steve Litt, all rights reserved.
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
-- 
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
-- 
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-- THE SOFTWARE.

-- Version 0.0.1, pre-alpha
-- relevant_lines() is an iterator maker that takes a table as its one and only
-- argument. At minimum this table must have a key called "file" whose value is
-- either the filename of an input file or an open-for-read handle. Other possible
-- elements to put in this table include:
-- this_line_number: One less than the first key to be delivered, defaults to 0
-- is_relevant: A callback function determining whether to bestow or skip this line
--   is_relevant defaults to "pass back all non-blank lines"
-- tweak: A callback to do other operations to relevant lines
-- 
-- Because of the table nature of the argument, you can put pretty much 
-- everything but the kitchen sink, and as long as either is_relevant() or
-- tweak() calls it, it will form part of the algorithm.

-- Register this file as a module named after the string passed to require();
-- package.seeall lets the module body keep using globals such as io, os and
-- string. Functions defined below become fields of that module table.
-- NOTE(review): module() is a Lua 5.1 facility that later Lua versions
-- deprecate; confirm the target interpreter before relying on it.
module(..., package.seeall);

--- Build an iterator that delivers only the relevant lines of a file.
--
-- @param tab  Table of settings and shared state. Fields read or written:
--   file             (required) filename string, or a handle open for read
--   this_line_number (optional) starting count; defaults to 0. It is
--                    incremented on EVERY read attempt, including skipped
--                    lines and the final read that hits end of file.
--   is_relevant      (optional) zero-argument callback; a false/nil result
--                    skips the current line. When absent, a default that
--                    accepts any line containing a non-space character is
--                    stored into tab.
--   tweak            (optional) zero-argument callback run on each relevant
--                    line before it is returned; it may rewrite
--                    tab.this_line_text.
--   this_line_text, prev_line_text, prev_line_number are (re)initialised here;
--   only this_line_text is updated afterwards.
-- @return  Iterator function returning (line number, line text) for each
--   relevant line and (nil, nil) at end of file and on every later call.
--
-- A bad argument writes a message to stderr and terminates the process with
-- os.exit(1). A filename that cannot be opened raises through assert().
-- A handle opened here from a filename is closed at end of file; a handle
-- supplied by the caller is never closed.
function relevant_lines(tab)
	--### VALIDATE THE ARGUMENT BEFORE TOUCHING ANY OF ITS FIELDS
	-- Indexing a non-table first would raise a raw Lua error and the message
	-- below would never be shown.
	if type(tab) ~= "table" then
		io.stderr:write("ERROR: Function relevant_lines() must have a single argument, a table.\n")
		io.stderr:write("Aborting...\n\n")
		os.exit(1)
	end

	--### GET FILE HANDLE UP AND RUNNING
	local handle
	-- Remember whether the handle was opened here, so that closing does not
	-- depend on whatever tab.file holds by the time end of file is reached.
	local owns_handle = false
	local source_type = type(tab.file)
	if source_type == "nil" then
		io.stderr:write("ERROR: Table argument to relevant_lines() must have an element called file.\n")
		io.stderr:write("Aborting...\n\n")
		os.exit(1)
	elseif source_type == "userdata" then
		handle = tab.file
	elseif source_type == "string" then
		handle = assert(io.open(tab.file, "r"))
		owns_handle = true
	else
		io.stderr:write("ERROR: Function relevant_lines(): tab.file has wierd type.\n")
		io.stderr:write("Aborting...\n\n")
		os.exit(1)
	end

	--### RESET PER-RUN STATE
	tab.this_line_text = nil
	tab.prev_line_text = nil
	tab.this_line_number = tab.this_line_number or 0
	tab.prev_line_number = -1

	--### DEFAULT THE CALLBACK IF NECESSARY. DEFAULT TO SKIP BLANK LINES
	if not tab.is_relevant then
		tab.is_relevant = function()
			return string.match(tab.this_line_text, "%S") -- skip blanks
		end
	end

	--### DEFINE THE ITERATOR TO RETURN
	local finished = false
	return function()
		-- Once exhausted, stay exhausted: the handle may already be closed,
		-- so reading again would raise.
		if finished then
			return nil, nil
		end

		-- Read (and count) lines until one is relevant or the input ends.
		repeat
			tab.this_line_text = handle:read("*line")
			tab.this_line_number = tab.this_line_number + 1
		until tab.this_line_text == nil or tab.is_relevant()

		-- End of file: release the handle if it was opened here.
		if tab.this_line_text == nil then
			finished = true
			if owns_handle then
				handle:close()
			end
			return nil, nil
		end

		-- Let the optional tweak callback adjust the line, then deliver it.
		if tab.tweak then
			tab.tweak()
		end
		return tab.this_line_number, tab.this_line_text
	end
end