#!/usr/bin/env ruby

# Separator between words: one or more blank characters (e.g. spaces, tabs).
# Newlines are not matched by [[:blank:]], so only leading/trailing newlines
# are removed (by the chomp/strip in #tokenize).
WHITESPACE = Regexp.new('[[:blank:]]+')

# Characters that always form a token of their own, even when they are
# directly attached to neighbouring text.
SPLITTABLES = [';', '(', ')'].freeze

# Splits a piece of source text into a flat list of tokens.
#
# The text is first cut into words at runs of blanks. Each word is then
# broken up so that every character listed in SPLITTABLES becomes its own
# token, while every maximal run of other characters stays together.
#
# @example
#   tokenize('foo(bar);') # => ["foo", "(", "bar", ")", ";"]
#
# @param code [String] the text to tokenize
# @return [Array<String>] tokens in order of appearance; empty for blank input
def tokenize(code)
  delimiters = Regexp.escape(SPLITTABLES.join)
  # Either exactly one delimiter character, or a maximal run of anything else.
  token_pattern = Regexp.new("[#{delimiters}]|[^#{delimiters}]+")

  words = code.chomp.strip.split(WHITESPACE)
  # scan walks the whole word, so text after the first delimiter
  # (the "bar" in "foo(bar)") is kept as one token instead of being
  # exploded into single characters.
  words.flat_map { |word| word.scan(token_pattern) }
end

code = ARGV[0]
if code.nil?
  # Without an argument there is nothing to tokenize; report how to call the
  # script instead of failing with NoMethodError inside tokenize.
  warn "Usage: #{File.basename($PROGRAM_NAME)} CODE"
else
  tokens = tokenize(code)
  puts "Input: #{code.inspect}"
  puts "Tokens: #{tokens.inspect}"
end