blob: 0bd2e2d87f16be7b90163878c2e6cb32a593f400 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
#!/usr/bin/env ruby
# Runs of blank characters (spaces and tabs) separate words and are discarded.
WHITESPACE = /[[:blank:]]+/.freeze

# Single characters that always form a token of their own, even when they are
# glued to neighbouring text.
SPLITTABLES = [';', '(', ')'].freeze

# SPLITTABLES escaped for use inside a regexp character class.
SPLITTABLE_CLASS = Regexp.escape(SPLITTABLES.join).freeze

# Matches either a maximal run of non-splittable characters or exactly one
# splittable character. Built once at load time rather than on every call.
TOKEN_PATTERN = /[^#{SPLITTABLE_CLASS}]+|[#{SPLITTABLE_CLASS}]/.freeze

# Splits a line of code into tokens.
#
# The input is stripped, cut into words on blank characters, and each word is
# further divided so that every splittable character becomes its own token
# while the text between splittables stays intact.
#
# @param code [String] the source text to tokenize
# @return [Array<String>] tokens in order of appearance; empty for blank input
def tokenize(code)
  # scan (rather than partition) walks the whole word, so text following a
  # splittable, e.g. the "bar" in "foo(bar)", is kept as one token instead of
  # being broken into single characters.
  code.strip.split(WHITESPACE).flat_map { |word| word.scan(TOKEN_PATTERN) }
end
# Command-line entry point: tokenizes the first argument and prints both the
# raw input and the resulting token list.
code = ARGV.first

# With no argument ARGV.first is nil and tokenize, which calls String methods
# on its input, would raise NoMethodError; exit with a usage message instead.
abort "Usage: #{$PROGRAM_NAME} CODE" unless code

tokens = tokenize(code)
puts "Input: #{code.inspect}"
puts "Tokens: #{tokens.inspect}"
|