diff options
Diffstat (limited to 'assignments/4/tokenizer.rb')
| -rwxr-xr-x | assignments/4/tokenizer.rb | 25 |
1 files changed, 25 insertions, 0 deletions
#!/usr/bin/env ruby

# Runs of horizontal whitespace (POSIX [[:blank:]], e.g. spaces and tabs)
# that separate chunks of the input. Line breaks are not in this class, so
# a newline embedded in the input does not act as a separator.
WHITESPACE = /[[:blank:]]+/

# Single characters that always form a token of their own, even when they
# are written directly next to other text.
SPLITTABLES = [';', '(', ')'].freeze

# Matches either exactly one splittable character or the longest run of
# characters containing none of them. Derived from SPLITTABLES so the two
# cannot drift apart, and built once instead of on every call.
TOKEN_PATTERN = begin
  delimiters = Regexp.escape(SPLITTABLES.join)
  Regexp.new("[#{delimiters}]|[^#{delimiters}]+")
end

# Splits a piece of source text into tokens.
#
# The text is first cut on runs of blank characters; every resulting chunk
# is then cut further so that each character listed in SPLITTABLES becomes
# a separate token and the text between them stays intact.
#
# @param code [String] the text to tokenize
# @return [Array<String>] the tokens in input order; empty for blank input
#
# @example
#   tokenize('foo(bar);') # => ["foo", "(", "bar", ")", ";"]
def tokenize(code)
  chunks = code.chomp.strip.split(WHITESPACE)
  # scan walks the whole chunk, so text that follows a delimiter is kept as
  # one token rather than being broken into single characters.
  chunks.flat_map { |chunk| chunk.scan(TOKEN_PATTERN) }
end

# Command-line entry point: tokenizes the first argument and prints both
# the raw input and the resulting tokens.
if __FILE__ == $PROGRAM_NAME
  code = ARGV[0]
  if code.nil?
    # Without an argument there is nothing to tokenize; report how to call
    # the script instead of failing on nil.
    warn "Usage: #{File.basename($PROGRAM_NAME)} CODE"
  else
    puts "Input: #{code.inspect}"
    puts "Tokens: #{tokenize(code).inspect}"
  end
end
