diff options
| author | mo khan <mo@mokhan.ca> | 2025-01-23 10:22:08 -0700 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-01-23 10:22:08 -0700 |
| commit | 7031e53d352b3b8cb7e9e403a48818ff585b6bc2 (patch) | |
| tree | c20a098be054aac19413e37c3fff385929d1c555 /assignments/4 | |
| parent | 69030c3e15ff79b1a2b40b97a2c418f6b3474697 (diff) | |
Complete question from chapter 11.
Diffstat (limited to 'assignments/4')
| -rwxr-xr-x | assignments/4/tokenizer.rb | 25 |
1 files changed, 25 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Runs of horizontal whitespace (POSIX [[:blank:]]) that separate chunks of
# input. Line breaks inside the input are not treated as separators.
WHITESPACE = Regexp.new('[[:blank:]]+')

# Single characters that always form a token of their own, even when they
# are written directly next to other text (e.g. the parentheses in "f(x)").
SPLITTABLES = [';', '(', ')'].freeze

# Matches exactly one token inside a whitespace-free chunk: either a single
# delimiter character from SPLITTABLES, or the longest run of characters that
# contains no delimiter. Built once because it never changes between calls.
TOKEN_PATTERN = begin
  delimiters = Regexp.escape(SPLITTABLES.join)
  Regexp.new("[#{delimiters}]|[^#{delimiters}]+")
end

# Splits a string of source code into tokens.
#
# The input is trimmed, cut into chunks on horizontal whitespace, and every
# chunk is then scanned left to right so that each delimiter becomes its own
# token while every run of non-delimiter characters stays intact. Scanning
# (rather than partitioning around the first run only) keeps later runs in
# the same chunk whole: "print(hello);" yields "hello", not "h","e","l","l","o".
#
# @param code [String] the text to tokenize
# @return [Array<String>] the tokens in input order; empty for blank input
def tokenize(code)
  code.strip.split(WHITESPACE).flat_map { |chunk| chunk.scan(TOKEN_PATTERN) }
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    # Without an argument there is nothing to tokenize; explain the expected
    # invocation instead of failing with NoMethodError on nil.
    warn "Usage: #{File.basename($PROGRAM_NAME)} CODE"
  else
    code = ARGV.first
    tokens = tokenize(code)

    puts "Input: #{code.inspect}"
    puts "Tokens: #{tokens.inspect}"
  end
end
