summary | refs | log | tree | commit | diff
path: root/assignments/4
diff options
context:
space:
mode:
author mo khan <mo@mokhan.ca> 2025-01-23 10:22:08 -0700
committer mo khan <mo@mokhan.ca> 2025-01-23 10:22:08 -0700
commit 7031e53d352b3b8cb7e9e403a48818ff585b6bc2 (patch)
tree c20a098be054aac19413e37c3fff385929d1c555 /assignments/4
parent 69030c3e15ff79b1a2b40b97a2c418f6b3474697 (diff)
Complete question from chapter 11.
Diffstat (limited to 'assignments/4')
-rwxr-xr-x assignments/4/tokenizer.rb 25
1 files changed, 25 insertions, 0 deletions
diff --git a/assignments/4/tokenizer.rb b/assignments/4/tokenizer.rb
new file mode 100755
index 0000000..0bd2e2d
--- /dev/null
+++ b/assignments/4/tokenizer.rb
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+
# Runs of blank characters (such as spaces and tabs) that separate raw chunks
# of the input.
WHITESPACE = Regexp.new('[[:blank:]]+')

# Single-character delimiters that always form a token of their own.
SPLITTABLES = [';', '(', ')'].freeze

# Matches one token inside a whitespace-free chunk: either a single delimiter
# or a maximal run of non-delimiter characters. No capture groups are used, so
# String#scan returns plain strings.
TOKEN_PATTERN = Regexp.union(
  Regexp.union(SPLITTABLES),
  Regexp.new("[^#{Regexp.escape(SPLITTABLES.join)}]+")
)

# Splits a piece of source text into tokens.
#
# The text is stripped, cut into chunks on blank characters, and every chunk
# is then scanned left to right so that each delimiter in SPLITTABLES becomes
# its own token and each run of other characters stays together, e.g.
# "foo(bar);" => ["foo", "(", "bar", ")", ";"].
#
# Scanning the whole chunk (rather than partitioning around only the first
# non-delimiter run) keeps later words such as "bar" intact instead of
# breaking them into single characters.
#
# @param code [String] the text to tokenize
# @return [Array<String>] tokens in order of appearance; empty for blank input
def tokenize(code)
  code.strip.split(WHITESPACE).flat_map { |chunk| chunk.scan(TOKEN_PATTERN) }
end
+
+
# Command-line entry point: tokenizes the first argument and prints both the
# raw input and the resulting tokens.
code = ARGV.first

# Without an argument there is nothing to tokenize (tokenize would fail on
# nil), so exit with a usage hint instead of a NoMethodError backtrace.
abort "Usage: #{$PROGRAM_NAME} CODE" if code.nil?

tokens = tokenize(code)

puts "Input: #{code.inspect}"
puts "Tokens: #{tokens.inspect}"