summary | refs | log | tree | commit | diff
path: root/assignments/4/tokenizer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'assignments/4/tokenizer.rb')
-rwxr-xr-x assignments/4/tokenizer.rb 25
1 files changed, 25 insertions, 0 deletions
diff --git a/assignments/4/tokenizer.rb b/assignments/4/tokenizer.rb
new file mode 100755
index 0000000..0bd2e2d
--- /dev/null
+++ b/assignments/4/tokenizer.rb
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+
# One or more blank characters (spaces/tabs); separates raw chunks of input.
WHITESPACE = /[[:blank:]]+/

# Single characters that always form a token of their own, even when they
# are glued to neighbouring text (e.g. the parentheses in "foo(bar)").
SPLITTABLES = [';', '(', ')'].freeze

# Splits +code+ into a flat list of tokens.
#
# The input is stripped of surrounding whitespace and cut into chunks on
# WHITESPACE. Each chunk is then broken up so that every character listed
# in SPLITTABLES becomes its own token, while every run of other characters
# stays together as one token.
#
#   tokenize("foo(bar);")  # => ["foo", "(", "bar", ")", ";"]
#
# @param code [String] the source text to tokenize
# @return [Array<String>] tokens in order of appearance; empty for blank input
def tokenize(code)
  splittable_set = Regexp.escape(SPLITTABLES.join)
  # Either a maximal run of ordinary characters or exactly one splittable.
  # Scanning with this pattern handles any number of alternations inside a
  # chunk; String#partition only split around the first run and then broke
  # the rest of the chunk into single characters.
  token_pattern = /[^#{splittable_set}]+|[#{splittable_set}]/

  # strip already removes a trailing newline, so no separate chomp is needed.
  code.strip.split(WHITESPACE).flat_map { |chunk| chunk.scan(token_pattern) }
end
+
+
# Command-line entry point: tokenizes the first argument and prints the raw
# input followed by the resulting token list.
code = ARGV.first

# Without an argument, nil would be passed to tokenize and crash with a
# NoMethodError; exit with a usage message (on stderr, status 1) instead.
abort "Usage: #{File.basename($PROGRAM_NAME)} CODE" if code.nil?

tokens = tokenize(code)

puts "Input: #{code.inspect}"
puts "Tokens: #{tokens.inspect}"