From 9fa05b16245358581ed85ac1c308c6ce2cee6e0a Mon Sep 17 00:00:00 2001 From: mo khan Date: Thu, 26 Jan 2017 09:56:50 -0700 Subject: parallize seed data insertion. --- Gemfile | 1 + Gemfile.lock | 2 ++ README.md | 17 +++++++++++++++++ Rakefile | 7 ------- db/migrations/0002_create_computer.rb | 3 ++- db/seeds.rb | 31 ++++++++++++++++++------------- 6 files changed, 40 insertions(+), 21 deletions(-) create mode 100644 README.md diff --git a/Gemfile b/Gemfile index 30199ce..4f183f1 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,7 @@ # frozen_string_literal: true source "https://rubygems.org" +gem 'concurrent-ruby', require: 'concurrent' gem 'ffaker' gem 'mysql2' gem 'rake' diff --git a/Gemfile.lock b/Gemfile.lock index 3808837..679e0bc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,7 @@ GEM remote: https://rubygems.org/ specs: + concurrent-ruby (1.0.4) ffaker (2.4.0) mysql2 (0.4.5) rake (12.0.0) @@ -10,6 +11,7 @@ PLATFORMS ruby DEPENDENCIES + concurrent-ruby ffaker mysql2 rake diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a3d879 --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +# Schema + +| BUSINESSES | | COMPUTERS | | EVENTS | | +| id | int | id | int | id | int | +| name | varchar(255) | active | tinyint | computer_id | int | +| business_relationship_id | int | business_id | int | occurred_at | datetime | +| | | | | type | varchar(255) | +| | | | | data | text | + +# Lessons + +1. Which businesses have more than N computers? +2. Which computer had the most events in a single day? +3. What are the top 10 most active computers (day, week, month)? .i.e emits the most events. +4. Which businesses have at least one computer? +5. Which businesses have zero computers? +6. Do we have any computers that belong to a business that doesn't exist? diff --git a/Rakefile b/Rakefile index 1093661..e767dae 100644 --- a/Rakefile +++ b/Rakefile @@ -5,13 +5,6 @@ Bundler.require(:default) DATABASE_NAME = 'sql_bootcamp' DATABASE = Sequel.connect("mysql2://root@localhost/#{DATABASE_NAME}") -#| BUSINESSES | | COMPUTERS | | EVENTS | | -#| id | int | id | int | id | int | -#| name | varchar(255) | active | tinyint | computer_id | int | -#| business_relationship_id | int | business_id | int | occurred_at | datetime | -#| | | | | type | varchar(255) | -#| | | | | data | text | - namespace :db do def pipe_to_mysql(command) `echo "#{command}" | mysql -u root` diff --git a/db/migrations/0002_create_computer.rb b/db/migrations/0002_create_computer.rb index 4a61ce8..6adf0d8 100644 --- a/db/migrations/0002_create_computer.rb +++ b/db/migrations/0002_create_computer.rb @@ -3,7 +3,8 @@ Sequel.migration do create_table :computers do primary_key :id TrueClass :active, null: false, default: true - foreign_key :business_id, :businesses + #foreign_key :business_id, :businesses + Integer :business_id # intentionally remove foreign key to allow for orphan rows. end end diff --git a/db/seeds.rb b/db/seeds.rb index 1127ae7..feb8f69 100644 --- a/db/seeds.rb +++ b/db/seeds.rb @@ -1,23 +1,28 @@ require 'json' +require 'prime' -businesses = DATABASE[:businesses] -computers = DATABASE[:computers] -events = DATABASE[:events] +pool = Concurrent::FixedThreadPool.new(10) event_types = (1..5).to_a 10.times do - business_id = businesses.insert(name: FFaker::Company.name, business_relationship_id: rand(3)) + business_id = DATABASE[:businesses].insert(name: FFaker::Company.name, business_relationship_id: rand(3)) + puts "Created business: #{business_id}" - 10.times do - computer_id = computers.insert(active: rand(100).even?, business_id: business_id) + pool.post do + rand(100).times do |n| + computer_id = DATABASE[:computers].insert(active: rand(100).even?, business_id: business_id.prime? ? business_id * 42 : business_id) + puts "Created computer: #{computer_id} on #{Thread.current.object_id}" - 10.times do - data = JSON.generate({ - ip_address: FFaker::Internet.ip_v4_address, - mac: FFaker::Internet.mac, - url: FFaker::Internet.http_url, - }) - events.insert(computer_id: computer_id, occurred_at: DateTime.parse(FFaker::Time.datetime), type: event_types.sample, data: data) + rand(50).times do + data = JSON.generate({ + ip_address: FFaker::Internet.ip_v4_address, + mac: FFaker::Internet.mac, + url: FFaker::Internet.http_url, + }) + DATABASE[:events].insert(computer_id: computer_id, occurred_at: DateTime.parse(FFaker::Time.datetime), type: event_types.sample, data: data) + end end end end + +pool.wait_for_termination -- cgit v1.2.3