From da99e220c12928481a77c7f68a4c7a8a72941863 Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Wed, 31 Jan 2024 16:16:24 -0800 Subject: [PATCH] chore: get cleaner running --- Gemfile.lock | 131 +++++++++++++++--------------- app/services/dashboard/cleaner.rb | 1 + dashboard.gemspec | 2 +- lib/dashboard/version.rb | 2 +- lib/tasks/clean.rake | 36 ++++++++ 5 files changed, 105 insertions(+), 67 deletions(-) create mode 100644 lib/tasks/clean.rake diff --git a/Gemfile.lock b/Gemfile.lock index fb53c00..60f83d8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - dashboard (0.1.10) + dashboard (0.1.11) bcrypt_pbkdf cssbundling-rails ed25519 @@ -10,7 +10,6 @@ PATH net-sftp rails (>= 7.1.2) rspec-rails - standard_deviation stimulus-rails turbo-rails watir @@ -18,35 +17,35 @@ PATH GEM remote: https://rubygems.org/ specs: - actioncable (7.1.2) - actionpack (= 7.1.2) - activesupport (= 7.1.2) + actioncable (7.1.3) + actionpack (= 7.1.3) + activesupport (= 7.1.3) nio4r (~> 2.0) websocket-driver (>= 0.6.1) zeitwerk (~> 2.6) - actionmailbox (7.1.2) - actionpack (= 7.1.2) - activejob (= 7.1.2) - activerecord (= 7.1.2) - activestorage (= 7.1.2) - activesupport (= 7.1.2) + actionmailbox (7.1.3) + actionpack (= 7.1.3) + activejob (= 7.1.3) + activerecord (= 7.1.3) + activestorage (= 7.1.3) + activesupport (= 7.1.3) mail (>= 2.7.1) net-imap net-pop net-smtp - actionmailer (7.1.2) - actionpack (= 7.1.2) - actionview (= 7.1.2) - activejob (= 7.1.2) - activesupport (= 7.1.2) + actionmailer (7.1.3) + actionpack (= 7.1.3) + actionview (= 7.1.3) + activejob (= 7.1.3) + activesupport (= 7.1.3) mail (~> 2.5, >= 2.5.4) net-imap net-pop net-smtp rails-dom-testing (~> 2.2) - actionpack (7.1.2) - actionview (= 7.1.2) - activesupport (= 7.1.2) + actionpack (7.1.3) + actionview (= 7.1.3) + activesupport (= 7.1.3) nokogiri (>= 1.8.5) racc rack (>= 2.2.4) @@ -54,35 +53,35 @@ GEM rack-test (>= 0.6.3) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - actiontext (7.1.2) - actionpack (= 7.1.2) - activerecord (= 7.1.2) - activestorage (= 7.1.2) - activesupport (= 7.1.2) + actiontext (7.1.3) + actionpack (= 7.1.3) + activerecord (= 7.1.3) + activestorage (= 7.1.3) + activesupport (= 7.1.3) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.1.2) - activesupport (= 7.1.2) + actionview (7.1.3) + activesupport (= 7.1.3) builder (~> 3.1) erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - activejob (7.1.2) - activesupport (= 7.1.2) + activejob (7.1.3) + activesupport (= 7.1.3) globalid (>= 0.3.6) - activemodel (7.1.2) - activesupport (= 7.1.2) - activerecord (7.1.2) - activemodel (= 7.1.2) - activesupport (= 7.1.2) + activemodel (7.1.3) + activesupport (= 7.1.3) + activerecord (7.1.3) + activemodel (= 7.1.3) + activesupport (= 7.1.3) timeout (>= 0.4.0) - activestorage (7.1.2) - actionpack (= 7.1.2) - activejob (= 7.1.2) - activerecord (= 7.1.2) - activesupport (= 7.1.2) + activestorage (7.1.3) + actionpack (= 7.1.3) + activejob (= 7.1.3) + activerecord (= 7.1.3) + activesupport (= 7.1.3) marcel (~> 1.0) - activesupport (7.1.2) + activesupport (7.1.3) base64 bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) @@ -97,19 +96,19 @@ GEM rake (>= 10.4, < 14.0) base64 (0.2.0) bcrypt_pbkdf (1.1.0) - bigdecimal (3.1.5) + bigdecimal (3.1.6) bindex (0.8.1) builder (3.2.4) - concurrent-ruby (1.2.2) + concurrent-ruby (1.2.3) connection_pool (2.4.1) crass (1.0.6) - cssbundling-rails (1.3.3) + cssbundling-rails (1.4.0) railties (>= 6.0.0) date (3.3.4) debug (1.9.1) irb (~> 1.10) reline (>= 0.3.8) - diff-lcs (1.5.0) + diff-lcs (1.5.1) drb (2.2.0) ruby2_keywords ed25519 (1.3.0) @@ -125,11 +124,11 @@ GEM activesupport (>= 6.1) i18n (1.14.1) concurrent-ruby (~> 1.0) - io-console (0.7.1) + io-console (0.7.2) irb (1.11.1) rdoc reline (>= 0.4.2) - jsbundling-rails (1.2.2) + jsbundling-rails (1.3.0) railties (>= 6.0.0) loofah (2.22.0) crass (~> 1.0.2) @@ -141,7 +140,7 @@ GEM net-smtp marcel (1.0.2) mini_mime (1.1.5) - minitest (5.20.0) + minitest (5.21.2) mutex_m (0.2.0) net-imap (0.4.9.1) date @@ -164,7 +163,7 @@ GEM puma (6.4.2) nio4r (~> 2.0) racc (1.7.3) - rack (3.0.8) + rack (3.0.9) rack-session (2.0.0) rack (>= 3.0.0) rack-test (2.1.0) @@ -172,20 +171,20 @@ GEM rackup (2.1.0) rack (>= 3) webrick (~> 1.8) - rails (7.1.2) - actioncable (= 7.1.2) - actionmailbox (= 7.1.2) - actionmailer (= 7.1.2) - actionpack (= 7.1.2) - actiontext (= 7.1.2) - actionview (= 7.1.2) - activejob (= 7.1.2) - activemodel (= 7.1.2) - activerecord (= 7.1.2) - activestorage (= 7.1.2) - activesupport (= 7.1.2) + rails (7.1.3) + actioncable (= 7.1.3) + actionmailbox (= 7.1.3) + actionmailer (= 7.1.3) + actionpack (= 7.1.3) + actiontext (= 7.1.3) + actionview (= 7.1.3) + activejob (= 7.1.3) + activemodel (= 7.1.3) + activerecord (= 7.1.3) + activestorage (= 7.1.3) + activesupport (= 7.1.3) bundler (>= 1.15.0) - railties (= 7.1.2) + railties (= 7.1.3) rails-dom-testing (2.2.0) activesupport (>= 5.0.0) minitest @@ -193,9 +192,9 @@ GEM rails-html-sanitizer (1.6.0) loofah (~> 2.21) nokogiri (~> 1.14) - railties (7.1.2) - actionpack (= 7.1.2) - activesupport (= 7.1.2) + railties (7.1.3) + actionpack (= 7.1.3) + activesupport (= 7.1.3) irb rackup (>= 1.0.0) rake (>= 12.2) @@ -216,7 +215,7 @@ GEM rspec-mocks (3.12.6) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.12.0) - rspec-rails (6.1.0) + rspec-rails (6.1.1) actionpack (>= 6.1) activesupport (>= 6.1) railties (>= 6.1) @@ -227,7 +226,8 @@ GEM rspec-support (3.12.1) ruby2_keywords (0.0.5) rubyzip (2.3.2) - selenium-webdriver (4.16.0) + selenium-webdriver (4.17.0) + base64 (~> 0.2) rexml (~> 3.2, >= 3.2.5) rubyzip (>= 1.2.2, < 3.0) websocket (~> 1.0) @@ -276,6 +276,7 @@ DEPENDENCIES pg puma sprockets-rails + standard_deviation web-console BUNDLED WITH diff --git a/app/services/dashboard/cleaner.rb b/app/services/dashboard/cleaner.rb index d1d2969..0313a42 100644 --- a/app/services/dashboard/cleaner.rb +++ b/app/services/dashboard/cleaner.rb @@ -14,6 +14,7 @@ module Dashboard Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath| puts filepath File.open(filepath) do |file| + puts "opening file" processed_data = process_raw_file(file:) processed_data in [headers, clean_csv, log_csv, data] return if data.empty? diff --git a/dashboard.gemspec b/dashboard.gemspec index a42c506..e012a8e 100644 --- a/dashboard.gemspec +++ b/dashboard.gemspec @@ -30,10 +30,10 @@ Gem::Specification.new do |spec| spec.add_dependency "net-sftp" spec.add_dependency "rails", ">= 7.1.2" spec.add_dependency "rspec-rails" - spec.add_dependency "standard_deviation" spec.add_dependency "stimulus-rails" spec.add_dependency "turbo-rails" spec.add_dependency "watir" spec.add_development_dependency "factory_bot_rails" + spec.add_development_dependency "standard_deviation" end diff --git a/lib/dashboard/version.rb b/lib/dashboard/version.rb index 0f0234f..66aa56b 100644 --- a/lib/dashboard/version.rb +++ b/lib/dashboard/version.rb @@ -1,3 +1,3 @@ module Dashboard - VERSION = "0.1.10" + VERSION = "0.1.11" end diff --git a/lib/tasks/clean.rake b/lib/tasks/clean.rake new file mode 100644 index 0000000..2d3f3f2 --- /dev/null +++ b/lib/tasks/clean.rake @@ -0,0 +1,36 @@ +namespace :dashboard do + namespace :clean do + # These tasks must be run in their respective project so the correct schools are in the database + desc "clean ecp data" + task ecp: :environment do + input_filepath = Rails.root.join("tmp", "data", "ecp_data", "raw") + output_filepath = Rails.root.join("tmp", "data", "ecp_data", "clean") + log_filepath = Rails.root.join("tmp", "data", "ecp_data", "removed") + puts "running cleaner" + Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean + end + + desc "clean prepped data" + task prepped: :environment do + input_filepath = Rails.root.join("tmp", "data", "ecp_data", "prepped") + output_filepath = Rails.root.join("tmp", "data", "ecp_data", "prepped", "clean") + log_filepath = Rails.root.join("tmp", "data", "ecp_data", "prepped", "removed") + Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean + end + desc "clean mciea data" + task mciea: :environment do + input_filepath = Rails.root.join("tmp", "data", "mciea_data", "raw") + output_filepath = Rails.root.join("tmp", "data", "mciea_data", "clean") + log_filepath = Rails.root.join("tmp", "data", "mciea_data", "removed") + Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean + end + + desc "clean rpp data" + task rpp: :environment do + input_filepath = Rails.root.join("tmp", "data", "rpp_data", "raw") + output_filepath = Rails.root.join("tmp", "data", "rpp_data", "clean") + log_filepath = Rails.root.join("tmp", "data", "rpp_data", "removed") + Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean + end + end +end