Skip to content

Commit

Permalink
Vfep 1478 - Adding/updating tests to increase coverage on GIBCT (#1134)
Browse files Browse the repository at this point in the history
* add tests

* refactoring and more tests

* fix download_csv to not call externally and stub response correctly

* temporary test to check why spec is failing

* temporary test to check why spec is failing

* temporary test to check why spec is failing

* remove puts, add expect for hcm

* adjust specs to try to fix sprockets issue. change dashboards controller spec

* fix failing test for dashboards controller spec

* fix failing test for dashboards controller spec

* fix failing test for dashboards controller spec

* fix failing test for dashboards controller spec

* fix failing test for dashboards controller spec - nocov

* fix unzipper spec

* fix unzipper spec

* fix unzipper spec

* fix unzipper spec

* fix unzipper spec

* fix permissions on tmp

* fix permissions on tmp

* fix permissions on tmp

* fix permissions on tmp

* revert Dockerfile

* Dockerfile see what filesystem looks lik

* Dockerfile see what filesystem looks like

* Dockerfile see what filesystem looks like

* add comment to Dockerfile fix, add back in tests that were failing due to tmp permissions issue

* remove comment from Dockerfile

* ls -l in Dockerfile

* remove fix for sprockets, revert Dockerfile to original
  • Loading branch information
nfstern02 authored May 29, 2024
1 parent 3808ec9 commit baf44c8
Show file tree
Hide file tree
Showing 21 changed files with 441 additions and 135 deletions.
115 changes: 19 additions & 96 deletions app/controllers/dashboards_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,31 +165,25 @@ def flash_progress_if_needed
def upload_file(class_nm, csv)
if CSV_TYPES_NO_API_KEY_TABLE_NAMES.include?(class_nm)
klass = Object.const_get(class_nm)
# added klass to unzip_csv as sometimes the download does not need unzipped
if download_csv(klass) && unzip_csv(klass)

if download_csv(class_nm) && unzip_csv(class_nm)
upload = Upload.from_csv_type(params[:csv_type])
upload.user = current_user
file = "tmp/#{params[:csv_type]}s.csv"
file = 'tmp/InstitutionCampus.csv' if class_nm.eql?('AccreditationInstituteCampus')
file = 'tmp/hd2022.csv' if class_nm.eql?('IpedsHd')
file = 'tmp/ic2022_ay.csv' if class_nm.eql?('IpedsIcAy')
file = 'tmp/ic2022_py.csv' if class_nm.eql?('IpedsIcPy')
file = 'tmp/ic2022.csv' if class_nm.eql?('IpedsIc')
file = 'tmp/hcm.xlsx' if klass.name.eql?('Hcm')
if klass.name.eql?('EightKey')
file = 'tmp/eight_key.xls'
convert_xls_to_csv(file, 'tmp/eight_key.csv')
file = 'tmp/eight_key.csv'
end
skipline = 0
skipline = 2 if klass.name.eql?('Hcm')

upload.csv = file
upload.csv =
if class_nm.eql?('EightKey') # This guy doesn't load properly with roo as is.
FileTypeConverters::XlsToCsv.new('tmp/eight_key.xls', 'tmp/eight_key.csv').convert_xls_to_csv
else
NoKeyApis::NoKeyApiDownloader::API_DOWNLOAD_CONVERSION_NAMES[class_nm] || "tmp/#{params[:csv_type]}s.csv"
end

skipline = class_nm.eql?('Hcm') ? 2 : 0

file_options = {
liberal_parsing: upload.liberal_parsing,
sheets: [{ klass: klass, skip_lines: skipline, clean_rows: upload.clean_rows }]
}
klass.load_with_roo(file, file_options).first

klass.load_with_roo(upload.csv, file_options).first
upload.update(ok: true, completed_at: Time.now.utc.to_fs(:db))
flash.notice = 'Successfully fetched & uploaded file' if upload.save!
end
Expand All @@ -205,85 +199,14 @@ def upload_file(class_nm, csv)
flash.alert = message
end

# :nocov:
def download_csv(klass)
# rubocop:disable Style/EmptyCaseCondition
# the most recent IPED data files are from 2022. This should be checked periodically.
# the most recent Hcm data files are from 2020. This should be checked periodically.
case
when klass.name.start_with?('Accreditation')
_stdout, _stderr, status = Open3.capture3("curl -X POST \
https://ope.ed.gov/dapip/api/downloadFiles/accreditationDataFiles \
-H 'Content-Type: application/json' -d '{\"CSVChecked\":true,\"ExcelChecked\":false}' -o tmp/download.zip")
when klass.name.eql?('IpedsHd')
_stdout, _stderr, status = Open3.capture3("curl -X GET \
https://nces.ed.gov/ipeds/datacenter/data/HD2022.zip \
-H 'Content-Type: application/json' -o tmp/download.zip")
when klass.name.eql?('IpedsIcAy')
_stdout, _stderr, status = Open3.capture3("curl -X GET \
https://nces.ed.gov/ipeds/datacenter/data/IC2022_AY.zip \
-H 'Content-Type: application/json' -o tmp/download.zip")
when klass.name.eql?('IpedsIcPy')
_stdout, _stderr, status = Open3.capture3("curl -X GET \
https://nces.ed.gov/ipeds/datacenter/data/IC2022_PY.zip \
-H 'Content-Type: application/json' -o tmp/download.zip")
when klass.name.eql?('IpedsIc')
_stdout, _stderr, status = Open3.capture3("curl -X GET \
https://nces.ed.gov/ipeds/datacenter/data/IC2022.zip \
-H 'Content-Type: application/json' -o tmp/download.zip")
when klass.name.eql?('Hcm') # without User-Agent server blocks download
_stdout, _stderr, status = Open3.capture3('curl -o tmp/hcm.xlsx \
-H "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0" \
https://studentaid.gov/sites/default/files/Schools-on-HCM-December2023.xlsx')
when klass.name.eql?('EightKey')
_stdout, _stderr, status = Open3.capture3("curl -X GET \
https://www2.ed.gov/documents/military/8-keys-sites.xls \
-H 'Content-Type: application/json' -o tmp/eight_key.xls")

end
# rubocop:enable Style/EmptyCaseCondition
status.success?
def download_csv(class_nm)
NoKeyApis::NoKeyApiDownloader.new(class_nm).download_csv
end
# :nocov:

# This is a candidate for a utility class. Right now, this is the only place we needed it.
# If some other process needs it, it should probably be refactored to a utility class.
def unzip_csv(klass)
# Some downloads do are not a zip file, so return true
return true if klass.name.eql?('Hcm') || klass.name.eql?('EightKey')

Zip::File.open('tmp/download.zip') do |zip_file|
zip_file.each do |f|
f_path = File.join('tmp', f.name)
FileUtils.mkdir_p(File.dirname(f_path)) unless File.exist?(File.dirname(f_path))
File.delete(f_path) if File.exist?(f_path)
zip_file.extract(f, f_path)
end
end
true
rescue StandardError => _e
false
end
def unzip_csv(class_nm)
# Some downloads do are not a zip file, so skip and return true
return true if class_nm.eql?('Hcm') || class_nm.eql?('EightKey')

def convert_xls_to_csv(xls_path, csv_path)
book = Spreadsheet.open(xls_path)
sheet = book.worksheet(0) # Assuming the 'opeid' is in the first sheet

CSV.open(csv_path, 'wb') do |csv|
sheet.each do |row|
formatted_row = row.to_a.map do |cell|
cell_value = cell.is_a?(Float) ? format('%.0f', cell) : cell.to_s.strip
# Apply zero-padding for 'opeid' if necessary
if cell_value =~ /^\d+$/ && cell_value.length <= 8
# Format the number to be exactly eight digits
formatted_number = cell_value.rjust(8, '0')
formatted_number
else
cell_value
end
end
csv << formatted_row
end
end
ZipFileUtils::Unzipper.new.unzip_the_file
end
end
35 changes: 35 additions & 0 deletions app/utilities/file_type_converters/xls_to_csv.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true

# some older versions of Excel spreadsheets don't work well with Roo.
module FileTypeConverters
class XlsToCsv
attr_accessor :xls_file_name, :csv_file_name

def initialize(xls_file_name, csv_file_name)
@xls_file_name = xls_file_name
@csv_file_name = csv_file_name
end

def convert_xls_to_csv
book = Spreadsheet.open(@xls_file_name)
sheet = book.worksheet(0)

CSV.open(@csv_file_name, 'wb') do |csv|
sheet.each do |row|
formatted_row = row.to_a.map do |cell|
cell_value = cell.is_a?(Float) ? format('%.0f', cell) : cell.to_s.strip
if cell_value =~ /^\d+$/ && cell_value.length <= 8
# Format the number to be exactly eight digits
formatted_number = cell_value.rjust(8, '0')
formatted_number
else
cell_value
end
end
csv << formatted_row
end
end
@csv_file_name
end
end
end
65 changes: 65 additions & 0 deletions app/utilities/no_key_apis/no_key_api_downloader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# frozen_string_literal: true

module NoKeyApis
class NoKeyApiDownloader
# the most recent IPED data files are from 2022. This should be checked periodically.
# the most recent Hcm data files are from 2020. This should be checked periodically.
# changes will need to be made to both hashes when these change
API_DOWNLOAD_CONVERSION_NAMES = {
'AccreditationInstituteCampus' => 'tmp/InstitutionCampus.csv',
'Hcm' => 'tmp/hcm.xlsx',
'IpedsHd' => 'tmp/hd2022.csv',
'IpedsIcAy' => 'tmp/ic2022_ay.csv',
'IpedsIcPy' => 'tmp/ic2022_py.csv',
'IpedsIc' => 'tmp/ic2022.csv'
}.freeze

API_NO_KEY_DOWNLOAD_SOURCES = {
'Accreditation' => [' -X POST', 'https://ope.ed.gov/dapip/api/downloadFiles/accreditationDataFiles'],
'AccreditationAction' => [' -X POST', 'https://ope.ed.gov/dapip/api/downloadFiles/accreditationDataFiles'],
'AccreditationInstituteCampus' => [' -X POST', 'https://ope.ed.gov/dapip/api/downloadFiles/accreditationDataFiles'],
'AccreditationRecord' => [' -X POST', 'https://ope.ed.gov/dapip/api/downloadFiles/accreditationDataFiles'],
'EightKey' => [' -X GET', 'https://www2.ed.gov/documents/military/8-keys-sites.xls'],
'Hcm' => ['', 'https://studentaid.gov/sites/default/files/Schools-on-HCM-December2023.xlsx'],
'IpedsHd' => [' -X GET', 'https://nces.ed.gov/ipeds/datacenter/data/HD2022.zip'],
'IpedsIc' => [' -X GET', 'https://nces.ed.gov/ipeds/datacenter/data/IC2022.zip'],
'IpedsIcAy' => [' -X GET', 'https://nces.ed.gov/ipeds/datacenter/data/IC2022_AY.zip'],
'IpedsIcPy' => [' -X GET', 'https://nces.ed.gov/ipeds/datacenter/data/IC2022_PY.zip']
}.freeze

attr_accessor :class_nm, :curl_command

def initialize(class_nm)
@class_nm = class_nm
rest_command, url = API_NO_KEY_DOWNLOAD_SOURCES[@class_nm]
@curl_command = "curl#{rest_command} #{url} #{h_parm} #{o_parm}#{d_parm}"
end

def download_csv
_stdout, _stderr, status = Open3.capture3(@curl_command)

status.success?
end

private

def h_parm
return '-H "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0"' if @class_nm.eql?('Hcm')

'-H \'Content-Type: application/json\''
end

def o_parm
return '-o tmp/hcm.xlsx' if @class_nm.eql?('Hcm')
return '-o tmp/eight_key.xls' if @class_nm.eql?('EightKey')

'-o tmp/download.zip'
end

def d_parm
return '' unless @class_nm.start_with?('Accreditation')

" -d '{\"CSVChecked\":true,\"ExcelChecked\":false}'"
end
end
end
26 changes: 26 additions & 0 deletions app/utilities/zip_file_utils/unzipper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

module ZipFileUtils
class Unzipper
attr_accessor :zip_file_name

# default to tmp/download.zip if nothing's passed in
def initialize(zip_file_name = 'tmp/download.zip')
@zip_file_name = zip_file_name
end

def unzip_the_file
Zip::File.open(@zip_file_name) do |zip_file|
zip_file.each do |f|
f_path = File.join('tmp', f.name)
FileUtils.mkdir_p(File.dirname(f_path)) unless File.exist?(File.dirname(f_path))
File.delete(f_path) if File.exist?(f_path)
zip_file.extract(f, f_path)
end
end
true
rescue StandardError => _e
false
end
end
end
10 changes: 5 additions & 5 deletions config/initializers/csv_types.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Rails.application.config.to_prepare do
CSV_TYPES_TABLES = [
CSV_TYPES_TABLES ||= [
{ klass: AccreditationAction, required?: true, has_api?: true, no_api_key?: true },
{ klass: AccreditationInstituteCampus, required?: true, has_api?: true, no_api_key?: true },
{ klass: AccreditationRecord, required?: true, has_api?: true, no_api_key?: true },
Expand Down Expand Up @@ -38,8 +38,8 @@
{ klass: Section1015, required?: false }
].freeze

CSV_TYPES_HAS_API_TABLE_NAMES = CSV_TYPES_TABLES.select { |table| table[:has_api?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_NO_API_KEY_TABLE_NAMES = CSV_TYPES_TABLES.select { |table| table[:no_api_key?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_NO_UPLOAD_TABLE_NAMES = CSV_TYPES_TABLES.select { |table| table[:no_upload?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_ALL_TABLES_CLASSES = CSV_TYPES_TABLES.map { |table| table[:klass] }.freeze
CSV_TYPES_HAS_API_TABLE_NAMES ||= CSV_TYPES_TABLES.select { |table| table[:has_api?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_NO_API_KEY_TABLE_NAMES ||= CSV_TYPES_TABLES.select { |table| table[:no_api_key?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_NO_UPLOAD_TABLE_NAMES ||= CSV_TYPES_TABLES.select { |table| table[:no_upload?] }.map { |table| table[:klass].name }.freeze
CSV_TYPES_ALL_TABLES_CLASSES ||= CSV_TYPES_TABLES.map { |table| table[:klass] }.freeze
end
4 changes: 2 additions & 2 deletions config/initializers/group_types.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Rails.application.config.to_prepare do
GROUP_FILE_TYPES = [
GROUP_FILE_TYPES ||= [
{
klass: 'Accreditation',
required?: true,
Expand All @@ -11,5 +11,5 @@
},
].freeze

GROUP_FILE_TYPES_NAMES = GROUP_FILE_TYPES.map { |g| g[:klass] }.freeze
GROUP_FILE_TYPES_NAMES ||= GROUP_FILE_TYPES.map { |g| g[:klass] }.freeze
end
8 changes: 4 additions & 4 deletions config/initializers/upload_types.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Rails.application.config.to_prepare do
UPLOAD_TYPES = [
UPLOAD_TYPES ||= [
*GROUP_FILE_TYPES,
*CSV_TYPES_TABLES,
].freeze

UPLOAD_TYPES_ALL_NAMES = UPLOAD_TYPES.map do |upload|
UPLOAD_TYPES_ALL_NAMES ||= UPLOAD_TYPES.map do |upload|
klass = upload[:klass]
if klass.is_a? String
klass
Expand All @@ -13,7 +13,7 @@
end
end.freeze

UPLOAD_TYPES_REQUIRED_NAMES = UPLOAD_TYPES.select { |upload| upload[:required?] }.map do |upload|
UPLOAD_TYPES_REQUIRED_NAMES ||= UPLOAD_TYPES.select { |upload| upload[:required?] }.map do |upload|
klass = upload[:klass]
if klass.is_a? String
klass
Expand All @@ -22,7 +22,7 @@
end
end.freeze

UPLOAD_TYPES_NO_PROD_NAMES = UPLOAD_TYPES.select { |upload| upload[:not_prod_ready?] }.map do |upload|
UPLOAD_TYPES_NO_PROD_NAMES ||= UPLOAD_TYPES.select { |upload| upload[:not_prod_ready?] }.map do |upload|
klass = upload[:klass]
if klass.is_a? String
klass
Expand Down
25 changes: 11 additions & 14 deletions spec/controllers/dashboards_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -178,33 +178,30 @@ def load_table(klass)
expect(flash.alert).to include(message)
end

context 'when fetching Accreditation files which do not require an api key' do
context 'when fetching files which do not require an api key' do
before do
system('cp spec/fixtures/Accreditation/download.zip tmp')
system('cp spec/fixtures/Accreditation/download.zip tmp/download.zip')
system('cp spec/fixtures/download_8_keys_sites.xls tmp/eight_key.xls')
end

# rubocop:disable RSpec/AnyInstance
it 'downloads a zip file from the edu website' do
# rubocop:disable RSpec/AnyInstance
allow_any_instance_of(described_class).to receive(:download_csv).and_return(true)
# rubocop:enable RSpec/AnyInstance

CSV_TYPES_NO_API_KEY_TABLE_NAMES.each do |klass_nm|
get(:api_fetch, params: { csv_type: klass_nm })
expect(File.exist?('tmp/download.zip')).to be true
end
allow_any_instance_of(NoKeyApis::NoKeyApiDownloader).to receive(:download_csv).and_return(true)

get(:api_fetch, params: { csv_type: 'EightKey' })
expect(Upload.last.ok).to be true
end

it 'extracts the content of the zip file' do
# rubocop:disable RSpec/AnyInstance
allow_any_instance_of(described_class).to receive(:download_csv).and_return(true)
# rubocop:enable RSpec/AnyInstance
allow_any_instance_of(NoKeyApis::NoKeyApiDownloader).to receive(:download_csv).and_return(true)

get(:api_fetch, params: { csv_type: CSV_TYPES_NO_API_KEY_TABLE_NAMES.first })
get(:api_fetch, params: { csv_type: 'AccreditationAction' })

expect(File.exist?('tmp/AccreditationActions.csv')).to be true
expect(File.exist?('tmp/AccreditationRecords.csv')).to be true
expect(File.exist?('tmp/InstitutionCampus.csv')).to be true
end
# rubocop:enable RSpec/AnyInstance
end
end

Expand Down
Loading

0 comments on commit baf44c8

Please sign in to comment.