Skip to content

Commit

Permalink
Merge pull request #9 from fipelle/dev
Browse files Browse the repository at this point in the history
Patch release with new download headers
  • Loading branch information
fipelle authored Apr 21, 2023
2 parents 8d17f8a + b350ef1 commit c0425a0
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "CeMicrodata"
uuid = "11053890-0308-406b-94d3-d76ebcae6419"
version = "0.1.0"
version = "0.1.1"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
34 changes: 32 additions & 2 deletions src/get_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,24 @@
Download csv files for a given reference year and survey (interview / diary).
"""
function download_csv_files(ref_year::String, is_interview_survey::Bool, download_folder::String)

    # Browser-like request headers sent along with the download request.
    # Keys are bare HTTP field names: the ":" separator is not part of the name,
    # so the "Sec-Fetch-*" entries must not carry a trailing colon.
    headers = Dict(
        "User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
        "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language" => "en-GB,en;q=0.5",
        "Accept-Encoding" => "gzip, deflate, br",
        "Connection" => "keep-alive",
        "Upgrade-Insecure-Requests" => "1",
        "Sec-Fetch-Dest" => "document",
        "Sec-Fetch-Mode" => "navigate",
        "Sec-Fetch-Site" => "none",
        "Sec-Fetch-User" => "?1"
    )

    # Archive identifier: survey prefix followed by the last two characters of `ref_year`
    # (e.g. "2019" -> "intrvw19" for the interview survey, "diary19" for the diary survey).
    survey_prefix = is_interview_survey ? "intrvw" : "diary";
    survey_id = "$(survey_prefix)$(ref_year[end-1:end])";

    # Download the archive exactly once (with the headers above) into `download_folder`
    zip_path = "$(download_folder)/$(survey_id).zip";
    Downloads.download("https://www.bls.gov/cex/pumd/data/comma/$(survey_id).zip", zip_path, headers=headers);

    # Extract quietly next to the archive; relies on an `unzip` executable being on the PATH
    run(`unzip -qq $(zip_path) -d $(download_folder)/`);

    # The identifier is returned as-is so the caller knows which archive was fetched
    return survey_id;
end
Expand Down Expand Up @@ -124,9 +140,23 @@ Return stubs tables in DataFrame format.
"""
function get_stubs()

# Setup headers
headers = Dict(
"User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language" => "en-GB,en;q=0.5",
"Accept-Encoding" => "gzip, deflate, br",
"Connection" => "keep-alive",
"Upgrade-Insecure-Requests" => "1",
"Sec-Fetch-Dest:" => "document",
"Sec-Fetch-Mode:" => "navigate",
"Sec-Fetch-Site:" => "none",
"Sec-Fetch-User:" => "?1"
)

# Download stubs
download_folder = mktempdir(prefix="ce_pumd_", cleanup=true);
Downloads.download("https://www.bls.gov/cex/pumd/stubs.zip", "$(download_folder)/stubs.zip");
Downloads.download("https://www.bls.gov/cex/pumd/stubs.zip", "$(download_folder)/stubs.zip", headers=headers);
run(`unzip -qq $(download_folder)/stubs.zip -d $(download_folder)/`);

# Memory pre-allocation for output
Expand Down

0 comments on commit c0425a0

Please sign in to comment.