From 1c82c33641946ba5144f270d51576b676809705f Mon Sep 17 00:00:00 2001
From: Grant Andersen
Date: Sat, 23 Mar 2019 18:39:50 -0400
Subject: [PATCH] First Commit

---
 .gitignore       |  4 ++++
 lahman_etl.R     | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 lahman_etl.Rproj | 13 +++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 lahman_etl.R
 create mode 100644 lahman_etl.Rproj

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/lahman_etl.R b/lahman_etl.R
new file mode 100644
index 0000000..71254d0
--- /dev/null
+++ b/lahman_etl.R
@@ -0,0 +1,58 @@
+# Load every table of the Lahman baseball database (Chadwick Bureau CSV
+# release) into the local MySQL schema `lahman`, replacing existing tables.
+library(DBI)
+library(readr)
+library(dplyr)
+
+# CSV base names in the repository; each one becomes a table of the same name.
+tables <- c(
+  "AllstarFull", "Appearances", "AwardsManagers", "AwardsPlayers",
+  "AwardsShareManagers", "AwardsSharePlayers", "Batting", "BattingPost",
+  "CollegePlaying", "Fielding", "FieldingOF", "FieldingOFsplit",
+  "FieldingPost", "HallOfFame", "HomeGames", "Managers", "ManagersHalf",
+  "Parks", "People", "Pitching", "PitchingPost", "Salaries", "Schools",
+  "SeriesPost", "Teams", "TeamsFranchises", "TeamsHalf"
+)
+
+base_url <-
+  "https://github.com/chadwickbureau/baseballdatabank/raw/master/core/"
+
+# MySQL() is exported by RMySQL, which this script never attaches; qualify the
+# call so the connection does not depend on what the session has loaded.
+con <- dbConnect(
+  RMySQL::MySQL(),
+  host = "localhost", dbname = "lahman", user = "root", port = 3306
+)
+
+started <- Sys.time()
+tryCatch(
+  {
+    for (tbl in tables) {
+      print(tbl)
+      csv_url <- paste0(base_url, tbl, ".csv")
+      if (tbl != "AwardsSharePlayers") {
+        rows <- read_csv(csv_url)
+      } else {
+        # This file is read with explicit column names and types, skipping
+        # its own header row, rather than relying on readr's guesses.
+        rows <- read_csv(
+          csv_url,
+          col_names = c(
+            "awardID", "yearID", "lgID", "playerID",
+            "pointsWon", "pointsMax", "votesFirst"
+          ),
+          skip = 1,
+          col_types = "ciccdid"
+        )
+      }
+      # overwrite = TRUE drops any existing table first, so no separate DROP
+      # statement (and no uncleared dbSendQuery() result) is needed.
+      dbWriteTable(con, tbl, as.data.frame(rows), overwrite = TRUE)
+    }
+  },
+  # Release the connection even when a download or write fails part-way.
+  finally = dbDisconnect(con)
+)
+
+# Elapsed wall-clock time for the whole load.
+Sys.time() - started
\ No newline at end of file
diff --git a/lahman_etl.Rproj b/lahman_etl.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/lahman_etl.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX