Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#3 Path of csv files is hardcoded #4

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion big-data-3/setup.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/bin/bash

# resolve the absolute path of the directory that contains this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# install postgres server and mongodb
sudo yum install -y postgresql-server postgresql-jdbc

Expand Down Expand Up @@ -46,7 +49,19 @@ gzip -d *.csv.gz
chmod 644 *.csv

# create and load tables for hands on
psql -f setup/init-postgres.sql
# Create the tables used by the hands-on exercises. The path is quoted so the
# script still works when it lives in a directory whose name contains spaces.
psql -f "$DIR/setup/init-postgres.sql"

# Give permissions on the CSV files so that postgres is able to read them.
chmod 755 "$DIR/buy-clicks.csv" "$DIR/game-clicks.csv" "$DIR/ad-clicks.csv"

# Load the data from the CSV files into postgres. The heredoc delimiter is
# deliberately unquoted so the shell expands $DIR before psql sees the SQL.
psql << EOF
COPY buyclicks FROM '$DIR/buy-clicks.csv' DELIMITER ',' CSV HEADER;
COPY gameclicks FROM '$DIR/game-clicks.csv' DELIMITER ',' CSV HEADER;
COPY adclicks FROM '$DIR/ad-clicks.csv' DELIMITER ',' CSV HEADER;
EOF

# download and install anaconda for pandas, jupyter
wget http://repo.continuum.io/archive/Anaconda3-4.0.0-Linux-x86_64.sh
Expand Down
8 changes: 4 additions & 4 deletions big-data-3/setup/init-postgres.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
-- Remove any tables left over from a previous run so the script can be re-run.
DROP TABLE IF EXISTS
    buyclicks,
    gameclicks,
    adclicks;

CREATE TABLE buyclicks (
timestamp TIMESTAMP WITHOUT TIME ZONE NOT NULL,
txid INTEGER NOT NULL,
Expand Down Expand Up @@ -32,7 +36,3 @@ CREATE TABLE adclicks (
-- Intentionally empty all three tables in full before the data is loaded.
-- TRUNCATE is the idiomatic way to clear a whole table (no row-by-row scan)
-- and keeps the keyword casing consistent with the rest of this file.
TRUNCATE TABLE buyclicks, gameclicks, adclicks;

-- Bulk-load each table from its comma-delimited CSV file; HEADER tells COPY
-- to skip the first line of each file (the column names).
-- COPY ... FROM 'file' is read by the database server, so each path must
-- exist on the server host and be readable by the server process.
-- NOTE(review): the paths are hard-coded to /home/cloudera/Downloads/big-data-3,
-- so the load fails if the repository is checked out anywhere else.
COPY buyclicks FROM '/home/cloudera/Downloads/big-data-3/buy-clicks.csv' DELIMITER ',' CSV HEADER;
COPY gameclicks FROM '/home/cloudera/Downloads/big-data-3/game-clicks.csv' DELIMITER ',' CSV HEADER;
COPY adclicks FROM '/home/cloudera/Downloads/big-data-3/ad-clicks.csv' DELIMITER ',' CSV HEADER;