Merge pull request asciidoctor#117 from datastax-training/drivers-php-beginner

Drivers php beginner
datastax-training · Feb 15, 2016 · bde5d56 · bde5d56
2 parents 7e165e6 + 4dba659
commit bde5d56
Show file tree

Hide file tree

Showing 10 changed files with 619 additions and 14 deletions.
diff --git a/cassandra/dev/drivers/php/beginner/build.gradle b/cassandra/dev/drivers/php/beginner/build.gradle
@@ -0,0 +1,17 @@
+plugins {
+  id 'com.datastax.gradle.curriculum.plugin' version '0.1.5'
+}
+
+/*
+buildscript {
+  repositories {
+    mavenLocal()
+    jcenter()
+  }
+  dependencies {
+    classpath 'com.datastax:gradle-curriculum-plugin:0.1.5-SNAPSHOT'
+  }
+}
+*/
+
+apply plugin: 'com.datastax.gradle.curriculum.plugin'
diff --git a/cassandra/dev/drivers/php/beginner/src/exercises.adoc b/cassandra/dev/drivers/php/beginner/src/exercises.adoc
@@ -0,0 +1,272 @@
+
+== Data Modeling Use Case
+
+=== *Exercise 1: Reviewing the investment portfolio management use case*
+
+==== *In this exercise, you will:*
+
+* Review the conceptual, logical, and physical designs
+* Instantiate and query the database
+
+==== *_Steps_*
+
+==== *Review the conceptual, logical, and physical designs*
+
+. Review the data modeling steps.
+
+image::{image_path}/usecaseimage1.jpg[]
+
+image::{image_path}/investmentreview.svg[]
+
+
+==== *Instantiate and query the database*
+
+. In cqlsh, create and populate the tables shown above by executing the CQL script.
+
+[source,sql]
+ SOURCE '~/casdat/exercise7/portfolio.cql';
+
+. In cqlsh, execute the USE command to set the portfolio keyspace as the current default.
+
+[source,sql]
+ USE portfolio;
+
+. In cqlsh (or DevCenter), express the following queries in CQL over the portfolio database for a user with username 'green'.
+
+Q1: Find all investment account information for a specified username.
+
+[source,sql]
+ SELECT * FROM accounts_by_user WHERE username = 'green';
+
+Q2: Find all positions (stocks, quantities, market values) for a specified account number; order by stock symbol (ASC).
+
+[source,sql]
+ SELECT * FROM stocks_by_account WHERE account_number = 111111111;
+
+Q3: Find all trades for a specified account number and, optionally, a known date range, trade type (buy/sell), and stock symbol; order by trade date (DESC).
+
+Q3.1: Find all trades for a specified account number; order by trade date (DESC).
+
+[source,sql]
+....
+SELECT * FROM trades_by_account12 WHERE account_number = 111111111;
+
+SELECT * FROM trades_by_account WHERE account_number = 111111111;
+....
+
+Q3.2: Find all trades for a specified account number and date range; order by trade date (DESC).
+
+[source,sql]
+....
+SELECT * FROM trades_by_account12 WHERE account_number = 111111111 AND date > '2014-05-01';
+
+SELECT * FROM trades_by_account WHERE account_number = 111111111 AND date > '2014-05-01';
+....
+
+Q3.3: Find all trades for a specified account number, date range, and trade type; order by trade date (DESC).
+
+[source,sql]
+....
+SELECT * FROM trades_by_account3 WHERE account_number = 111111111 AND date > '2014-01-01' AND type = 'Buy';
+
+SELECT * FROM trades_by_account WHERE account_number = 111111111 AND date > '2014-01-01' AND type = 'Buy';
+....
+
+Q3.4: Find all trades for a specified account number, date range, trade type, and stock symbol; order by trade date (DESC).
+
+[source,sql]
+....
+SELECT * FROM trades_by_account4 WHERE account_number = 111111111 AND date > '2014-01-01' AND type = 'Buy' AND symbol = 'EBAY';
+
+SELECT * FROM trades_by_account WHERE account_number = 111111111 AND date > '2014-01-01' AND type = 'Buy' AND symbol = 'EBAY' ALLOW FILTERING;
+....
+
+Q3.5: Find all trades for a specified account number, date range, and stock symbol; order by trade date (DESC).
+
+[source,sql]
+....
+SELECT * FROM trades_by_account5 WHERE account_number = 111111111 AND date > '2014-01-01' AND symbol = 'EBAY';
+
+SELECT * FROM trades_by_account WHERE account_number = 111111111 AND date > '2014-01-01' AND symbol = 'EBAY';
+....
+
+Q4: Find all information about owners of investment accounts with a position in a specified stock.
+
+Q4.1: Find all bucket numbers for a specified stock.
+
+[source,sql]
+ SELECT * FROM buckets_by_stock WHERE symbol = 'NFLX';
+
+Q4.2: Retrieve all user information in a known bucket for a specified stock.
+
+[source,sql]
+ SELECT * FROM users_by_stock WHERE symbol = 'NFLX' AND bucket IN (1,2);
+
+Q5: Find how many accounts have shares of a specified stock.
+
+[source,sql]
+ SELECT * FROM accounts_by_stock WHERE symbol = 'NFLX';
+
+=== *Appendix*
+
+==== *Create Keyspace and Tables*
+
+[source,sql]
+....
+CREATE KEYSPACE portfolio
+WITH replication = {
+  'class': 'SimpleStrategy',
+  'replication_factor' : 1
+};
+
+USE portfolio;
+....
+
+[source,sql]
+....
+CREATE TABLE accounts_by_user (
+  username TEXT,
+  name TEXT,
+  ssn INT,
+  dob TIMESTAMP,
+  emails MAP<TEXT,TEXT>,
+  phones MAP<TEXT,TEXT>,
+  account_number INT,
+  cash_balance DECIMAL,
+  investment_value DECIMAL,
+  total_value DECIMAL,
+  PRIMARY KEY (username)
+  );
+....
+
+[source,sql]
+....
+CREATE TABLE stocks_by_account (
+  account_number INT,
+  symbol TEXT,
+  description TEXT,
+  quote DECIMAL,
+  quantity DECIMAL,
+  market_value DECIMAL,
+  PRIMARY KEY (account_number, symbol)
+  );
+....
+
+[source,sql]
+....
+CREATE TABLE trades_by_account12 (
+  account_number INT,
+  date TIMESTAMP,
+  type TEXT,
+  symbol TEXT,
+  trade_id UUID,
+  quantity DECIMAL,
+  price DECIMAL,
+  amount DECIMAL,
+  PRIMARY KEY (account_number, date, trade_id)
+  ) WITH CLUSTERING ORDER BY (date DESC);
+....
+
+[source,sql]
+....
+CREATE TABLE trades_by_account3 (
+  account_number INT,
+  date TIMESTAMP,
+  type TEXT,
+  symbol TEXT,
+  trade_id UUID,
+  quantity DECIMAL,
+  price DECIMAL,
+  amount DECIMAL,
+  PRIMARY KEY (account_number, type, date, trade_id)
+  ) WITH CLUSTERING ORDER BY (type ASC, date DESC);
+....
+
+[source,sql]
+....
+CREATE TABLE trades_by_account4 (
+  account_number INT,
+  date TIMESTAMP,
+  type TEXT,
+  symbol TEXT,
+  trade_id UUID,
+  quantity DECIMAL,
+  price DECIMAL,
+  amount DECIMAL,
+  PRIMARY KEY (account_number, symbol, type, date, trade_id)
+  ) WITH CLUSTERING ORDER BY (symbol ASC, type ASC, date DESC);
+....
+
+[source,sql]
+....
+CREATE TABLE trades_by_account5 (
+  account_number INT,
+  date TIMESTAMP,
+  type TEXT,
+  symbol TEXT,
+  trade_id UUID,
+  quantity DECIMAL,
+  price DECIMAL,
+  amount DECIMAL,
+  PRIMARY KEY (account_number, symbol, date, trade_id)
+  ) WITH CLUSTERING ORDER BY (symbol ASC, date DESC);
+....
+
+_We kept trades_by_account for demonstration purposes_
+
+[source,sql]
+....
+CREATE TABLE trades_by_account (
+  account_number INT,
+  date TIMESTAMP,
+  type TEXT,
+  symbol TEXT,
+  trade_id UUID,
+  quantity DECIMAL,
+  price DECIMAL,
+  amount DECIMAL,
+  PRIMARY KEY (account_number, date, trade_id)
+  ) WITH CLUSTERING ORDER BY (date DESC);
+
+  CREATE INDEX on trades_by_account (type);
+  CREATE INDEX on trades_by_account (symbol);
+....
+
+[source,sql]
+....
+CREATE TABLE buckets_by_stock (
+  symbol TEXT,
+  bucket INT,
+  rows COUNTER,
+  PRIMARY KEY (symbol, bucket)
+  );
+....
+
+[source,sql]
+....
+CREATE TABLE users_by_stock (
+  symbol TEXT,
+  bucket INT,
+  username TEXT,
+  name TEXT,
+  ssn INT,
+  dob TIMESTAMP,
+  emails MAP<TEXT,TEXT>,
+  phones MAP<TEXT,TEXT>,
+  account_number INT,
+  PRIMARY KEY ((symbol, bucket), username)
+  );
+....
+
+[source,sql]
+....
+CREATE TABLE accounts_by_stock (
+  symbol TEXT,
+  accounts COUNTER,
+  PRIMARY KEY (symbol)
+  );
+....
+
+
+
+*END OF EXERCISE*
diff --git a/cassandra/dev/drivers/php/beginner/src/includes.adoc b/cassandra/dev/drivers/php/beginner/src/includes.adoc
@@ -0,0 +1 @@
+include::{slide_path}/php_beginner.adoc[]
diff --git a/cassandra/dev/drivers/php/beginner/src/notes.adoc b/cassandra/dev/drivers/php/beginner/src/notes.adoc
@@ -0,0 +1,99 @@
+////
+In order to hide the instructor comments and make this file student-notes instead,
+add a ! to the end of instructor, like:
+:instructor!:
+////
+////
+This attribute is used to show/hide the instructor-only notes in this file.
+////
+:instructor!:
+
+== *DS220 Apache Cassandra Data Modeling*
+
+=== *Data Modeling Use Case*
+
+==== *Slide 1: What are sensor applications?*
+Sensors are everywhere, and Cassandra is a wonderful database to capture this kind
+of data. Cassandra is designed to handle time-series data, and when you think about it,
+it is clear that sensor applications exist to understand the _change_ in values that
+sensors are measuring.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* This might be a good opportunity to open the floor to discussion about Cassandra and sensor applications.
+endif::instructor[]
+
+==== *Slide 2: Sensor data: use case introduction*
+Generally, sensors are deployed in networks, but note that each sensor is uniquely
+identified and arrayed. Although the sensors may be completely identical, the geographical
+location or person to whom sensors are attached must be trackable in order to aggregate the
+data that will be collected.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* This is a quick round-up and review of everything taught over
+the course of 2 days. Do not get bogged down, but summarize what was
+taught and tie the slides together by explaining how they relate to one another.
+endif::instructor[]
+
+==== *Slide 3: More Sensor data: use case introduction*
+When modeling the data and how you will store and retrieve it in Cassandra, it is
+vital to understand that the design is driven by the queries that you will make.
+The retrieval of data plays a crucial role in how to optimize the storage within
+the Cassandra database.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* It cannot be overemphasized that *query-driven* design is important
+in Cassandra. This may seem like a departure for the relational database designers
+in your audience.
+endif::instructor[]
+
+==== *Slide 4: Sensor data: conceptual data model*
+This conceptual data model shows the relationship of the data entities.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* Remind participants that the conceptual data model is database-agnostic,
+and uses Chen notation.
+endif::instructor[]
+
+==== *Slide 5: Sensor data: application workflow*
+This slide shows the relationship of the queries, and how you might expect a query
+to access sensor data.
+
+==== *Slide 6: Sensor data: logical data model*
+A logical data model, to review, puts together the conceptual model with data types
+and defines the partition key and primary keys of the various Cassandra tables that
+will be used by the application.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* This is the meat of this example. Discuss WHY certain choices are made
+for the primary key and clustering columns, in order to answer the queries that were
+designed.
+endif::instructor[]
+
+==== *Slides 7-9: Sensor data: analysis*
+Understanding the partition size and number of partitions is the final key to ensuring
+that the table design will work when scaled out on Cassandra. Duplication must be
+considered as part of the design, to be sure that the application will not be
+overwhelmed with updating tables when the data starts rolling in.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* The estimates done here are simply to give a magnitude of the scale
+that may result from the table design. Note that the analysis done here points to
+storing weekly data to end up with manageable partitions.
+endif::instructor[]
+
+==== *Slides 10-14: Sensor data: physical data model*
+Each table must be carefully designed to answer a question that the user has.
+
+ifdef::instructor[]
+[NOTE]
+*Instructor:* Some questions to ask the students as you go through the tables are:
+(1) what similarities do you see in the tables?
+(2) what differences?
+(3) why is clustering order important?
+endif::instructor[]