Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Week 2 work I forgot to push last week #24

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions greenery/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

target/
dbt_packages/
logs/
15 changes: 15 additions & 0 deletions greenery/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Welcome to your new dbt project!

### Using the starter project

Try running the following commands:
- dbt run
- dbt test


### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
Empty file added greenery/analyses/.gitkeep
Empty file.
44 changes: 44 additions & 0 deletions greenery/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'greenery'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'greenery'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/ directory
# as views. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
  greenery:
    # Config indicated by + and applies to all files under models/example/
    example:
      +materialized: view

    # After a model is built, let the `reporting` role read it.
    post-hook:
      - "GRANT SELECT ON {{this}} TO reporting"

# At the end of every run, let the `reporting` role use the schema the run
# built into. The schema comes from the active target instead of a hard-coded
# developer schema, so the grant stays correct for any profile or target.
on-run-end:
  - "GRANT USAGE ON SCHEMA {{ target.schema }} TO reporting"
Empty file added greenery/macros/.gitkeep
Empty file.
9 changes: 9 additions & 0 deletions greenery/macros/calculate_rates.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{#
    calculate_rates

    Renders a SQL expression giving numerator / denominator as a percentage,
    rounded to `dp` decimal places (2 by default).

    Both operands are cast to numeric. A denominator of 0 is turned into NULL
    first, so the rendered expression yields NULL instead of raising a
    division-by-zero error.
#}
{% macro calculate_rates(numerator_column_name, denominator_column_name, dp = 2) %}
    {%- set numerator_sql = numerator_column_name ~ '::numeric' -%}
    {%- set safe_denominator_sql = 'NULLIF(' ~ denominator_column_name ~ '::numeric, 0)' %}

    round(({{ numerator_sql }} / {{ safe_denominator_sql }}) * 100, {{ dp }})

{% endmacro %}

16 changes: 16 additions & 0 deletions greenery/macros/calculate_rates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
version: 2

macros:
- name: calculate_rates
arguments:
- name: numerator_column_name
type: column name
description: "The name of a column that will be the numerator in the rate calculation"

- name: denominator_column_name
type: column name
description: "The name of a column that will be the denominator in the rate calculation"

- name: dp
type: integer
description: "The number of decimal places to round to. Default is 2."
9 changes: 9 additions & 0 deletions greenery/macros/postive_values.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{#
    positive_values

    Generic test: returns every row of `model` whose `column_name` is below
    zero. Zero and NULL values are not returned by this query.
#}
{% test positive_values(model, column_name) %}

with failing_rows as (

    select *
    from {{ model }}
    where 0 > {{ column_name }}

)

select *
from failing_rows

{% endtest %}
26 changes: 26 additions & 0 deletions greenery/models/example/my_first_dbt_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

/*
    Starter dbt model.

    Models can be configured directly inside their SQL file; a config set
    here overrides what dbt_project.yml declares for this model. This one is
    materialized as a table -- switch 'table' to 'view' to try the other option.
*/

{{ config(materialized='table') }}

with source_data as (

    select 1 as id

    union all

    select null as id

)

select id
from source_data
-- Drops the record whose `id` is null.
where id is not null
6 changes: 6 additions & 0 deletions greenery/models/example/my_second_dbt_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

-- `ref` lets this model select from another model in the project;
-- here it keeps only the row of my_first_dbt_model whose id is 1.

with first_model as (

    select *
    from {{ ref('my_first_dbt_model') }}

)

select *
from first_model
where id = 1
21 changes: 21 additions & 0 deletions greenery/models/example/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

version: 2

models:
- name: my_first_dbt_model
description: "A starter dbt model"
columns:
- name: id
description: "The primary key for this table"
tests:
- unique
- not_null

- name: my_second_dbt_model
description: "A starter dbt model"
columns:
- name: id
description: "The primary key for this table"
tests:
- unique
- not_null
15 changes: 15 additions & 0 deletions greenery/models/example/staging/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
version: 2

models:
- name: stg_superheroes
description: Demographics about each superhero such as name, hair color, height and weight.
columns:
- name: name
tests:
- not_null
- name: height
tests:
- positive_values
- name: weight
tests:
- positive_values
22 changes: 22 additions & 0 deletions greenery/models/example/staging/source.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
version: 2

sources:

- name: tutorial # name of the source
schema: public # this is the schema our raw data lives in
database: dbt # this is the name of our database

quoting:
database: false
schema: false
identifier: false

freshness:
warn_after: {count: 24, period: hour}
error_after: {count: 48, period: hour}

tables:
- name: superheroes
loaded_at_field: created_at
description: >
Contains demographic information about each superhero
19 changes: 19 additions & 0 deletions greenery/models/example/staging/stg_superheroes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{{
    config(
        materialized='table'
    )
}}

-- Staging model over the raw `tutorial.superheroes` source, built as a table.
-- Renames `id` to `superhero_id` and passes the descriptive attributes through.
-- Height and weight values of -99 are converted to NULL.
-- NOTE(review): -99 is presumably the source's "unknown" placeholder -- confirm.
SELECT
    id AS superhero_id,
    name,
    gender,
    eye_color,
    race,
    hair_color,
    NULLIF(height, -99) AS height,
    publisher,
    skin_color,
    alignment,
    -- Explicit alias: without it the database picks the output column name,
    -- and the model would expose no `weight` column for the
    -- `positive_values` test declared on `weight` in the staging schema.yml.
    NULLIF(weight, -99) AS weight
FROM {{ source('tutorial', 'superheroes') }}
36 changes: 36 additions & 0 deletions greenery/models/marts/core/core.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
version: 2

models:
- name: int_orders_grouped_by_user
description: Order data grouped by user
columns:
- name: user_id
tests:
- not_null
- unique

- name: int_events_grouped_by_user
description: events data grouped by user
columns:
- name: user_id
tests:
- not_null
- unique


- name: dim_users
description: Info on users combining a bunch of tables together
columns:
- name: user_id
tests:
- not_null
- unique

- name: facts_orders
description: Info on orders combining a bunch of tables together
columns:
- name: order_id
tests:
- not_null
- unique

56 changes: 56 additions & 0 deletions greenery/models/marts/core/dim_users.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{{
    config(
        materialized='table'
    )
}}

-- User dimension: every row of stg_users, enriched with the user's address,
-- per-user order aggregates and per-user event aggregates.
-- All enrichments are LEFT JOINs, so users with no orders, no events or no
-- matching address are kept and the corresponding columns are NULL.

with stg_users as (

    select * from {{ ref('stg_users') }}

)

, int_orders as (

    select * from {{ ref('int_orders_grouped_by_user') }}

)

, int_sessions as (

    select * from {{ ref('int_events_grouped_by_user') }}

)

, stg_addresses as (

    select * from {{ ref('stg_addresses') }}

)

SELECT
    stg_users.*
    -- Separate the two name parts with a space; plain concatenation produced
    -- run-together values such as "JaneDoe". Still NULL when either part is NULL.
    , stg_users.first_name || ' ' || stg_users.last_name as full_name
    , stg_addresses.address
    , stg_addresses.zipcode
    , stg_addresses.state
    , stg_addresses.country
    , int_orders.num_of_orders
    , int_orders.num_of_promos
    , int_orders.first_order_created_at
    , int_orders.last_order_created_at
    , int_orders.total_order_cost
    , int_orders.total_shipping_cost
    , int_orders.total_order_total
    , int_sessions.num_of_events
    , int_sessions.num_of_sessions
    , int_sessions.first_event_created_at
    , int_sessions.last_event_created_at
    , int_sessions.num_of_page_view_events
    , int_sessions.num_of_add_to_cart_events
    , int_sessions.num_of_checkout_events
    , int_sessions.num_of_package_shipped_events

FROM stg_users
LEFT JOIN int_orders
    ON stg_users.user_id = int_orders.user_id
LEFT JOIN int_sessions
    ON stg_users.user_id = int_sessions.user_id
LEFT JOIN stg_addresses
    ON stg_users.address_id = stg_addresses.address_id
28 changes: 28 additions & 0 deletions greenery/models/marts/core/facts_orders.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

-- Order fact model: every row of stg_orders, plus a 1/0 flag telling whether
-- the order's promo resolved to a non-null discount, plus the address
-- attributes looked up through the order's address_id.
-- Both lookups are LEFT JOINs, so orders without a promo or a matching
-- address are kept.

with orders as (

    select * from {{ ref('stg_orders') }}

),

promos as (

    select * from {{ ref('stg_promos') }}

),

addresses as (

    select * from {{ ref('stg_addresses') }}

)

select
    orders.*,
    case
        when promos.discount is null then 0
        else 1
    end as discount_used,
    addresses.address,
    addresses.zipcode,
    addresses.state,
    addresses.country
from orders
left join promos
    on promos.promo_id = orders.promo_id
left join addresses
    on addresses.address_id = orders.address_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{
    config(
        materialized='table'
    )
}}

-- Per-user roll-up of stg_events: one output row per user_id with event and
-- session counts, first/last event timestamps, and a count per event type.

with stg_events as (

    select * from {{ ref('stg_events') }}

)

, events_agg as (

    SELECT
        user_id
        , count(event_id) as num_of_events
        -- DISTINCT is required: each event row repeats its session_id, so a
        -- plain count would return the number of events, not of sessions.
        , count(distinct session_id) as num_of_sessions
        , min(created_at) as first_event_created_at
        , max(created_at) as last_event_created_at
        -- count() skips NULLs, so each CASE below counts only the events of
        -- one type.
        , count(case
            when event_type = 'page_view'
            then event_id ELSE NULL END) as num_of_page_view_events
        , count(case
            when event_type = 'add_to_cart'
            then event_id ELSE NULL END) as num_of_add_to_cart_events
        , count(case
            when event_type = 'checkout'
            then event_id ELSE NULL END) as num_of_checkout_events
        , count(case
            when event_type = 'package_shipped'
            then event_id ELSE NULL END) as num_of_package_shipped_events

    FROM stg_events
    GROUP BY user_id

)

SELECT *
FROM events_agg
Loading