From 5b31abf59a0b7fd3252580ab8441768ec8a2cf7e Mon Sep 17 00:00:00 2001 From: z3z1ma Date: Tue, 31 Dec 2024 13:44:54 -0700 Subject: [PATCH] feat: handle variants during knowledge graph construction and scope graph to node, add example prefix in config block --- demo_duckdb/jaffle_shop.duckdb | Bin 1323008 -> 1323008 bytes demo_duckdb/models/orders_prefix.sql | 60 ++++++++++++++++++ demo_duckdb/models/schema.yml | 30 ++++++++- src/dbt_osmosis/core/osmosis.py | 32 ++++++---- .../test_column_level_knowledge_propagator.py | 2 +- 5 files changed, 109 insertions(+), 15 deletions(-) create mode 100644 demo_duckdb/models/orders_prefix.sql diff --git a/demo_duckdb/jaffle_shop.duckdb b/demo_duckdb/jaffle_shop.duckdb index 045067b60dbed9ce0b59db0a4534345d874da8f9..54e44cbca61606f9161f2360404e1ef342eae25b 100644 GIT binary patch delta 13456 zcmeHN4^)&@7XR+>!3;Bl4F3fYP|=k9GoX~1t+J^{TS{zMkJfPqWCn!LK|pl(w25se zJ7vP~6|Yt+a?3WiV<6?UR!dfjq^{zh&A({ao^@Rl+1k^V)1)q{|$> znfLB{@4ox)z3;yJdmlBYb8AlLHd;a>2Am8&`1d1;b6gRzFt{bfaymTp*o4#h8=onk ze3!ES{#Zz6#HfR%R_Lngctql#D7$1-qO8rjvwG#%=_8d3)<<-mwD_msbB1C`BCGa8 zoH9FOL{Qe)bh3RvEND5LF)@sGU$=9l;>KQ}YWk%SrnCAY$u7wbsZp==d=aD6 zO&!&%>FsFN1lelSFlBc3n2@u2hjdm?nq?TI?CG#7E9_R~!k!S)ZGbdVV*;yEj?}hR zD>5x=ca!&f-V)x08Qot;T%&3Pu-8>+Kj`uhE3?7N~1cw$^D^=K!+e zIuxr|N=Nu?%^T2ZQ-#)i{h*fra`Q_I@_%G z;Kwe3L&6xD^uj0^_ybQ*-xph+WOCyRjCp+>JX?wp{Kjb5@+a4A7~WqDHX0WmBt ze%v;SHw{y6*)=Al6Zc!^brlzll4M{i>_0-d>tRUo^TD+KBg7}1vc2wqgbbwnvRbj! z4FY}3oVucLQBQ<$te1r=A3Pf+B8V0rSUh0c+S?>zDhtm2#|5NOZC`80MO*8S@wm8i zXa8}5xx1EXv8+a<&*E1LeOf`JLf#no);^YUutT56$!j%@wkY}8;WT9k`;L!wo)WP< zho`ih;GdJS#7u;~nXZ&bmY4?yJwJPiL?}^MgC+CK2;35?*%)h15!n^bNv{NUv@nAO zE;e%yF&L+#aDhdlVfBXwH0%VukX-X9G4y{zHq<1+q}u`sekSR>KN?fR`}mSXYDiUv zu15tET4F*n(OdWNxal{9o#aAJZW+zd#TKC4eAVb(CsM^cUaM;RLzP71%0$6)EMH^b zCkivT80^=Q##x9q?~;V5+T?tgP=Of0n*Dk3j<_=G)j&>)PbugiQwPHcA9LK$(o|Lj zVp%-Rf2G@?yf`ZkjdYYwHmxPU?n{O=!uo1f_$atHO@6GTrfB*C6 zc}ne0qyC(M-eTx5e;-E1ra^@6oPp)`D%~N~8CY%~NN%)X2LF>@y&I#kcRX4DHbg3q zT?*yx!<08JjR`qta7gD2Kg6wvQnPY^@}g&`W*W_y*W`^ve<-DQVRWyGeNPMD6iUgn z;SeJ>MSmb=my05#^9V*`E-IB){yzfg#ogb1ASD||!Sx+U@kTUz#whMG>Os!qDfb_K z&WHn2JH)?r3Vi+CB>TkeAMWg9rtvk__jKd$O3{hoG3u&NU`+JQ=?|}2z zS)<;a(%fr}jz&#N#mX_Bh+*lzAgT%t9nivfGPM;xWbu{eUhMS(b&6n`AI145L@+^l zuB_F^cIx@Y;@%Wbd{=({~PzT0~! z4Bl08Glq+IeT6z(hSHfa*5ItLJ1Qz|8KkZNVh0(G%S);jrEp?Of%+yfrZQ!uk#tl5 z#>E0?ARPCHlz3P$qAL3kh}rJyp!X(Nm6kSfZvDUtMVtJAd<;gz`kA`}eo>m0*h|i#1l02Sv`YH^HrOTkyX5DRR)%j6(l8(PZ zArP{-7{*R$3ECwERW4inz63Z(=a3q3pc{#lg^75CCCILo)E$A5u$S~8JKa&4)E+~V zTs{JEnZFJ0gXwA{`Z8_gb(N_rnCve`^_?%lHy}%o!bqJvm?STPc-_0fob#!p5S969 zU!4CFz}XeTW>Psn5W+a?ln_!=3Te9MLl|FZB5RL97IZe;3(?S}F*CA}Jg@IhU0?muT(phLXe+kQQ7d#iDyrN0f`D)kbpr2}qAGk|Le9 z;*z2Oo=1&j=N|qFqc|3cme|tQ8q);C{`YX>*S(<|SR7#){aN!FJmXPrd zh=}cDy~vE@FM;VI04->!2APC9!A9!3;3l%<6jU0hc=Y4`@D%)u#V+=L3AC%*Spn%m z+UTv-kROV1_9gLx;6}0t-{|4VbZ59S=`ZTdM}GqM_5T^*_pbogeF8W8mHq^-KYUiXq?d$B^$_BN)<+7%~h(IfV3hfTVjME*{k2 zBjYc9=qCst9dXsdAUQR;<`mwSHH5oTj1)^6V7X$=N^^IXx5{^ra;t106;{hUlY{}DtrmOMXIK{N<{Gv&+dfWcjDK(2KSB#LE|*3XzBaV( zH{0}yJ`Hlf;GF7> z^R|{w|DfrG7vu#88)qv!HvEA!W>_|dGZo{E!L)!gMrNXHbJw&NrazUFr0gi%GH-IM z(q7o}opKEw_Vi^{W#t_(b3n>3(%$)WuJXA18fPc1zIHO=cMtEVt>nUf)iaV^GI) z(&f1%*D9OH{Z`rTiik4`Eo1kt4D23%rU%zi+t!6Yv9)bydgjp@{){H8ta8i*q+!!& zq!sdxS`WT_mDsE?!Jg#P40$I`79l3P0hjE-CBNt1o+*!q8;83Z9=xaij`{C64sEdi ztlrTc>AI&Ob^aFn(D6m>^{MkWl!ZP~ws4DM!+T@Mo=iE#RMUT|2&7Po3|Ix`|oJnbU6} zF^>3N)l)dA_ncHq`%p`de>*Lu=|I&_w6A^$>gOeL?iSe;sGuVkjZZ#b4#&0*U&s_> zUaoUjS)b;p inp=UCPBg#iiipC0D3Mp6&obi^hR*=^`K<87PyP!u9C5V( delta 10637 zcmeHMdr(x@8Nc7|-DP)KQRi zud#JtJ#Ftyn~Lf5Vf%+dD6=Cm6rF|y$(X|xT zsTS#3wrEx-yLv5^{q49bespv_8lv@*sGZLmYD>%~O#D~tktvIfblAzqJQwP*vrK_AIha{zas_Wq|KClB8$cfNRn3~Nfrs=r-a@x z9?a%tZzkj%teZ)S-{BOU9UZdhofhM5;TbuQ#O-&**}`q3B^6V?md|z?2KdJcDkZz$ zk;#i;q%+u+H|%IOZ@AuML1CZYk--9&QkdFNh^!>Nn^dGtA-Cp8r?0VKT?PwwnYEoV zrm{oLi(fBM3sfIfKia$OtwyG&wH!M0w3dk8f^{jPX(iNz)Y|T6g~A?{Gr2Vz8>80z z*G6H@t)81(^XbPIC-_|suDodknbRK?SV2c^`XYMKi<` zT?YFf;S#2DS)mv!rov9ie#cZ49jq%N7nT9iX5m(_NWASSDCWpa=Yxq{k^b>N0JktB zY_xnuEi>`Fh~6_LHBv-kn*)Z!JgXr6~C6YgNf_@&?5HBvyl(eT1KI+PQx(zueo%aqV+ zO|4YqA9Pc3uM&EmAVc1Z%lU9hVKfx#7SFxxN}CD`e1P0YI|3QpsJcnyZNwZ=Xbyc951-J zvX>?T(PV40iwJvm{N31c9y=4;a#RkJzF;hlEr*?adm7Z6@+HFL#S;L>?EWwlTglPz z1QG@W1ri3el+b5s*EGQ-fStm0>!zrJLVW;SkD-+)uQH@_SXqVnPW&Bx-|tG8gh~PR zja)3yp`-=1Ilxmp7NU~BkJ?1PD=?jpvLj$g@8o<4GU$Y7D#z}? zlL90+MVZjk7n)Gp6QMsT+7*Q|$IrJxaslhx)QFs%Y-|xDWj`6p90RI}dOz_d74(l! z^l~f?Eoi4|%tV=XH5eT75pJ1(}$1fHJ&|i9ygEDS^HTmBecCj5P~K8V2bp2IoBfB zcOx6@e@agTB)7om@`4Rf=I7=L)5mAFxg$M7ge=~|!OabRmtC(u7a}K%8C*6>cCGj{ zJqev8_#&0?_OY6we4{Ex#;ulfc?xY8f<9yHzaq*gFGy{F?kB78`>)al*A5Pw}$ic+3VL! z%$j+_2pGtA@Xh8~}>#36jVG_q%4yUW&&+HeYF(>RpI4tIb zqamtWa>80yBE`ILc-1LiLN81ToUikieX#cPB8oX-M8YR?!sI|q4%^}_1jhwB%D6E? zUZX7(`^LsNxkd_jNji-w-ZxC6W8POk zD24a^@La_Ez9bywUG=-lj9c=v+Ty8(Jj2@IG1vNP0{7ClRr|MNwU;LHjrIO}9q&87t_I{`dvc#Er#im znl1D^{&DJK{ui+a6ooys7#@(C|65URBCBB{+1x~|WR*;k#LXydD257QBylpg824@` z;APBG?ol6eNw)palVp0PKWU#Z6=js6v&ZZwB~M2xu@-_628{R`wol8x|rdzES#w+Xj7 Npj5|Q<&r#Q{5R6tE|CBL diff --git a/demo_duckdb/models/orders_prefix.sql b/demo_duckdb/models/orders_prefix.sql new file mode 100644 index 00000000..4c715667 --- /dev/null +++ b/demo_duckdb/models/orders_prefix.sql @@ -0,0 +1,60 @@ +{{ config( + dbt_osmosis_prefix="o_", +) }} + +{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} + +with orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +order_payments as ( + + select + order_id, + + {% for payment_method in payment_methods %} + sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, + {% endfor %} + + sum(amount) as total_amount + + from payments + + group by order_id + +), + +final as ( + + select + orders.order_id as o_order_id, + orders.customer_id as o_customer_id, + orders.order_date as o_order_date, + orders.status as o_status, + + {% for payment_method in payment_methods %} + + order_payments.{{ payment_method }}_amount as o_{{ payment_method }}_amount, + + {% endfor -%} + + order_payments.total_amount as o_amount + + from orders + + + left join order_payments + on orders.order_id = order_payments.order_id + +) + +select * from final diff --git a/demo_duckdb/models/schema.yml b/demo_duckdb/models/schema.yml index be701013..b43e1b50 100644 --- a/demo_duckdb/models/schema.yml +++ b/demo_duckdb/models/schema.yml @@ -1,5 +1,4 @@ version: 2 - models: - name: customers description: This table has basic information about a customer, as well as some derived facts based on a customer's orders @@ -83,3 +82,32 @@ models: description: Total amount (AUD) of the order tests: - not_null + - name: orders_prefix + columns: + - name: o_order_id + description: '' + data_type: INTEGER + - name: o_customer_id + description: '' + data_type: INTEGER + - name: o_order_date + description: '' + data_type: DATE + - name: o_status + description: '{{ doc("orders_status") }}' + data_type: VARCHAR + - name: o_credit_card_amount + description: '' + data_type: DOUBLE + - name: o_coupon_amount + description: '' + data_type: DOUBLE + - name: o_bank_transfer_amount + description: '' + data_type: DOUBLE + - name: o_gift_card_amount + description: '' + data_type: DOUBLE + - name: o_amount + description: '' + data_type: DOUBLE diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index ba359c9a..1f8a1c98 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -1098,6 +1098,13 @@ def _build_column_knowledge_graph( """Generate a column knowledge graph for a dbt model or source node.""" tree = _build_node_ancestor_tree(context.project.manifest, node) + pm = get_plugin_manager() + node_column_variants: dict[str, list[str]] = {} + for column_name, _ in node.columns.items(): + variants = node_column_variants.setdefault(column_name, [column_name]) + for v in pm.hook.get_candidates(name=column_name, node=node, context=context.project): + variants.extend(t.cast(list[str], v)) + column_knowledge_graph: dict[str, dict[str, t.Any]] = {} for generation in reversed(sorted(tree.keys())): ancestors = tree[generation] @@ -1108,15 +1115,21 @@ def _build_column_knowledge_graph( if not isinstance(ancestor, (SourceDefinition, SeedNode, ModelNode)): continue - for name, metadata in ancestor.columns.items(): + for name, _ in node.columns.items(): graph_node = column_knowledge_graph.setdefault(name, {}) + for variant in node_column_variants[name]: + incoming = ancestor.columns.get(variant) + if incoming is not None: + break + else: + continue + graph_edge = incoming.to_dict() + if context.settings.add_progenitor_to_meta: graph_node.setdefault("meta", {}).setdefault( "osmosis_progenitor", ancestor.unique_id ) - graph_edge = metadata.to_dict() - if context.settings.use_unrendered_descriptions: raw_yaml = _get_member_yaml(context, ancestor) or {} raw_columns = t.cast(list[dict[str, t.Any]], raw_yaml.get("columns", [])) @@ -1125,7 +1138,7 @@ def _build_column_knowledge_graph( lambda c: normalize_column_name( c["name"], context.project.config.credentials.type ) - == name, + in node_column_variants[name], {}, ) if unrendered_description := raw_column_metadata.get("description"): @@ -1183,15 +1196,8 @@ def inherit_upstream_column_knowledge( column_knowledge_graph = _build_column_knowledge_graph(context, node) kwargs = None for name, node_column in node.columns.items(): - variants: list[str] = [name] - pm = get_plugin_manager() - for v in pm.hook.get_candidates(name=name, node=node, context=context.project): - variants.extend(t.cast(list[str], v)) - for variant in variants: - kwargs = column_knowledge_graph.get(variant) - if kwargs is not None: - break - else: + kwargs = column_knowledge_graph.get(name) + if kwargs is None: continue updated_metadata = {k: v for k, v in kwargs.items() if v is not None and k in inheritable} diff --git a/tests/test_column_level_knowledge_propagator.py b/tests/test_column_level_knowledge_propagator.py index 2af5312f..fe4d02eb 100644 --- a/tests/test_column_level_knowledge_propagator.py +++ b/tests/test_column_level_knowledge_propagator.py @@ -305,7 +305,7 @@ def test_update_undocumented_columns_with_prior_knowledge_skip_merge_meta( assert target_node_columns["customer_id"].description == "THIS COLUMN IS UPDATED FOR TESTING" assert ( target_node_columns["customer_id"].meta == {"my_key": "my_value"} - ) # NOTE: nodes meta is not mutated beyond our original mutation in the manifest node since skip_merge_tags is True + ) # NOTE: nodes meta is not mutated beyond our original mutation in the manifest node since skip_merge_meta is True assert sorted(target_node_columns["customer_id"].tags) == [ "my_tag1", "my_tag2",