From 0bc8b9de2e94081201ea7ddcd5f7ac4e41f63645 Mon Sep 17 00:00:00 2001 From: Sonya Huang Date: Sat, 27 Jan 2024 09:23:30 -0500 Subject: [PATCH 1/2] fix: parse dynamic table query from DDL Previously we have been extracting the `query` portion of dynamic table definitions from the stored DDL in Snowflake by searching for the string position of keywords that preceded the query. However, this approach has proven inadequate as there are many ways to write working dynamic table definitions that conflict with string position assumptions. Observing that definitions of views and materialized views have faced similar challenges but are currently working, this commit extends the existing ViewSelectStatementExtractor that is currently being used to extract the query definition from view and materialized view DDL statements, applying the same approach to dynamic table DDL statements. --- pkg/resources/dynamic_table.go | 32 +--------- .../dynamic_table_acceptance_test.go | 8 ++- pkg/snowflake/parser.go | 51 ++++++++++++++- pkg/snowflake/parser_test.go | 63 +++++++++++++++++++ 4 files changed, 121 insertions(+), 33 deletions(-) diff --git a/pkg/resources/dynamic_table.go b/pkg/resources/dynamic_table.go index eba737df17..c6e55b5f67 100644 --- a/pkg/resources/dynamic_table.go +++ b/pkg/resources/dynamic_table.go @@ -3,12 +3,12 @@ package resources import ( "context" "database/sql" - "errors" "log" "strings" "github.com/Snowflake-Labs/terraform-provider-snowflake/pkg/helpers" "github.com/Snowflake-Labs/terraform-provider-snowflake/pkg/sdk" + "github.com/Snowflake-Labs/terraform-provider-snowflake/pkg/snowflake" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" ) @@ -243,7 +243,8 @@ func ReadDynamicTable(d *schema.ResourceData, meta interface{}) error { return err } - query, err := getQueryFromDDL(dynamicTable.Text) + extractor := snowflake.NewViewSelectStatementExtractor(dynamicTable.Text) + query, err := extractor.ExtractDynamicTable() if err != nil { return err } @@ -254,33 +255,6 @@ func ReadDynamicTable(d *schema.ResourceData, meta interface{}) error { return nil } -/* - * Previous implementation tried to match query part from the whole dynamic table DDL statement by just using `AS`. - * It was failing for table names containing `AS` (like `REASON`). It was also failing for other parts containing `AS`. - * We cannot simply match by ` AS ` because this can still be part of COMMENT or SELECT query itself. - * We have considered not setting the query at all but it was not ideal because of: - * - possible external changes to dynamic table (drop and recreate externally with different query); - * - import not 100% correct. - * We did not want to complicate the implementation too much by introducing parsers. - * One more thing worth mentioning is the whitespace that can be introduced by the user that is still returned by SHOW. - * For now, we just normalize the DDL before extraction of query. - * - * The outcome implementation matches by ` AS SELECT ` and checks the number of matches. - * If more matches are found, the error is returned to inform user about possible cause of error. - * - * Refer to issue https://github.com/Snowflake-Labs/terraform-provider-snowflake/issues/2329. - */ -func getQueryFromDDL(text string) (string, error) { - normalizedDDL := normalizeQuery(text) - matchSubstring := " AS SELECT " - matches := strings.Count(strings.ToUpper(normalizedDDL), matchSubstring) - if matches != 1 { - return "", errors.New("too many matches found. There is no way of getting ONLY the 'query' used to create the dynamic table from Snowflake. We try to get it from the whole creation statement but there may be cases where it fails. Please submit the issue on Github (refer to #2329)") - } - idx := strings.Index(strings.ToUpper(normalizedDDL), " AS SELECT ") - return strings.TrimSpace(normalizedDDL[idx+4:]), nil -} - func parseTargetLag(v interface{}) sdk.TargetLag { var result sdk.TargetLag tl := v.([]interface{})[0].(map[string]interface{}) diff --git a/pkg/resources/dynamic_table_acceptance_test.go b/pkg/resources/dynamic_table_acceptance_test.go index e638b9a412..0ed05fd816 100644 --- a/pkg/resources/dynamic_table_acceptance_test.go +++ b/pkg/resources/dynamic_table_acceptance_test.go @@ -4,7 +4,6 @@ import ( "context" "database/sql" "fmt" - "regexp" "strings" "testing" @@ -336,7 +335,7 @@ func TestAcc_DynamicTable_issue2329(t *testing.T) { func TestAcc_DynamicTable_issue2329_with_matching_comment(t *testing.T) { dynamicTableName := strings.ToUpper(acctest.RandStringFromCharSet(4, acctest.CharSetAlpha) + "AS" + acctest.RandStringFromCharSet(4, acctest.CharSetAlpha)) tableName := dynamicTableName + "_table" - query := fmt.Sprintf(`select "id" from "%v"."%v"."%v"`, acc.TestDatabaseName, acc.TestSchemaName, tableName) + query := fmt.Sprintf(`with temp as (select "id" from "%v"."%v"."%v") select * from temp`, acc.TestDatabaseName, acc.TestSchemaName, tableName) m := func() map[string]config.Variable { return map[string]config.Variable{ "name": config.StringVariable(dynamicTableName), @@ -361,7 +360,10 @@ func TestAcc_DynamicTable_issue2329_with_matching_comment(t *testing.T) { { ConfigDirectory: acc.ConfigurationDirectory("TestAcc_DynamicTable_issue2329/1"), ConfigVariables: m(), - ExpectError: regexp.MustCompile(`too many matches found. There is no way of getting ONLY the 'query' used to create the dynamic table from Snowflake. We try to get it from the whole creation statement but there may be cases where it fails. Please submit the issue on Github \(refer to #2329\)`), + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr("snowflake_dynamic_table.dt", "name", dynamicTableName), + resource.TestCheckResourceAttr("snowflake_dynamic_table.dt", "query", query), + ), }, }, }) diff --git a/pkg/snowflake/parser.go b/pkg/snowflake/parser.go index bdd429886e..387c519d60 100644 --- a/pkg/snowflake/parser.go +++ b/pkg/snowflake/parser.go @@ -87,6 +87,36 @@ func (e *ViewSelectStatementExtractor) ExtractMaterializedView() (string, error) return string(e.input[e.pos:]), nil } +func (e *ViewSelectStatementExtractor) ExtractDynamicTable() (string, error) { + fmt.Printf("[DEBUG] extracting dynamic table query %s\n", string(e.input)) + e.consumeSpace() + e.consumeToken("create") + e.consumeSpace() + e.consumeToken("or replace") + e.consumeSpace() + e.consumeToken("dynamic table") + e.consumeSpace() + e.consumeID() + // TODO column list + e.consumeSpace() + e.consumeComment() + e.consumeSpace() + e.consumeQuotedParameter("lag") + e.consumeSpace() + e.consumeTokenParameter("warehouse") + e.consumeSpace() + e.consumeTokenParameter("refresh_mode") + e.consumeSpace() + e.consumeTokenParameter("initialize") + e.consumeSpace() + e.consumeTokenParameter("warehouse") + e.consumeSpace() + e.consumeToken("as") + e.consumeSpace() + + return string(e.input[e.pos:]), nil +} + // consumeToken will move e.pos forward iff the token is the next part of the input. Comparison is // case-insensitive. Will return true if consumed. func (e *ViewSelectStatementExtractor) consumeToken(t string) bool { @@ -134,7 +164,11 @@ func (e *ViewSelectStatementExtractor) consumeNonSpace() { } func (e *ViewSelectStatementExtractor) consumeComment() { - if c := e.consumeToken("comment"); !c { + e.consumeQuotedParameter("comment") +} + +func (e *ViewSelectStatementExtractor) consumeQuotedParameter(param string) { + if c := e.consumeToken(param); !c { return } @@ -173,6 +207,21 @@ func (e *ViewSelectStatementExtractor) consumeComment() { } } +func (e *ViewSelectStatementExtractor) consumeTokenParameter(param string) { + if c := e.consumeToken(param); !c { + return + } + + e.consumeSpace() + + if c := e.consumeToken("="); !c { + return + } + + e.consumeSpace() + e.consumeNonSpace() +} + func (e *ViewSelectStatementExtractor) consumeClusterBy() { if e.input[e.pos] != '(' { return diff --git a/pkg/snowflake/parser_test.go b/pkg/snowflake/parser_test.go index 7eff62c371..f59ee1df81 100644 --- a/pkg/snowflake/parser_test.go +++ b/pkg/snowflake/parser_test.go @@ -137,6 +137,69 @@ from bar;` } } +func TestViewSelectStatementExtractor_ExtractDynamicTable(t *testing.T) { + basic := "create dynamic table foo lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;" + caps := "CREATE DYNAMIC TABLE FOO LAG = 'DOWNSTREAM' REFRESH_MODE = 'AUTO' INITIALIZE = 'ON_CREATE' WAREHOUSE = COMPUTE_WH AS SELECT * FROM BAR;" + parens := "create dynamic table foo lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as (select * from bar);" + multiline := ` +create dynamic table foo +lag = 'DOWNSTREAM' +refresh_mode = 'AUTO' +initialize = 'ON_CREATE' +warehouse = COMPUTE_WH +as select * from bar;` + + multilineComment := ` +create dynamic table foo +lag = 'DOWNSTREAM' +refresh_mode = 'AUTO' +initialize = 'ON_CREATE' +warehouse = COMPUTE_WH +as +-- comment +select * +from bar;` + + comment := `create dynamic table foo comment = 'asdf' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` + commentEscape := `create dynamic table foo comment = 'asdf\'s are fun' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` + orReplace := `create or replace dynamic table foo comment = 'asdf' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` + identifier := `create or replace dynamic table "foo"."bar"."bam" comment = 'asdf\'s are fun' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` + + type args struct { + input string + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + {"basic", args{basic}, "select * from bar;", false}, + {"caps", args{caps}, "SELECT * FROM BAR;", false}, + {"parens", args{parens}, "(select * from bar);", false}, + {"multiline", args{multiline}, "select *\nfrom bar;", false}, + {"multilineComment", args{multilineComment}, "-- comment\nselect *\nfrom bar;", false}, + {"comment", args{comment}, "select * from bar;", false}, + {"commentEscape", args{commentEscape}, "select * from bar;", false}, + {"orReplace", args{orReplace}, "select * from bar;", false}, + {"identifier", args{identifier}, "select * from bar;", false}, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + e := NewViewSelectStatementExtractor(tt.args.input) + got, err := e.ExtractDynamicTable() + if (err != nil) != tt.wantErr { + t.Errorf("ViewSelectStatementExtractor.ExtractDynamicTable() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("ViewSelectStatementExtractor.ExtractDynamicTable() = '%v', want '%v'", got, tt.want) + } + }) + } +} + func TestViewSelectStatementExtractor_consumeToken(t *testing.T) { type fields struct { input []rune From 615f7be1379b0dfcc142f0e7d247c7c4067ca9d7 Mon Sep 17 00:00:00 2001 From: Sonya Huang Date: Sun, 11 Feb 2024 11:03:38 -0500 Subject: [PATCH 2/2] Nake DT query parser handle two comment positions The "correct" place to specify the query comment in a dynamic table DDL is at the end of the metadata argument list, right before the query. However, when retrieving an existing query from Snowflake using SHOW DYNAMIC TABLE, the comment is placed at the beginning of the metadata argument list. This commit updates the DT query parser to handle comments that can show up in either position. --- pkg/snowflake/parser.go | 2 ++ pkg/snowflake/parser_test.go | 16 +++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pkg/snowflake/parser.go b/pkg/snowflake/parser.go index 387c519d60..e351a57b7c 100644 --- a/pkg/snowflake/parser.go +++ b/pkg/snowflake/parser.go @@ -111,6 +111,8 @@ func (e *ViewSelectStatementExtractor) ExtractDynamicTable() (string, error) { e.consumeSpace() e.consumeTokenParameter("warehouse") e.consumeSpace() + e.consumeComment() + e.consumeSpace() e.consumeToken("as") e.consumeSpace() diff --git a/pkg/snowflake/parser_test.go b/pkg/snowflake/parser_test.go index f59ee1df81..6f733f6d6e 100644 --- a/pkg/snowflake/parser_test.go +++ b/pkg/snowflake/parser_test.go @@ -147,7 +147,8 @@ lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH -as select * from bar;` +as select * +from bar;` multilineComment := ` create dynamic table foo @@ -160,10 +161,14 @@ as select * from bar;` - comment := `create dynamic table foo comment = 'asdf' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` - commentEscape := `create dynamic table foo comment = 'asdf\'s are fun' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` - orReplace := `create or replace dynamic table foo comment = 'asdf' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` - identifier := `create or replace dynamic table "foo"."bar"."bam" comment = 'asdf\'s are fun' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` + comment := `create dynamic table foo lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH comment = 'asdf' as select * from bar;` + commentEscape := `create dynamic table foo lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH comment = 'asdf\'s are fun' as select * from bar;` + orReplace := `create or replace dynamic table foo lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH comment = 'asdf' as select * from bar;` + identifier := `create or replace dynamic table "foo"."bar"."bam" lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH comment = 'asdf\'s are fun' as select * from bar;` + // running SHOW DYNAMIC TABLE in Snowflake actually returns the query with + // the comment before other parameters, even though this is inconsistent + // with the order they are specified in CREATE DYNAMIC TABLE + commentBeforeOtherParams := `create dynamic table foo comment = 'asdf\'s are fun' lag = 'DOWNSTREAM' refresh_mode = 'AUTO' initialize = 'ON_CREATE' warehouse = COMPUTE_WH as select * from bar;` type args struct { input string @@ -183,6 +188,7 @@ from bar;` {"commentEscape", args{commentEscape}, "select * from bar;", false}, {"orReplace", args{orReplace}, "select * from bar;", false}, {"identifier", args{identifier}, "select * from bar;", false}, + {"commentBeforeOtherParams", args{commentBeforeOtherParams}, "select * from bar;", false}, } for _, tt := range tests { tt := tt