Skip to content

Commit

Permalink
[Exporter] export databricks_repo for Git Folders outside of `/Repo…
Browse files Browse the repository at this point in the history
…s` (#4308)

## Changes
<!-- Summary of your changes that are easy to understand -->

Resolves #3672

## Tests
<!-- 
How is this tested? Please see the checklist below and also describe any
other relevant tests
-->

- [x] `make test` run locally
- [x] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK
  • Loading branch information
alexott authored Dec 13, 2024
1 parent 3077b79 commit 18d13bf
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 56 deletions.
2 changes: 1 addition & 1 deletion docs/guides/experimental-exporter.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ Services are just logical groups of resources used for filtering and organizatio
* `policies` - **listing** [databricks_cluster_policy](../resources/cluster_policy).
* `pools` - **listing** [instance pools](../resources/instance_pool.md).
* `queries` - **listing** [databricks_query](../resources/query.md).
* `repos` - **listing** [databricks_repo](../resources/repo.md)
* `repos` - **listing** [databricks_repo](../resources/repo.md) (both classical Repos in `/Repos` and Git Folders in artbitrary locations).
* `secrets` - **listing** [databricks_secret_scope](../resources/secret_scope.md) along with [keys](../resources/secret.md) and [ACLs](../resources/secret_acl.md).
* `settings` - **listing** [databricks_notification_destination](../resources/notification_destination.md).
* `sql-dashboards` - **listing** Legacy [databricks_sql_dashboard](../resources/sql_dashboard.md) along with associated [databricks_sql_widget](../resources/sql_widget.md) and [databricks_sql_visualization](../resources/sql_visualization.md).
Expand Down
9 changes: 9 additions & 0 deletions exporter/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ import (

type resourceChannel chan *resource

type gitInfoCacheEntry struct {
IsPresent bool
RepoId int64
}

type importContext struct {
// not modified/used only in single thread
Module string
Expand Down Expand Up @@ -139,6 +144,9 @@ type importContext struct {
oldWorkspaceObjects []workspace.ObjectStatus
oldWorkspaceObjectMapping map[int64]string

gitInfoCache map[string]gitInfoCacheEntry
gitInfoCacheMutex sync.RWMutex

builtInPolicies map[string]compute.PolicyFamily
builtInPoliciesMutex sync.Mutex

Expand Down Expand Up @@ -256,6 +264,7 @@ func newImportContext(c *common.DatabricksClient) *importContext {
allWorkspaceObjects: []workspace.ObjectStatus{},
oldWorkspaceObjects: []workspace.ObjectStatus{},
oldWorkspaceObjectMapping: map[int64]string{},
gitInfoCache: map[string]gitInfoCacheEntry{},
workspaceConfKeys: workspaceConfKeys,
shImports: map[string]bool{},
notebooksFormat: "SOURCE",
Expand Down
45 changes: 43 additions & 2 deletions exporter/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/databricks/terraform-provider-databricks/clusters"
"github.com/databricks/terraform-provider-databricks/commands"
"github.com/databricks/terraform-provider-databricks/common"
"github.com/databricks/terraform-provider-databricks/internal/service/workspace_tf"
"github.com/databricks/terraform-provider-databricks/jobs"
"github.com/databricks/terraform-provider-databricks/qa"
"github.com/databricks/terraform-provider-databricks/repos"
Expand Down Expand Up @@ -288,7 +289,7 @@ var emptyConnections = qa.HTTPFixture{
var emptyRepos = qa.HTTPFixture{
Method: "GET",
ReuseRequest: true,
Resource: "/api/2.0/repos?",
Resource: "/api/2.0/repos?path_prefix=%2FWorkspace",
Response: repos.ReposListResponse{},
}

Expand Down Expand Up @@ -830,6 +831,16 @@ func TestImportingClusters(t *testing.T) {
meAdminFixture,
noCurrentMetastoreAttached,
emptyRepos,
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2Flibs%2Ftest.whl&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2Frepo%2Ftest.sh&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/preview/scim/v2/Groups?",
Expand Down Expand Up @@ -1494,6 +1505,11 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
},
},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2Ffoo%2Fbar.py&return_git_info=true",
Response: workspace.ObjectStatus{},
},
},
func(ctx context.Context, client *common.DatabricksClient) {
ic := newImportContext(client)
Expand Down Expand Up @@ -1743,7 +1759,7 @@ func TestImportingRepos(t *testing.T) {
userReadFixture,
{
Method: "GET",
Resource: "/api/2.0/repos?",
Resource: "/api/2.0/repos?path_prefix=%2FWorkspace",
Response: repos.ReposListResponse{
Repos: []repos.ReposInformation{
resp,
Expand Down Expand Up @@ -2184,6 +2200,16 @@ func TestImportingDLTPipelines(t *testing.T) {
Resource: "/api/2.0/permissions/files/789?",
Response: getJSONObject("test-data/get-workspace-file-permissions.json"),
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2FTest%20DLT&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2Finit.sh&return_git_info=true",
Response: workspace.ObjectStatus{},
},
},
func(ctx context.Context, client *common.DatabricksClient) {
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
Expand Down Expand Up @@ -2277,6 +2303,16 @@ func TestImportingDLTPipelinesMatchingOnly(t *testing.T) {
Resource: "/api/2.0/instance-profiles/list",
Response: getJSONObject("test-data/list-instance-profiles.json"),
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2FTest%20DLT&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2Finit.sh&return_git_info=true",
Response: workspace.ObjectStatus{},
},
},
func(ctx context.Context, client *common.DatabricksClient) {
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
Expand Down Expand Up @@ -2975,6 +3011,11 @@ func TestImportingLakeviewDashboards(t *testing.T) {
WarehouseId: "1234",
},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FDashboard1.lvdash.json&return_git_info=true",
Response: workspace_tf.ObjectInfo{},
},
},
func(ctx context.Context, client *common.DatabricksClient) {
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
Expand Down
69 changes: 44 additions & 25 deletions exporter/importables.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ var resourcesMap map[string]importable = map[string]importable{
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
List: func(ic *importContext) error {
clusters, err := clusters.NewClustersAPI(ic.Context, ic.Client).List()
Expand Down Expand Up @@ -470,6 +472,8 @@ var resourcesMap map[string]importable = map[string]importable{
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "path",
Expand All @@ -492,6 +496,8 @@ var resourcesMap map[string]importable = map[string]importable{
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
Import: func(ic *importContext, r *resource) error {
var job jobs.JobSettingsResource
Expand Down Expand Up @@ -579,8 +585,8 @@ var resourcesMap map[string]importable = map[string]importable{
}
if task.DbtTask.Source == "WORKSPACE" {
directory := task.DbtTask.ProjectDirectory
if strings.HasPrefix(directory, "/Repos") {
ic.emitRepoByPath(directory)
if ic.isInRepoOrGitFolder(directory, true) {
ic.emitRepoOrGitFolder(directory, true)
} else {
// Traverse the dbt project directory and emit all objects found in it
nbAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
Expand Down Expand Up @@ -1456,40 +1462,51 @@ var resourcesMap map[string]importable = map[string]importable{
return nameNormalizationRegex.ReplaceAllString(name[7:], "_") + "_" + d.Id()
},
Search: func(ic *importContext, r *resource) error {
reposAPI := repos.NewReposAPI(ic.Context, ic.Client)
notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
repoDir, err := notebooksAPI.Read(r.Value)
repoDir, err := ic.workspaceClient.Workspace.GetStatusByPath(ic.Context, r.Value)
if err != nil {
return err
}
repo, err := reposAPI.Read(fmt.Sprintf("%d", repoDir.ObjectID))
if err != nil {
return err
if repoDir.ObjectType != sdk_workspace.ObjectTypeRepo {
return fmt.Errorf("object %s is not a repo", r.Value)
}
if repoDir.ResourceId != "" {
r.ID = repoDir.ResourceId
} else {
r.ID = strconv.FormatInt(repoDir.ObjectId, 10)
}
r.ID = fmt.Sprintf("%d", repo.ID)
return nil
},
List: func(ic *importContext) error {
objList, err := repos.NewReposAPI(ic.Context, ic.Client).ListAll()
if err != nil {
return err
}
for offset, repo := range objList {
it := ic.workspaceClient.Repos.List(ic.Context, sdk_workspace.ListReposRequest{PathPrefix: "/Workspace"})
i := 1
for it.HasNext(ic.Context) {
repo, err := it.Next(ic.Context)
if err != nil {
return err
}
if repo.Url != "" {
ic.Emit(&resource{
Resource: "databricks_repo",
ID: fmt.Sprintf("%d", repo.ID),
ID: strconv.FormatInt(repo.Id, 10),
})
} else {
log.Printf("[WARN] ignoring databricks_repo without Git provider. Path: %s", repo.Path)
ic.addIgnoredResource(fmt.Sprintf("databricks_repo. path=%s", repo.Path))
}
log.Printf("[INFO] Scanned %d of %d repos", offset+1, len(objList))
if i%50 == 0 {
log.Printf("[INFO] Scanned %d repos", i)
}
i++
}
return nil
},
Import: func(ic *importContext, r *resource) error {
ic.emitUserOrServicePrincipalForPath(r.Data.Get("path").(string), "/Repos")
path := maybeStripWorkspacePrefix(r.Data.Get("path").(string))
if strings.HasPrefix(path, "/Repos") {
ic.emitUserOrServicePrincipalForPath(path, "/Repos")
} else if strings.HasPrefix(path, "/Users") {
ic.emitUserOrServicePrincipalForPath(path, "/Users")
}
ic.emitPermissionsIfNotIgnored(r, fmt.Sprintf("/repos/%s", r.ID),
"repo_"+ic.Importables["databricks_repo"].Name(ic, r.Data))
return nil
Expand Down Expand Up @@ -1518,12 +1535,15 @@ var resourcesMap map[string]importable = map[string]importable{
}
return shouldIgnore
},

Depends: []reference{
{Path: "path", Resource: "databricks_user", Match: "repos",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_service_principal", Match: "repos",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_user", Match: "home",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_service_principal", Match: "home",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
},
"databricks_workspace_conf": {
Expand Down Expand Up @@ -2236,6 +2256,8 @@ var resourcesMap map[string]importable = map[string]importable{
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
},
"databricks_directory": {
Expand Down Expand Up @@ -3436,8 +3458,8 @@ var resourcesMap map[string]importable = map[string]importable{
},
Import: func(ic *importContext, r *resource) error {
path := r.Data.Get("path").(string)
if strings.HasPrefix(path, "/Repos") {
ic.emitRepoByPath(path)
if ic.isInRepoOrGitFolder(path, false) {
ic.emitRepoOrGitFolder(path, false)
return nil
}
parts := strings.Split(path, "/")
Expand All @@ -3459,10 +3481,7 @@ var resourcesMap map[string]importable = map[string]importable{
"dashboard_"+ic.Importables["databricks_dashboard"].Name(ic, r.Data))
parentPath := r.Data.Get("parent_path").(string)
if parentPath != "" && parentPath != "/" {
ic.Emit(&resource{
Resource: "databricks_directory",
ID: parentPath,
})
ic.emitDirectoryOrRepo(parentPath)
}
warehouseId := r.Data.Get("warehouse_id").(string)
if warehouseId != "" {
Expand All @@ -3478,7 +3497,7 @@ var resourcesMap map[string]importable = map[string]importable{
return pathString == "dashboard_change_detected" || shouldOmitMd5Field(ic, pathString, as, d)
},
Ignore: func(ic *importContext, r *resource) bool {
return strings.HasPrefix(r.Data.Get("path").(string), "/Repos") || strings.HasPrefix(r.Data.Get("parent_path").(string), "/Repos")
return ic.isInRepoOrGitFolder(r.Data.Get("path").(string), false) || ic.isInRepoOrGitFolder(r.Data.Get("parent_path").(string), true)
},
Depends: []reference{
{Path: "file_path", File: true},
Expand Down
77 changes: 58 additions & 19 deletions exporter/importables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func importContextForTest() *importContext {
allSps: map[string]scim.User{},
channels: makeResourcesChannels(),
oldWorkspaceObjectMapping: map[int64]string{},
gitInfoCache: map[string]gitInfoCacheEntry{},
exportDeletedUsersAssets: false,
ignoredResources: map[string]struct{}{},
deletedResources: map[string]struct{}{},
Expand Down Expand Up @@ -1525,29 +1526,67 @@ func TestEmitSqlParent(t *testing.T) {
}

func TestEmitFilesFromSlice(t *testing.T) {
ic := importContextForTest()
ic.enableServices("storage,notebooks,wsfiles")
ic.emitFilesFromSlice([]string{
"dbfs:/FileStore/test.txt",
"/Workspace/Shared/test.txt",
"nothing",
qa.HTTPFixturesApply(t, []qa.HTTPFixture{
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Ftest.txt&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Fgit%2Ftest.txt&return_git_info=true",
Response: workspace.ObjectStatus{
GitInfo: &sdk_workspace.RepoInfo{
Id: 1234,
},
},
},
}, func(ctx context.Context, client *common.DatabricksClient) {
ic := importContextForTestWithClient(ctx, client)
ic.enableServices("storage,notebooks,wsfiles,repos")
ic.emitFilesFromSlice([]string{
"dbfs:/FileStore/test.txt",
"/Workspace/Shared/test.txt",
"/Workspace/Shared/git/test.txt",
"nothing",
})
assert.Equal(t, 3, len(ic.testEmits))
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_repo[<unknown>] (id: 1234)")
})
assert.Equal(t, 2, len(ic.testEmits))
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
}

func TestEmitFilesFromMap(t *testing.T) {
ic := importContextForTest()
ic.enableServices("storage,notebooks,wsfiles")
ic.emitFilesFromMap(map[string]string{
"k1": "dbfs:/FileStore/test.txt",
"k2": "/Workspace/Shared/test.txt",
"k3": "nothing",
})
assert.Equal(t, 2, len(ic.testEmits))
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
qa.HTTPFixturesApply(t, []qa.HTTPFixture{
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Ftest.txt&return_git_info=true",
Response: workspace.ObjectStatus{},
},
{
Method: "GET",
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Fgit%2Ftest.txt&return_git_info=true",
Response: workspace.ObjectStatus{
GitInfo: &sdk_workspace.RepoInfo{
Id: 1234,
},
},
},
}, func(ctx context.Context, client *common.DatabricksClient) {
ic := importContextForTestWithClient(ctx, client)
ic.enableServices("storage,notebooks,wsfiles,repos")
ic.emitFilesFromMap(map[string]string{
"k1": "dbfs:/FileStore/test.txt",
"k2": "/Workspace/Shared/test.txt",
"k3": "nothing",
"k4": "/Workspace/Shared/git/test.txt",
})
assert.Equal(t, 3, len(ic.testEmits))
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
assert.Contains(t, ic.testEmits, "databricks_repo[<unknown>] (id: 1234)")
})
}

func TestStorageCredentialListFails(t *testing.T) {
Expand Down
Loading

0 comments on commit 18d13bf

Please sign in to comment.