Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add paths field to bundle sync configuration #1694

Merged
merged 20 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ type Bundle struct {
// Exclusively use this field for filesystem operations.
BundleRoot vfs.Path

// SyncRoot is a virtual filesystem path to the root of the files that
// are synchronized to the workspace. It can be an ancestor to [BundleRoot],
// but not a descendant; that is, [SyncRoot] must contain [BundleRoot].
SyncRoot vfs.Path
SyncRootPath string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SyncRootPath is the remote path were files will be synced to, correct? The naming seems to be a bit confusing when I read the rest of the code

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment to clarify; this is the local path to the sync root (the same as SyncRoot.Native()).


Config config.Root

// Metadata about the bundle deployment. This is the interface Databricks services
Expand Down
4 changes: 4 additions & 0 deletions bundle/bundle_read_only.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ func (r ReadOnlyBundle) BundleRoot() vfs.Path {
return r.b.BundleRoot
}

func (r ReadOnlyBundle) SyncRoot() vfs.Path {
return r.b.SyncRoot
}

func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient {
return r.b.WorkspaceClient()
}
Expand Down
4 changes: 2 additions & 2 deletions bundle/config/mutator/configure_wsfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (m *configureWSFS) Name() string {
}

func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
root := b.BundleRoot.Native()
root := b.SyncRoot.Native()

// The bundle root must be located in /Workspace/
if !strings.HasPrefix(root, "/Workspace/") {
Expand All @@ -45,6 +45,6 @@ func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return diag.FromErr(err)
}

b.BundleRoot = p
b.SyncRoot = p
return nil
}
4 changes: 4 additions & 0 deletions bundle/config/mutator/rewrite_sync_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func (m *rewriteSyncPaths) makeRelativeTo(root string) dyn.MapFunc {
func (m *rewriteSyncPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
return dyn.Map(v, "sync", func(_ dyn.Path, v dyn.Value) (nv dyn.Value, err error) {
v, err = dyn.Map(v, "paths", dyn.Foreach(m.makeRelativeTo(b.RootPath)))
if err != nil {
return dyn.InvalidValue, err
}
v, err = dyn.Map(v, "include", dyn.Foreach(m.makeRelativeTo(b.RootPath)))
if err != nil {
return dyn.InvalidValue, err
Expand Down
16 changes: 16 additions & 0 deletions bundle/config/mutator/rewrite_sync_paths_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
RootPath: ".",
Config: config.Root{
Sync: config.Sync{
Paths: []string{
".",
"../common",
},
Include: []string{
"foo",
"bar",
Expand All @@ -29,6 +33,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
},
}

bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml")
bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml")
bundletest.SetLocation(b, "sync.include[0]", "./file.yml")
bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml")
bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml")
Expand All @@ -37,6 +43,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) {
diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths())
assert.NoError(t, diags.Error())

assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0])
assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1])
assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0])
assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1])
assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0])
Expand All @@ -48,6 +56,10 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
RootPath: "/tmp/dir",
Config: config.Root{
Sync: config.Sync{
Paths: []string{
".",
"../common",
},
Include: []string{
"foo",
"bar",
Expand All @@ -60,6 +72,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
},
}

bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml")
bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml")
bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml")
bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml")
bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml")
Expand All @@ -68,6 +82,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) {
diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths())
assert.NoError(t, diags.Error())

assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0])
assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1])
assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0])
assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1])
assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0])
Expand Down
48 changes: 48 additions & 0 deletions bundle/config/mutator/sync_default_path.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package mutator

import (
"context"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type syncDefaultPath struct{}

// SyncDefaultPath configures the default sync path to be equal to the bundle root.
func SyncDefaultPath() bundle.Mutator {
return &syncDefaultPath{}
}

func (m *syncDefaultPath) Name() string {
return "SyncDefaultPath"
}

func (m *syncDefaultPath) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
isset := false
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
pv, _ := dyn.Get(v, "sync.paths")

// If the sync paths field is already set, do nothing.
// We know it is set if its value is either a nil or a sequence (empty or not).
switch pv.Kind() {
case dyn.KindNil, dyn.KindSequence:
isset = true
}

return v, nil
})
if err != nil {
return diag.FromErr(err)
}

// If the sync paths field is already set, do nothing.
if isset {
return nil
}

// Set the sync paths to the default value.
b.Config.Sync.Paths = []string{"."}
return nil
}
82 changes: 82 additions & 0 deletions bundle/config/mutator/sync_default_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package mutator_test

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestSyncDefaultPath_DefaultIfUnset(t *testing.T) {
b := &bundle.Bundle{
RootPath: "/tmp/some/dir",
Config: config.Root{},
}

ctx := context.Background()
diags := bundle.Apply(ctx, b, mutator.SyncDefaultPath())
require.NoError(t, diags.Error())
assert.Equal(t, []string{"."}, b.Config.Sync.Paths)
}

func TestSyncDefaultPath_SkipIfSet(t *testing.T) {
tcases := []struct {
name string
paths dyn.Value
expect []string
}{
{
name: "nil",
paths: dyn.V(nil),
expect: nil,
},
{
name: "empty sequence",
paths: dyn.V([]dyn.Value{}),
expect: []string{},
},
{
name: "non-empty sequence",
paths: dyn.V([]dyn.Value{dyn.V("something")}),
expect: []string{"something"},
},
}

for _, tcase := range tcases {
t.Run(tcase.name, func(t *testing.T) {
b := &bundle.Bundle{
RootPath: "/tmp/some/dir",
Config: config.Root{},
}

diags := bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
v, err := dyn.Set(v, "sync", dyn.V(dyn.NewMapping()))
if err != nil {
return dyn.InvalidValue, err
}
v, err = dyn.Set(v, "sync.paths", tcase.paths)
if err != nil {
return dyn.InvalidValue, err
}
return v, nil
})
return diag.FromErr(err)
})
require.NoError(t, diags.Error())

ctx := context.Background()
diags = bundle.Apply(ctx, b, mutator.SyncDefaultPath())
require.NoError(t, diags.Error())

// If the sync paths field is already set, do nothing.
assert.Equal(t, tcase.expect, b.Config.Sync.Paths)
})
}
}
120 changes: 120 additions & 0 deletions bundle/config/mutator/sync_infer_root.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package mutator

import (
"context"
"fmt"
"path/filepath"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/vfs"
)

type syncInferRoot struct{}

// SyncInferRoot is a mutator that infers the root path of all files to synchronize by looking at the
// paths in the sync configuration. The sync root may be different from the bundle root
// when the user intends to synchronize files outside the bundle root.
//
// The sync root can be equivalent to or an ancestor of the bundle root, but not a descendant.
// That is, the sync root must contain the bundle root.
//
// This mutator requires all sync-related paths and patterns to be relative to the bundle root path.
// This is done by the [RewriteSyncPaths] mutator, which must run before this mutator.
func SyncInferRoot() bundle.Mutator {
return &syncInferRoot{}
}

func (m *syncInferRoot) Name() string {
return "SyncInferRoot"
}

// computeRoot finds the innermost path that contains the specified path.
// It traverses up the root path until it finds the innermost path.
// If the path does not exist, it returns an empty string.
//
// See "sync_infer_root_internal_test.go" for examples.
func (m *syncInferRoot) computeRoot(path string, root string) string {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understood correctly it finds a path to which both path and root belongs, correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so, would it be simpler to make both paths absolute, find a common prefix path and then make it relative again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct. It finds the longest prefix of root that contains path.

Your suggestion might work, but it will break if path traverses through the root dir (i.e. ../../.. on /tmp). This case is handled correctly here (as in, it fails and returns an empty string), because it deals with path components one-by-one.

Separately, we still allow the bundle root path to be relative or absolute and we don't force it to be absolute anywhere. We could consider doing this but we'd need to pay attention to what happens on the presentation side of errors and warnings. It is possible we'd all of a sudden show full paths where today we show just databricks.yml.

for !filepath.IsLocal(path) {
// Break if we have reached the root of the filesystem.
dir := filepath.Dir(root)
if dir == root {
return ""
}

// Update the sync path as we navigate up the directory tree.
path = filepath.Join(filepath.Base(root), path)

// Move up the directory tree.
root = dir
}

return filepath.Clean(root)
}

func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
var diags diag.Diagnostics

// Use the bundle root path as the starting point for inferring the sync root path.
bundleRootPath := filepath.Clean(b.RootPath)

// Infer the sync root path by looking at each one of the sync paths.
// Every sync path must be a descendant of the final sync root path.
syncRootPath := bundleRootPath
for _, path := range b.Config.Sync.Paths {
computedPath := m.computeRoot(path, bundleRootPath)
if computedPath == "" {
continue
}

// Update sync root path if the computed root path is an ancestor of the current sync root path.
if len(computedPath) < len(syncRootPath) {
syncRootPath = computedPath
}
}

// The new sync root path can only be an ancestor of the previous root path.
// Compute the relative path from the sync root to the bundle root.
rel, err := filepath.Rel(syncRootPath, bundleRootPath)
if err != nil {
return diag.FromErr(err)
}

// If during computation of the sync root path we hit the root of the filesystem,
// then one or more of the sync paths are outside the filesystem.
// Check if this happened by verifying that none of the paths escape the root
// when joined with the sync root path.
for i, path := range b.Config.Sync.Paths {
if filepath.IsLocal(filepath.Join(rel, path)) {
continue
}

diags = append(diags, diag.Diagnostic{
Severity: diag.Error,
Summary: fmt.Sprintf("invalid sync path %q", path),
Locations: b.Config.GetLocations(fmt.Sprintf("sync.paths[%d]", i)),
Paths: []dyn.Path{dyn.NewPath(dyn.Key("sync"), dyn.Key("paths"), dyn.Index(i))},
})
}

if diags.HasError() {
return diags
}

// Update all paths in the sync configuration to be relative to the sync root.
for i, p := range b.Config.Sync.Paths {
b.Config.Sync.Paths[i] = filepath.Join(rel, p)
}
for i, p := range b.Config.Sync.Include {
b.Config.Sync.Include[i] = filepath.Join(rel, p)
}
for i, p := range b.Config.Sync.Exclude {
b.Config.Sync.Exclude[i] = filepath.Join(rel, p)
}

// Configure the sync root path.
b.SyncRoot = vfs.MustNew(syncRootPath)
b.SyncRootPath = syncRootPath
return nil
}
Loading
Loading