From 1e9b863e505f523b6d5b76244867217a8b5411a3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 8 Sep 2023 14:37:19 +0200 Subject: [PATCH 1/3] Apply Python wheel trampoline if workspace library is used --- bundle/artifacts/artifacts_test.go | 2 + bundle/config/artifact.go | 6 +- bundle/libraries/libraries.go | 81 ++-------------- bundle/libraries/utils/utils.go | 97 +++++++++++++++++++ .../utils_test.go} | 4 +- bundle/python/transform.go | 13 ++- bundle/python/transform_test.go | 2 +- 7 files changed, 121 insertions(+), 84 deletions(-) create mode 100644 bundle/libraries/utils/utils.go rename bundle/libraries/{libraries_test.go => utils/utils_test.go} (89%) diff --git a/bundle/artifacts/artifacts_test.go b/bundle/artifacts/artifacts_test.go index 4c0a18f3889..bbae44efa96 100644 --- a/bundle/artifacts/artifacts_test.go +++ b/bundle/artifacts/artifacts_test.go @@ -105,6 +105,7 @@ func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) { b.WorkspaceClient().Workspace.WithImpl(MockWorkspaceService{}) artifact := &config.Artifact{ + Type: "whl", Files: []config.ArtifactFile{ { Source: whlPath, @@ -118,4 +119,5 @@ func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) { err := uploadArtifact(context.Background(), artifact, b) require.NoError(t, err) require.Regexp(t, regexp.MustCompile("/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].RemotePath) + require.Regexp(t, regexp.MustCompile("/Workspace/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].Libraries[0].Whl) } diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index 1955e265db4..e5267b6c52f 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/bundle/libraries/utils" "github.com/databricks/databricks-sdk-go/service/compute" ) @@ -78,10 +79,7 @@ func (a *Artifact) NormalisePaths() { remotePath := path.Join(wsfsBase, f.RemotePath) for i := range f.Libraries { lib := f.Libraries[i] - switch a.Type { - case ArtifactPythonWheel: - lib.Whl = remotePath - } + utils.ReplacePath(lib, remotePath) } } diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go index d9a257db887..b1ab6aa0229 100644 --- a/bundle/libraries/libraries.go +++ b/bundle/libraries/libraries.go @@ -3,12 +3,11 @@ package libraries import ( "context" "fmt" - "net/url" "path/filepath" - "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/libraries/utils" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" @@ -60,7 +59,7 @@ func FindAllWheelTasksWithLocalLibraries(b *bundle.Bundle) []*jobs.Task { tasks := findAllTasks(b) wheelTasks := make([]*jobs.Task, 0) for _, task := range tasks { - if task.PythonWheelTask != nil && IsTaskWithLocalLibraries(task) { + if task.PythonWheelTask != nil && utils.IsTaskWithLocalLibraries(task) { wheelTasks = append(wheelTasks, task) } } @@ -68,16 +67,6 @@ func FindAllWheelTasksWithLocalLibraries(b *bundle.Bundle) []*jobs.Task { return wheelTasks } -func IsTaskWithLocalLibraries(task *jobs.Task) bool { - for _, l := range task.Libraries { - if isLocalLibrary(&l) { - return true - } - } - - return false -} - func isMissingRequiredLibraries(task *jobs.Task) bool { if task.Libraries != nil { return false @@ -87,12 +76,12 @@ func isMissingRequiredLibraries(task *jobs.Task) bool { } func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) { - path := libPath(lib) - if path == "" { + path := utils.LibPath(lib) + if path == nil { return nil, nil } - fullPath := filepath.Join(b.Config.Path, path) + fullPath := filepath.Join(b.Config.Path, *path) return filepath.Glob(fullPath) } @@ -102,8 +91,8 @@ func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b return err } - if len(matches) == 0 && isLocalLibrary(lib) { - return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", libPath(lib)) + if len(matches) == 0 && utils.IsLocalLibrary(lib) { + return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", utils.LibPath(lib)) } for _, match := range matches { @@ -129,59 +118,3 @@ func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.Artifac return nil, fmt.Errorf("artifact section is not defined for file at %s", path) } - -func libPath(library *compute.Library) string { - if library.Whl != "" { - return library.Whl - } - if library.Jar != "" { - return library.Jar - } - if library.Egg != "" { - return library.Egg - } - - return "" -} - -func isLocalLibrary(library *compute.Library) bool { - path := libPath(library) - if path == "" { - return false - } - - if isExplicitFileScheme(path) { - return true - } - - if isRemoteStorageScheme(path) { - return false - } - - return !isWorkspacePath(path) -} - -func isExplicitFileScheme(path string) bool { - return strings.HasPrefix(path, "file://") -} - -func isRemoteStorageScheme(path string) bool { - url, err := url.Parse(path) - if err != nil { - return false - } - - if url.Scheme == "" { - return false - } - - // If the path starts with scheme:/ format, it's a correct remote storage scheme - return strings.HasPrefix(path, url.Scheme+":/") - -} - -func isWorkspacePath(path string) bool { - return strings.HasPrefix(path, "/Workspace/") || - strings.HasPrefix(path, "/Users/") || - strings.HasPrefix(path, "/Shared/") -} diff --git a/bundle/libraries/utils/utils.go b/bundle/libraries/utils/utils.go new file mode 100644 index 00000000000..7b51d03ed6a --- /dev/null +++ b/bundle/libraries/utils/utils.go @@ -0,0 +1,97 @@ +package utils + +import ( + "net/url" + "strings" + + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +func ReplacePath(lib *compute.Library, newPath string) { + path := LibPath(lib) + if path == nil { + return + } + + *path = newPath +} + +func LibPath(library *compute.Library) *string { + if library.Whl != "" { + return &library.Whl + } + if library.Jar != "" { + return &library.Jar + } + if library.Egg != "" { + return &library.Egg + } + + return nil +} + +func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool { + for _, l := range task.Libraries { + path := LibPath(&l) + if path == nil { + continue + } + if isWorkspacePath(*path) { + return true + } + } + + return false +} + +func IsTaskWithLocalLibraries(task *jobs.Task) bool { + for _, l := range task.Libraries { + if IsLocalLibrary(&l) { + return true + } + } + return false +} + +func isWorkspacePath(path string) bool { + return strings.HasPrefix(path, "/Workspace/") || + strings.HasPrefix(path, "/Users/") || + strings.HasPrefix(path, "/Shared/") +} + +func IsLocalLibrary(library *compute.Library) bool { + path := LibPath(library) + if path == nil { + return false + } + + if isExplicitFileScheme(*path) { + return true + } + + if isRemoteStorageScheme(*path) { + return false + } + + return !isWorkspacePath(*path) +} + +func isExplicitFileScheme(path string) bool { + return strings.HasPrefix(path, "file://") +} + +func isRemoteStorageScheme(path string) bool { + url, err := url.Parse(path) + if err != nil { + return false + } + + if url.Scheme == "" { + return false + } + + // If the path starts with scheme:/ format, it's a correct remote storage scheme + return strings.HasPrefix(path, url.Scheme+":/") + +} diff --git a/bundle/libraries/libraries_test.go b/bundle/libraries/utils/utils_test.go similarity index 89% rename from bundle/libraries/libraries_test.go rename to bundle/libraries/utils/utils_test.go index 7ff1609ab07..773a5b752c5 100644 --- a/bundle/libraries/libraries_test.go +++ b/bundle/libraries/utils/utils_test.go @@ -1,4 +1,4 @@ -package libraries +package utils import ( "fmt" @@ -26,6 +26,6 @@ func TestIsLocalLbrary(t *testing.T) { lib := compute.Library{ Whl: p, } - require.Equal(t, result, isLocalLibrary(&lib), fmt.Sprintf("isLocalLibrary must return %t for path %s ", result, p)) + require.Equal(t, result, IsLocalLibrary(&lib), fmt.Sprintf("isLocalLibrary must return %t for path %s ", result, p)) } } diff --git a/bundle/python/transform.go b/bundle/python/transform.go index 3d744df9dc3..85ccce7db46 100644 --- a/bundle/python/transform.go +++ b/bundle/python/transform.go @@ -7,7 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/libraries/utils" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -73,8 +73,11 @@ func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { for i := range tasks { task := &tasks[i] - // Keep only Python wheel tasks with local libraries referenced - if task.PythonWheelTask == nil || !libraries.IsTaskWithLocalLibraries(task) { + // Keep only Python wheel tasks with workspace libraries referenced. + // At this point of moment we don't have local paths in Libraries sections anymore + // Local paths have been replaced with the remote when the artifacts where uploaded + // in artifacts.UploadAll mutator. + if task.PythonWheelTask == nil || !needsTrampoline(task) { continue } @@ -87,6 +90,10 @@ func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { return result } +func needsTrampoline(task *jobs.Task) bool { + return utils.IsTaskWithWorkspaceLibraries(task) +} + func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) { params, err := t.generateParameters(task.PythonWheelTask) if err != nil { diff --git a/bundle/python/transform_test.go b/bundle/python/transform_test.go index 99d3129d8ca..a7448f234e7 100644 --- a/bundle/python/transform_test.go +++ b/bundle/python/transform_test.go @@ -84,7 +84,7 @@ func TestTransformFiltersWheelTasksOnly(t *testing.T) { TaskKey: "key1", PythonWheelTask: &jobs.PythonWheelTask{}, Libraries: []compute.Library{ - {Whl: "./dist/test.whl"}, + {Whl: "/Workspace/Users/test@test.com/bundle/dist/test.whl"}, }, }, { From bd4b1ccd2df6252f40bca026fb89233fff0f87e9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 8 Sep 2023 15:12:37 +0200 Subject: [PATCH 2/3] fix error message --- bundle/libraries/libraries.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go index b1ab6aa0229..9e3481f1c90 100644 --- a/bundle/libraries/libraries.go +++ b/bundle/libraries/libraries.go @@ -92,7 +92,7 @@ func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b } if len(matches) == 0 && utils.IsLocalLibrary(lib) { - return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", utils.LibPath(lib)) + return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", *utils.LibPath(lib)) } for _, match := range matches { From 2a97bcbfb414bf536394bcfd09cdcc3c5318f5b1 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 8 Sep 2023 15:35:43 +0200 Subject: [PATCH 3/3] refactored --- bundle/config/artifact.go | 10 +- bundle/libraries/libraries.go | 92 ++++++++++++++++-- .../utils_test.go => libraries_test.go} | 4 +- bundle/libraries/utils/utils.go | 97 ------------------- bundle/python/transform.go | 4 +- 5 files changed, 97 insertions(+), 110 deletions(-) rename bundle/libraries/{utils/utils_test.go => libraries_test.go} (89%) delete mode 100644 bundle/libraries/utils/utils.go diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index e5267b6c52f..d7048a02ecc 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -9,7 +9,6 @@ import ( "strings" "github.com/databricks/cli/bundle/config/paths" - "github.com/databricks/cli/bundle/libraries/utils" "github.com/databricks/databricks-sdk-go/service/compute" ) @@ -79,7 +78,14 @@ func (a *Artifact) NormalisePaths() { remotePath := path.Join(wsfsBase, f.RemotePath) for i := range f.Libraries { lib := f.Libraries[i] - utils.ReplacePath(lib, remotePath) + if lib.Whl != "" { + lib.Whl = remotePath + continue + } + if lib.Jar != "" { + lib.Jar = remotePath + continue + } } } diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go index 9e3481f1c90..8e2e504c545 100644 --- a/bundle/libraries/libraries.go +++ b/bundle/libraries/libraries.go @@ -3,11 +3,12 @@ package libraries import ( "context" "fmt" + "net/url" "path/filepath" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/libraries/utils" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" @@ -59,7 +60,7 @@ func FindAllWheelTasksWithLocalLibraries(b *bundle.Bundle) []*jobs.Task { tasks := findAllTasks(b) wheelTasks := make([]*jobs.Task, 0) for _, task := range tasks { - if task.PythonWheelTask != nil && utils.IsTaskWithLocalLibraries(task) { + if task.PythonWheelTask != nil && IsTaskWithLocalLibraries(task) { wheelTasks = append(wheelTasks, task) } } @@ -67,6 +68,27 @@ func FindAllWheelTasksWithLocalLibraries(b *bundle.Bundle) []*jobs.Task { return wheelTasks } +func IsTaskWithLocalLibraries(task *jobs.Task) bool { + for _, l := range task.Libraries { + if isLocalLibrary(&l) { + return true + } + } + + return false +} + +func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool { + for _, l := range task.Libraries { + path := libPath(&l) + if isWorkspacePath(path) { + return true + } + } + + return false +} + func isMissingRequiredLibraries(task *jobs.Task) bool { if task.Libraries != nil { return false @@ -76,12 +98,12 @@ func isMissingRequiredLibraries(task *jobs.Task) bool { } func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) { - path := utils.LibPath(lib) - if path == nil { + path := libPath(lib) + if path == "" { return nil, nil } - fullPath := filepath.Join(b.Config.Path, *path) + fullPath := filepath.Join(b.Config.Path, path) return filepath.Glob(fullPath) } @@ -91,8 +113,8 @@ func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b return err } - if len(matches) == 0 && utils.IsLocalLibrary(lib) { - return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", *utils.LibPath(lib)) + if len(matches) == 0 && isLocalLibrary(lib) { + return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", libPath(lib)) } for _, match := range matches { @@ -118,3 +140,59 @@ func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.Artifac return nil, fmt.Errorf("artifact section is not defined for file at %s", path) } + +func libPath(library *compute.Library) string { + if library.Whl != "" { + return library.Whl + } + if library.Jar != "" { + return library.Jar + } + if library.Egg != "" { + return library.Egg + } + + return "" +} + +func isLocalLibrary(library *compute.Library) bool { + path := libPath(library) + if path == "" { + return false + } + + if isExplicitFileScheme(path) { + return true + } + + if isRemoteStorageScheme(path) { + return false + } + + return !isWorkspacePath(path) +} + +func isExplicitFileScheme(path string) bool { + return strings.HasPrefix(path, "file://") +} + +func isRemoteStorageScheme(path string) bool { + url, err := url.Parse(path) + if err != nil { + return false + } + + if url.Scheme == "" { + return false + } + + // If the path starts with scheme:/ format, it's a correct remote storage scheme + return strings.HasPrefix(path, url.Scheme+":/") + +} + +func isWorkspacePath(path string) bool { + return strings.HasPrefix(path, "/Workspace/") || + strings.HasPrefix(path, "/Users/") || + strings.HasPrefix(path, "/Shared/") +} diff --git a/bundle/libraries/utils/utils_test.go b/bundle/libraries/libraries_test.go similarity index 89% rename from bundle/libraries/utils/utils_test.go rename to bundle/libraries/libraries_test.go index 773a5b752c5..7ff1609ab07 100644 --- a/bundle/libraries/utils/utils_test.go +++ b/bundle/libraries/libraries_test.go @@ -1,4 +1,4 @@ -package utils +package libraries import ( "fmt" @@ -26,6 +26,6 @@ func TestIsLocalLbrary(t *testing.T) { lib := compute.Library{ Whl: p, } - require.Equal(t, result, IsLocalLibrary(&lib), fmt.Sprintf("isLocalLibrary must return %t for path %s ", result, p)) + require.Equal(t, result, isLocalLibrary(&lib), fmt.Sprintf("isLocalLibrary must return %t for path %s ", result, p)) } } diff --git a/bundle/libraries/utils/utils.go b/bundle/libraries/utils/utils.go deleted file mode 100644 index 7b51d03ed6a..00000000000 --- a/bundle/libraries/utils/utils.go +++ /dev/null @@ -1,97 +0,0 @@ -package utils - -import ( - "net/url" - "strings" - - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" -) - -func ReplacePath(lib *compute.Library, newPath string) { - path := LibPath(lib) - if path == nil { - return - } - - *path = newPath -} - -func LibPath(library *compute.Library) *string { - if library.Whl != "" { - return &library.Whl - } - if library.Jar != "" { - return &library.Jar - } - if library.Egg != "" { - return &library.Egg - } - - return nil -} - -func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool { - for _, l := range task.Libraries { - path := LibPath(&l) - if path == nil { - continue - } - if isWorkspacePath(*path) { - return true - } - } - - return false -} - -func IsTaskWithLocalLibraries(task *jobs.Task) bool { - for _, l := range task.Libraries { - if IsLocalLibrary(&l) { - return true - } - } - return false -} - -func isWorkspacePath(path string) bool { - return strings.HasPrefix(path, "/Workspace/") || - strings.HasPrefix(path, "/Users/") || - strings.HasPrefix(path, "/Shared/") -} - -func IsLocalLibrary(library *compute.Library) bool { - path := LibPath(library) - if path == nil { - return false - } - - if isExplicitFileScheme(*path) { - return true - } - - if isRemoteStorageScheme(*path) { - return false - } - - return !isWorkspacePath(*path) -} - -func isExplicitFileScheme(path string) bool { - return strings.HasPrefix(path, "file://") -} - -func isRemoteStorageScheme(path string) bool { - url, err := url.Parse(path) - if err != nil { - return false - } - - if url.Scheme == "" { - return false - } - - // If the path starts with scheme:/ format, it's a correct remote storage scheme - return strings.HasPrefix(path, url.Scheme+":/") - -} diff --git a/bundle/python/transform.go b/bundle/python/transform.go index 85ccce7db46..d8eb33f540b 100644 --- a/bundle/python/transform.go +++ b/bundle/python/transform.go @@ -7,7 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/libraries/utils" + "github.com/databricks/cli/bundle/libraries" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -91,7 +91,7 @@ func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { } func needsTrampoline(task *jobs.Task) bool { - return utils.IsTaskWithWorkspaceLibraries(task) + return libraries.IsTaskWithWorkspaceLibraries(task) } func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) {