diff --git a/pkg/controller/nodes/task/catalog/datacatalog/transformer.go b/pkg/controller/nodes/task/catalog/datacatalog/transformer.go index abf36f128..33669e3c7 100644 --- a/pkg/controller/nodes/task/catalog/datacatalog/transformer.go +++ b/pkg/controller/nodes/task/catalog/datacatalog/transformer.go @@ -11,9 +11,7 @@ import ( "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/datacatalog" "github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/catalog" - "github.com/flyteorg/flytepropeller/pkg/compiler/validators" - "github.com/flyteorg/flytestdlib/pbhash" ) @@ -116,72 +114,12 @@ func generateTaskSignatureHash(ctx context.Context, taskInterface core.TypedInte return fmt.Sprintf("%v-%v", inputHashString, outputHashString), nil } -// Hashify a literal, in other words, produce a new literal where the corresponding value is removed in case -// the literal hash is set. -func hashify(literal *core.Literal) *core.Literal { - // Two recursive cases: - // 1. A collection of literals or - // 2. A map of literals - - if literal.GetCollection() != nil { - literals := literal.GetCollection().Literals - literalsHash := make([]*core.Literal, 0) - for _, lit := range literals { - literalsHash = append(literalsHash, hashify(lit)) - } - return &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: literalsHash, - }, - }, - } - } - if literal.GetMap() != nil { - literalsMap := make(map[string]*core.Literal) - for key, lit := range literal.GetMap().Literals { - literalsMap[key] = hashify(lit) - } - return &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: literalsMap, - }, - }, - } - } - - // And a base case that consists of a scalar, where the hash might be set - if literal.GetHash() != "" { - return &core.Literal{ - Hash: literal.GetHash(), - } - } - return literal -} - // Generate a tag by hashing the input values func GenerateArtifactTagName(ctx context.Context, inputs *core.LiteralMap) (string, error) { - if inputs == nil || len(inputs.Literals) == 0 { - inputs = &emptyLiteralMap - } - - // Hashify, i.e. generate a copy of the literal map where each literal value is removed - // in case the corresponding hash is set. - hashifiedLiteralMap := make(map[string]*core.Literal, len(inputs.Literals)) - for name, literal := range inputs.Literals { - hashifiedLiteralMap[name] = hashify(literal) - } - hashifiedInputs := &core.LiteralMap{ - Literals: hashifiedLiteralMap, - } - - inputsHash, err := pbhash.ComputeHash(ctx, hashifiedInputs) + hashString, err := catalog.HashLiteralMap(ctx, inputs) if err != nil { return "", err } - - hashString := base64.RawURLEncoding.EncodeToString(inputsHash) tag := fmt.Sprintf("%s-%s", cachedTaskTag, hashString) return tag, nil } diff --git a/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go b/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go index 2ce8c1dff..1588d171e 100644 --- a/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go +++ b/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go @@ -99,33 +99,14 @@ func TestVariableMapOrder(t *testing.T) { assert.Equal(t, datasetID.String(), datasetIDDupe.String()) } -// Ensure the key order on the inputs generates the same tag -func TestInputValueSorted(t *testing.T) { +// Ensure correct format of ArtifactTagName +func TestGenerateArtifactTagName(t *testing.T) { literalMap, err := coreutils.MakeLiteralMap(map[string]interface{}{"1": 1, "2": 2}) assert.NoError(t, err) tag, err := GenerateArtifactTagName(context.TODO(), literalMap) assert.NoError(t, err) assert.Equal(t, "flyte_cached-GQid5LjHbakcW68DS3P2jp80QLbiF0olFHF2hTh5bg8", tag) - - literalMap, err = coreutils.MakeLiteralMap(map[string]interface{}{"2": 2, "1": 1}) - assert.NoError(t, err) - - tagDupe, err := GenerateArtifactTagName(context.TODO(), literalMap) - assert.NoError(t, err) - assert.Equal(t, tagDupe, tag) -} - -// Ensure that empty inputs are hashed the same way -func TestNoInputValues(t *testing.T) { - tag, err := GenerateArtifactTagName(context.TODO(), nil) - assert.NoError(t, err) - assert.Equal(t, "flyte_cached-GKw-c0PwFokMUQ6T-TUmEWnZ4_VlQ2Qpgw-vCTT0-OQ", tag) - - tagDupe, err := GenerateArtifactTagName(context.TODO(), &core.LiteralMap{Literals: nil}) - assert.NoError(t, err) - assert.Equal(t, "flyte_cached-GKw-c0PwFokMUQ6T-TUmEWnZ4_VlQ2Qpgw-vCTT0-OQ", tagDupe) - assert.Equal(t, tagDupe, tag) } func TestGetOrDefault(t *testing.T) { @@ -340,559 +321,3 @@ func TestDatasetIDToIdentifier(t *testing.T) { assert.Equal(t, "d", id.Domain) assert.Equal(t, "v", id.Version) } - -func TestGenerateArtifactTagName_LiteralsWithHashSet(t *testing.T) { - tests := []struct { - name string - literal *core.Literal - expectedLiteral *core.Literal - }{ - { - name: "single literal where hash is not set", - literal: coreutils.MustMakeLiteral(42), - expectedLiteral: coreutils.MustMakeLiteral(42), - }, - { - name: "single literal containing hash", - literal: &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "abcde", - }, - expectedLiteral: &core.Literal{ - Value: nil, - Hash: "abcde", - }, - }, - { - name: "list of literals containing a single item where literal sets its hash", - literal: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "hash1", - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: nil, - Hash: "hash1", - }, - }, - }, - }, - }, - }, - { - name: "list of literals containing two items where each literal sets its hash", - literal: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "hash1", - }, - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://another-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "hash2", - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: nil, - Hash: "hash1", - }, - &core.Literal{ - Value: nil, - Hash: "hash2", - }, - }, - }, - }, - }, - }, - { - name: "list of literals containing two items where only one literal has its hash set", - literal: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "hash1", - }, - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://another-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: nil, - Hash: "hash1", - }, - &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://another-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - { - name: "map of literals containing a single item where literal sets its hash", - literal: &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "hash-42", - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: nil, - Hash: "hash-42", - }, - }, - }, - }, - }, - }, - { - name: "map of literals containing a three items where only one literal sets its hash", - literal: &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - "literal2-set-its-hash": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-2", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "literal-2-hash", - }, - "literal3": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-3", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - "literal2-set-its-hash": &core.Literal{ - Value: nil, - Hash: "literal-2-hash", - }, - "literal3": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-3", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - { - name: "list of map of literals containing a mixture of literals have their hashes set or not set", - literal: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - "literal2-set-its-hash": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-2", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "literal-2-hash", - }, - "literal3": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-3", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "another-literal-1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-another-literal-1", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - Hash: "another-literal-1-hash", - }, - "another-literal2-set-its-hash": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-2", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expectedLiteral: &core.Literal{ - Value: &core.Literal_Collection{ - Collection: &core.LiteralCollection{ - Literals: []*core.Literal{ - &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "literal1": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - "literal2-set-its-hash": &core.Literal{ - Value: nil, - Hash: "literal-2-hash", - }, - "literal3": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-3", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - &core.Literal{ - Value: &core.Literal_Map{ - Map: &core.LiteralMap{ - Literals: map[string]*core.Literal{ - "another-literal-1": &core.Literal{ - Value: nil, - Hash: "another-literal-1-hash", - }, - "another-literal2-set-its-hash": &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_StructuredDataset{ - StructuredDataset: &core.StructuredDataset{ - Uri: "my-blob-stora://some-address-for-literal-2", - Metadata: &core.StructuredDatasetMetadata{ - StructuredDatasetType: &core.StructuredDatasetType{ - Format: "my-columnar-data-format", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.expectedLiteral, hashify(tt.literal)) - - // Double-check that generating a tag is successful - literalMap := &core.LiteralMap{Literals: map[string]*core.Literal{"o0": tt.literal}} - tag, err := GenerateArtifactTagName(context.TODO(), literalMap) - assert.NoError(t, err) - assert.NotEmpty(t, tag) - }) - } -}