Skip to content

Commit

Permalink
Merge branch '1.5.7' into show-explore-tree-display-2-for-1.5.7
Browse files Browse the repository at this point in the history
  • Loading branch information
sonika-shah authored Oct 16, 2024
2 parents 0cdbd37 + 6a4c685 commit d9e85df
Show file tree
Hide file tree
Showing 16 changed files with 419 additions and 71 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/maven-sonar-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
push:
branches:
- main
- '0.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+'
paths:
- "openmetadata-service/**"
- "openmetadata-ui/**"
Expand Down Expand Up @@ -116,4 +116,4 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
fail_on_test_failures: true
report_paths: 'openmetadata-service/target/surefire-reports/TEST-*.xml'
report_paths: 'openmetadata-service/target/surefire-reports/TEST-*.xml'
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public static List<String> fieldToInternalArray(String field) {
}

/**
* Parses a field containing key-value pairs separated by semicolons, correctly handling quotes.
* Parses a field containing key-value pairs separated by FIELD_SEPARATOR, correctly handling quotes.
* Each key-value pair may also be enclosed in quotes, especially if it contains delimiter like (SEPARATOR , FIELD_SEPARATOR).
* Input Example:
* "key1:value1;key2:value2;\"key3:value;with;semicolon\""
Expand All @@ -124,7 +124,8 @@ public static List<String> fieldToExtensionStrings(String field) throws IOExcept
return List.of();
}

// Replace semicolons within quoted strings with a placeholder
// Case when semicolon is part of the fieldValue - Replace semicolons within quoted strings with
// a placeholder
String preprocessedField =
Pattern.compile("\"([^\"]*)\"") // Matches content inside double quotes
.matcher(field)
Expand All @@ -146,9 +147,7 @@ public static List<String> fieldToExtensionStrings(String field) throws IOExcept
.flatMap(CSVRecord::stream)
.map(
value ->
value
.replace("__SEMICOLON__", ";")
.replace("\\n", "\n")) // Restore original semicolons and newlines
value.replace("__SEMICOLON__", ";")) // Restore original semicolons and newlines
.map(
value ->
value.startsWith("\"") && value.endsWith("\"") // Remove outer quotes if present
Expand All @@ -158,6 +157,48 @@ public static List<String> fieldToExtensionStrings(String field) throws IOExcept
}
}

/**
 * Splits a comma-separated column field into its individual values, honoring double quotes.
 * A value wrapped in quotes may itself contain the comma delimiter.
 * Input Example:
 * "value1,value2,\"value,with,comma\""
 * Output: [value1, value2, value,with,comma]
 *
 */
public static List<String> fieldToColumns(String field) throws IOException {
  if (field == null || field.isBlank()) {
    return Collections.emptyList();
  }

  // Shield commas that appear inside quoted sections behind a placeholder so the
  // CSV parser does not treat them as value separators.
  String masked =
      Pattern.compile("\"([^\"]*)\"")
          .matcher(field)
          .replaceAll(mr -> "\"" + mr.group(1).replace(",", "__COMMA__") + "\"");

  // Escape newlines and quotes so the parser sees them as literal characters.
  masked = masked.replace("\n", "\\n").replace("\"", "\\\"");

  CSVFormat format = CSVFormat.DEFAULT.withDelimiter(',').withQuote('"').withEscape('\\');

  try (CSVParser parser = CSVParser.parse(new StringReader(masked), format)) {
    return parser.getRecords().stream()
        .flatMap(CSVRecord::stream)
        .map(
            cell -> {
              // Restore the commas that were masked above.
              String restored = cell.replace("__COMMA__", ",");
              // Strip one pair of wrapping quotes if the value kept them.
              if (restored.startsWith("\"") && restored.endsWith("\"")) {
                return restored.substring(1, restored.length() - 1);
              }
              return restored;
            })
        .collect(Collectors.toList());
  }
}

/** Wraps {@code field} in a pair of double quotes. */
public static String quote(String field) {
  return "\"" + field + "\"";
}
Expand Down Expand Up @@ -270,6 +311,13 @@ private static String quoteCsvField(String str) {
return str;
}

/**
 * Quotes {@code str} only when it contains the SEPARATOR delimiter, so the value
 * survives being split back apart on import. Other values pass through unchanged.
 */
private static String quoteCsvFieldForSeparator(String str) {
  return str.contains(SEPARATOR) ? quote(str) : str;
}

public static List<String> addExtension(List<String> csvRecord, Object extension) {
if (extension == null) {
csvRecord.add(null);
Expand Down Expand Up @@ -310,6 +358,8 @@ private static String formatMapValue(Map<String, Object> valueMap) {
return formatEntityReference(valueMap);
} else if (isTimeInterval(valueMap)) {
return formatTimeInterval(valueMap);
} else if (isTableType(valueMap)) {
return formatTableRows(valueMap);
}

return valueMap.toString();
Expand Down Expand Up @@ -339,11 +389,30 @@ private static boolean isTimeInterval(Map<String, Object> valueMap) {
return valueMap.containsKey("start") && valueMap.containsKey("end");
}

/** A value map represents a table-type custom property when it carries both "rows" and "columns" keys. */
private static boolean isTableType(Map<String, Object> valueMap) {
  boolean hasRows = valueMap.containsKey("rows");
  boolean hasColumns = valueMap.containsKey("columns");
  return hasRows && hasColumns;
}

/** Renders an entity-reference map as {@code type<sep>fullyQualifiedName} using ENTITY_TYPE_SEPARATOR. */
private static String formatEntityReference(Map<String, Object> valueMap) {
  Object entityType = valueMap.get("type");
  Object fqn = valueMap.get("fullyQualifiedName");
  return entityType + ENTITY_TYPE_SEPARATOR + fqn;
}

/** Renders a time-interval map as {@code start<sep>end} using ENTITY_TYPE_SEPARATOR. */
private static String formatTimeInterval(Map<String, Object> valueMap) {
  Object start = valueMap.get("start");
  Object end = valueMap.get("end");
  return start + ENTITY_TYPE_SEPARATOR + end;
}

/**
 * Serializes a table-type custom property: each row's cells (looked up by column name)
 * are joined with SEPARATOR, and rows are joined with INTERNAL_ARRAY_SEPARATOR.
 * Cells containing SEPARATOR are quoted so they can be split back apart on import.
 */
@SuppressWarnings("unchecked") // valueMap comes from deserialized JSON; shapes checked by isTableType
private static String formatTableRows(Map<String, Object> valueMap) {
  List<String> columns = (List<String>) valueMap.get("columns");
  List<Map<String, Object>> rows = (List<Map<String, Object>>) valueMap.get("rows");

  return rows.stream()
      .map(
          row ->
              columns.stream()
                  .map(
                      column -> {
                        // getOrDefault still yields null when the key is present with a
                        // null value, which would NPE on toString(); guard explicitly.
                        Object cell = row.get(column);
                        return quoteCsvFieldForSeparator(cell == null ? "" : cell.toString());
                      })
                  .collect(Collectors.joining(SEPARATOR)))
      .collect(Collectors.joining(INTERNAL_ARRAY_SEPARATOR));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.csv.CsvUtil.ENTITY_TYPE_SEPARATOR;
import static org.openmetadata.csv.CsvUtil.FIELD_SEPARATOR;
import static org.openmetadata.csv.CsvUtil.fieldToColumns;
import static org.openmetadata.csv.CsvUtil.fieldToEntities;
import static org.openmetadata.csv.CsvUtil.fieldToExtensionStrings;
import static org.openmetadata.csv.CsvUtil.fieldToInternalArray;
Expand All @@ -40,6 +41,8 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -66,6 +69,7 @@
import org.openmetadata.schema.type.csv.CsvFile;
import org.openmetadata.schema.type.csv.CsvHeader;
import org.openmetadata.schema.type.csv.CsvImportResult;
import org.openmetadata.schema.type.customproperties.TableConfig;
import org.openmetadata.service.Entity;
import org.openmetadata.service.TypeRegistry;
import org.openmetadata.service.jdbi3.EntityRepository;
Expand Down Expand Up @@ -379,7 +383,7 @@ private void validateExtension(
parseEntityReferences(printer, csvRecord, fieldNumber, fieldValue.toString(), isList);
}
case "date-cp", "dateTime-cp", "time-cp" -> fieldValue =
getFormattedDateTimeField(
parseFormattedDateTimeField(
printer,
csvRecord,
fieldNumber,
Expand All @@ -392,19 +396,13 @@ private void validateExtension(
fieldValue = enumKeys.isEmpty() ? null : enumKeys;
}
case "timeInterval" -> fieldValue =
handleTimeInterval(printer, csvRecord, fieldNumber, fieldName, fieldValue);
case "number", "integer", "timestamp" -> {
try {
fieldValue = Long.parseLong(fieldValue.toString());
} catch (NumberFormatException e) {
importFailure(
printer,
invalidCustomPropertyValue(
fieldNumber, fieldName, customPropertyType, fieldValue.toString()),
csvRecord);
fieldValue = null;
}
}
parseTimeInterval(printer, csvRecord, fieldNumber, fieldName, fieldValue);
case "number", "integer", "timestamp" -> fieldValue =
parseLongField(
printer, csvRecord, fieldNumber, fieldName, customPropertyType, fieldValue);
case "table-cp" -> fieldValue =
parseTableType(printer, csvRecord, fieldNumber, fieldName, fieldValue, propertyConfig);

default -> {}
}
// Validate the field against the JSON schema
Expand Down Expand Up @@ -448,7 +446,7 @@ private Object parseEntityReferences(
return isList ? entityReferences : entityReferences.isEmpty() ? null : entityReferences.get(0);
}

protected String getFormattedDateTimeField(
protected String parseFormattedDateTimeField(
CSVPrinter printer,
CSVRecord csvRecord,
int fieldNumber,
Expand Down Expand Up @@ -484,7 +482,7 @@ protected String getFormattedDateTimeField(
}
}

private Map<String, Long> handleTimeInterval(
private Map<String, Long> parseTimeInterval(
CSVPrinter printer, CSVRecord csvRecord, int fieldNumber, String fieldName, Object fieldValue)
throws IOException {
List<String> timestampValues = fieldToEntities(fieldValue.toString());
Expand All @@ -511,6 +509,70 @@ private Map<String, Long> handleTimeInterval(
return timestampMap;
}

/**
 * Parses a numeric custom-property value (number/integer/timestamp) into a Long.
 * On a malformed value, records an import failure via importFailure and returns null
 * instead of propagating the parse error.
 */
private Object parseLongField(
    CSVPrinter printer,
    CSVRecord csvRecord,
    int fieldNumber,
    String fieldName,
    String customPropertyType,
    Object fieldValue)
    throws IOException {
  final String raw = fieldValue.toString();
  try {
    return Long.valueOf(raw);
  } catch (NumberFormatException ignored) {
    // Not a valid long — report the bad value against this record and skip the field.
    importFailure(
        printer,
        invalidCustomPropertyValue(fieldNumber, fieldName, customPropertyType, raw),
        csvRecord);
    return null;
  }
}

/**
 * Parses a table-type custom property from its CSV form into a JSON-shaped map with
 * "rows" (list of column-name -> cell-value maps) and "columns" (the configured column
 * names from the property's TableConfig). Rows supplying more cells than the configured
 * columns cause an import failure and a null return; rows with fewer cells simply omit
 * the trailing columns.
 */
private Object parseTableType(
    CSVPrinter printer,
    CSVRecord csvRecord,
    int fieldNumber,
    String fieldName,
    Object fieldValue,
    String propertyConfig)
    throws IOException {
  List<String> tableValues = listOrEmpty(fieldToInternalArray(fieldValue.toString()));
  TableConfig tableConfig =
      JsonUtils.treeToValue(JsonUtils.readTree(propertyConfig), TableConfig.class);

  List<Map<String, String>> rows = new ArrayList<>();
  for (String row : tableValues) {
    List<String> cellValues = listOrEmpty(fieldToColumns(row));

    // A row may not supply more cells than the property's configured columns.
    if (cellValues.size() > tableConfig.getColumns().size()) {
      importFailure(
          printer,
          invalidCustomPropertyValue(
              fieldNumber,
              fieldName,
              "table",
              "Column count should be less than or equal to " + tableConfig.getColumns().size()),
          csvRecord);
      return null;
    }

    // Pair cells with column names positionally.
    Map<String, String> rowMap = new LinkedHashMap<>();
    Iterator<String> names = tableConfig.getColumns().iterator();
    Iterator<String> cells = cellValues.iterator();
    while (names.hasNext() && cells.hasNext()) {
      rowMap.put(names.next(), cells.next());
    }
    rows.add(rowMap);
  }

  Map<String, Object> tableJson = new LinkedHashMap<>();
  tableJson.put("rows", rows);
  tableJson.put("columns", tableConfig.getColumns());
  return tableJson;
}

private void validateAndUpdateExtension(
CSVPrinter printer,
CSVRecord csvRecord,
Expand Down
Loading

0 comments on commit d9e85df

Please sign in to comment.