diff --git a/pipeline.yaml b/pipeline.yaml index 0e75e509c1..d99ef33ed3 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -53,687 +53,692 @@ parameters: variables: runTests: True - CONDA_CACHE_DIR: /usr/share/miniconda/envs + CONDA_ENV: synapseml + CONDA_CACHE_DIR: /usr/share/miniconda/envs/$(CONDA_ENV)/ ComponentDetection.Timeout: 900 isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')] -jobs: -- job: Style - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - - task: AzureCLI@2 - displayName: 'Scala Style Check' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt scalastyle test:scalastyle' - - template: templates/conda.yml - - bash: | - set -e - source activate synapseml - black --diff --color . && black --check -q . - displayName: 'Python Style Check' - -- job: Publish - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - steps: - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: MavenAuthenticate@0 - name: mavenAuthPublicPackages - displayName: Authenticate SynapseML_PublicPackages - inputs: - artifactsFeeds: SynapseML_PublicPackages - mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection - - task: AzureCLI@2 - displayName: 'Publish Artifacts' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | +stages: +- stage: Lint + displayName: "Lint" + dependsOn: + jobs: + - job: Style + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + - task: AzureCLI@2 + displayName: 'Scala Style Check' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt scalastyle test:scalastyle' + - template: templates/conda.yml + - bash: | set -e - sudo apt-get install graphviz doxygen -y source activate synapseml - sbt packagePython uploadNotebooks - sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython - sbt genBuildInfo - echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" - sbt -DskipCodegen=true publishLocalSigned - python tools/esrp/prepare_jar.py - env: - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - SYNAPSEML_ENABLE_PUBLISH: true - - task: AzureCLI@2 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - set -e - sbt publishBadges - condition: and(succeeded(), eq(variables.isMaster, true)) - displayName: Publish Badges + black --diff --color . && black --check -q . + displayName: 'Python Style Check' +- stage: Build + displayName: "Build and Publish" + dependsOn: + jobs: + - job: BuildAndCacheCondaEnv + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + - template: templates/conda.yml + - bash: df -H -- job: E2E - timeoutInMinutes: 120 - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - databricks-cpu: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" - databricks-gpu: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" - databricks-rapids: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" - synapse: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" -# ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: -# synapse-internal: -# TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests" - steps: - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - template: templates/publish.yml - - task: AzureCLI@2 - displayName: 'E2E' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - set -e - source activate synapseml - sbt "testOnly $(TEST-CLASS)" - condition: and(succeeded(), eq(variables.runTests, 'True')) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/test-reports/TEST-*.xml' - failTaskOnFailedTests: true - condition: and(eq(variables.runTests, 'True'), succeededOrFailed()) + - job: Publish + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: MavenAuthenticate@0 + name: mavenAuthPublicPackages + displayName: Authenticate SynapseML_PublicPackages + inputs: + artifactsFeeds: SynapseML_PublicPackages + mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection + - task: AzureCLI@2 + displayName: 'Publish Artifacts' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + sudo apt-get install graphviz doxygen -y + source activate synapseml + sbt packagePython uploadNotebooks + sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython + sbt genBuildInfo + echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" + sbt -DskipCodegen=true publishLocalSigned + python tools/esrp/prepare_jar.py + env: + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + SYNAPSEML_ENABLE_PUBLISH: true + - task: AzureCLI@2 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + sbt publishBadges + condition: and(succeeded(), eq(variables.isMaster, true)) + displayName: Publish Badges -# -#- job: PublishDocker -# displayName: PublishDocker -# pool: -# vmImage: ubuntu-20.04 -# steps: -# - task: AzureCLI@2 -# displayName: 'Get Docker Tag + Version' -# inputs: -# azureSubscription: 'SynapseML Build' -# scriptLocation: inlineScript -# scriptType: bash -# inlineScript: | -# VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g') -# echo '##vso[task.setvariable variable=version]'$VERSION -# echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD) -# - task: Docker@2 -# displayName: Demo Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-demo' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/demo/Dockerfile' -# tags: $(version) -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# displayName: Demo Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-demo' -# command: 'push' -# tags: $(version) -# - task: Docker@2 -# displayName: Minimal Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-minimal' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/minimal/Dockerfile' -# tags: $(version) -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# displayName: Minimal Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-minimal' -# command: 'push' -# tags: $(version) -# - task: Docker@2 -# condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) -# displayName: Release Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/release' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/demo/Dockerfile' -# tags: | -# $(version) -# latest -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) -# displayName: Release Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/release' -# command: 'push' -# tags: | -# $(version) -# latest -# - task: ComponentGovernanceComponentDetection@0 + - job: WebsiteAutoDeployment + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + steps: + - checkout: self + fetchDepth: 1 + clean: true + submodules: false + lfs: false + persistCredentials: true + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: NodeTool@0 + inputs: + versionSpec: '16.x' + displayName: 'Install Node.js' + - task: AzureCLI@2 + displayName: 'Convert notebooks to markdowns' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + source activate synapseml + sbt convertNotebooks + - bash: | + set -e + yarn install + cd website + yarn + yarn build + displayName: 'yarn install and build' + - bash: | + set -e + git config --global user.name "${GH_NAME}" + git config --global user.email "${GH_EMAIL}" + git checkout -b main + echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc + cd website + GIT_USER="${GH_NAME}" yarn deploy + condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) + env: + GH_NAME: $(gh-name) + GH_EMAIL: $(gh-email) + GH_TOKEN: $(gh-token) + displayName: 'yarn deploy' -- job: Release - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/update_cli.yml - - bash: | - echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD) - displayName: 'Get Git Tag' - - bash: | - set -e - wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64 - chmod +x git-chglog_linux_amd64 - ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - - task: GitHubRelease@0 - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - inputs: - gitHubConnection: 'MMLSpark Github' - repositoryName: '$(Build.Repository.Name)' - action: 'create' - target: '$(Build.SourceVersion)' - tagSource: 'auto' - releaseNotesFile: 'CHANGELOG.md' - isDraft: true - - bash: echo "##vso[task.prependpath]$CONDA/bin" - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - displayName: Add conda to PATH - - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) - displayName: Fix directory permissions - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - - task: Cache@2 - displayName: Use cached Anaconda environment - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - inputs: - key: 'conda | "$(Agent.OS)" | environment.yml' - restoreKeys: | - python | "$(Agent.OS)" - python - path: $(CONDA_CACHE_DIR) - cacheHitVar: CONDA_CACHE_RESTORED - - bash: | - conda env create --force -f environment.yml -v - condition: and(eq(variables.isMaster, true), and(startsWith(variables['tag'], 'v'), eq(variables.CONDA_CACHE_RESTORED, 'false'))) - displayName: Create Anaconda environment - - task: AzureKeyVault@1 - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - inputs: - azureSubscription: 'SynapseML Build' - keyVaultName: mmlspark-keys - - bash: | - set -e - source activate synapseml - sbt publishPypi - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - env: - STORAGE-KEY: $(storage-key) - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - PYPI-API-TOKEN: $(pypi-api-token) - SYNAPSEML_ENABLE_PUBLISH: true - displayName: 'publish python package to pypi' - - bash: | - set -e - source activate synapseml - sbt publishLocalSigned - python tools/esrp/prepare_jar.py - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - env: - STORAGE-KEY: $(storage-key) - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - SYNAPSEML_ENABLE_PUBLISH: true - displayName: 'publish jar package to maven central' - - task: EsrpRelease@7 - inputs: - ConnectedServiceName: 'DataScienceESRPRelease2024' - # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview - keyvaultname: 'synapseml-esrp-kv' - authcertname: 'ReleaseAutomation' - signcertname: 'ESRPReqSignCA' - # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false - clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6' - Intent: 'PackageDistribution' - ContentType: 'Maven' - contentsource: 'Folder' - folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/' - Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com' - Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com' - ServiceEndpointUrl: 'https://api.esrp.microsoft.com' - MainPublisher: 'synapseml' - DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - waitforreleasecompletion: true - displayName: 'ESRP Publish Package' - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) + # + #- job: PublishDocker + # displayName: PublishDocker + # pool: + # vmImage: ubuntu-20.04 + # steps: + # - template: templates/checkout.yml + # - task: AzureCLI@2 + # displayName: 'Get Docker Tag + Version' + # inputs: + # azureSubscription: 'SynapseML Build' + # scriptLocation: inlineScript + # scriptType: bash + # inlineScript: | + # VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g') + # echo '##vso[task.setvariable variable=version]'$VERSION + # echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD) + # - task: Docker@2 + # displayName: Demo Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-demo' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/demo/Dockerfile' + # tags: $(version) + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # displayName: Demo Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-demo' + # command: 'push' + # tags: $(version) + # - task: Docker@2 + # displayName: Minimal Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-minimal' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/minimal/Dockerfile' + # tags: $(version) + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # displayName: Minimal Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-minimal' + # command: 'push' + # tags: $(version) + # - task: Docker@2 + # condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) + # displayName: Release Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/release' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/demo/Dockerfile' + # tags: | + # $(version) + # latest + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) + # displayName: Release Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/release' + # command: 'push' + # tags: | + # $(version) + # latest + # - task: ComponentGovernanceComponentDetection@0 +- stage: E2E + displayName: "End to End Tests" + dependsOn: + jobs: + - job: E2E + timeoutInMinutes: 120 + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + databricks-cpu: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" + databricks-gpu: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" + databricks-rapids: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" + synapse: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" + # ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: + # synapse-internal: + # TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - template: templates/publish.yml + - task: AzureCLI@2 + displayName: 'E2E' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + source activate synapseml + sbt "testOnly $(TEST-CLASS)" + condition: and(succeeded(), eq(variables.runTests, 'True')) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/test-reports/TEST-*.xml' + failTaskOnFailedTests: true + condition: and(eq(variables.runTests, 'True'), succeededOrFailed()) -- job: PythonTests - timeoutInMinutes: 120 - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-22.04 - strategy: - matrix: - core: - PACKAGE: "core" - deep-learning: - PACKAGE: "deepLearning" - lightgbm: - PACKAGE: "lightgbm" - opencv: - PACKAGE: "opencv" - vw: - PACKAGE: "vw" - cognitive: - PACKAGE: "cognitive" - steps: - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: AzureCLI@2 - displayName: 'Install and package deps' - timeoutInMinutes: 40 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - source activate synapseml - sbt coverage getDatasets installPipPackage - sbt publishM2 - - task: AzureCLI@2 - displayName: 'Test Python Code' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 40 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | +- stage: Release + displayName: "Release" + dependsOn: + jobs: + - job: Release + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) + steps: + - template: templates/checkout.yml + - template: templates/update_cli.yml + - bash: | + echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD) + displayName: 'Get Git Tag' + - bash: | set -e - source activate synapseml - export SBT_OPTS="-XX:+UseG1GC" - echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" - echo "SBT_OPTS=$SBT_OPTS" - (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/python-test-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - - -- job: RTests - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - core: - PACKAGE: "core" - deep-learning: - PACKAGE: "deepLearning" - lightgbm: - PACKAGE: "lightgbm" - opencv: - PACKAGE: "opencv" - vw: - PACKAGE: "vw" - cognitive: - PACKAGE: "cognitive" - steps: - #- template: templates/ivy_cache_2.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: AzureCLI@2 - displayName: 'Prepare for tests' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 60 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | + wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64 + chmod +x git-chglog_linux_amd64 + ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG + - task: GitHubRelease@0 + inputs: + gitHubConnection: 'MMLSpark Github' + repositoryName: '$(Build.Repository.Name)' + action: 'create' + target: '$(Build.SourceVersion)' + tagSource: 'auto' + releaseNotesFile: 'CHANGELOG.md' + isDraft: true + - template: templates/conda.yml + - task: AzureKeyVault@1 + inputs: + azureSubscription: 'SynapseML Build' + keyVaultName: mmlspark-keys + - bash: | set -e - export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" source activate synapseml - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - sbt codegen - sbt publishM2 - SPARK_VERSION=3.4.1 - HADOOP_VERSION=3 - wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - - task: AzureCLI@2 - displayName: 'Test R Code' - retryCountOnTaskFailure: 3 - timeoutInMinutes: 20 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | + sbt publishPypi + env: + STORAGE-KEY: $(storage-key) + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + PYPI-API-TOKEN: $(pypi-api-token) + SYNAPSEML_ENABLE_PUBLISH: true + displayName: 'publish python package to pypi' + - bash: | set -e - export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" source activate synapseml - timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/r-test-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - retryCountOnTaskFailure: 1 - displayName: 'Generate Codecov report' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - -- job: BuildAndCacheCondaEnv - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/conda.yml - - bash: df -H - -- job: WebsiteSamplesTests - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - template: templates/publish.yml - - task: AzureCLI@2 - displayName: 'Test Website Samples' - timeoutInMinutes: 30 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - (sbt coverage testWebsiteDocs) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/website-test-result.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - + sbt publishLocalSigned + python tools/esrp/prepare_jar.py + env: + STORAGE-KEY: $(storage-key) + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + SYNAPSEML_ENABLE_PUBLISH: true + displayName: 'publish jar package to maven central' + - task: EsrpRelease@7 + inputs: + ConnectedServiceName: 'DataScienceESRPRelease2024' + # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview + keyvaultname: 'synapseml-esrp-kv' + authcertname: 'ReleaseAutomation' + signcertname: 'ESRPReqSignCA' + # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false + clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6' + Intent: 'PackageDistribution' + ContentType: 'Maven' + contentsource: 'Folder' + folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/' + Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com' + Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com' + ServiceEndpointUrl: 'https://api.esrp.microsoft.com' + MainPublisher: 'synapseml' + DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' + waitforreleasecompletion: true + displayName: 'ESRP Publish Package' +- stage: Tests + displayName: "Unit Tests" + dependsOn: + jobs: + - job: PythonTests + timeoutInMinutes: 120 + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-22.04 + strategy: + matrix: + core: + PACKAGE: "core" + deep-learning: + PACKAGE: "deepLearning" + lightgbm: + PACKAGE: "lightgbm" + opencv: + PACKAGE: "opencv" + vw: + PACKAGE: "vw" + cognitive: + PACKAGE: "cognitive" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: AzureCLI@2 + displayName: 'Install and package deps' + timeoutInMinutes: 40 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + source activate synapseml + sbt coverage getDatasets installPipPackage + sbt publishM2 + - task: AzureCLI@2 + displayName: 'Test Python Code' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 40 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + source activate synapseml + export SBT_OPTS="-XX:+UseG1GC" + echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" + echo "SBT_OPTS=$SBT_OPTS" + (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/python-test-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml -- job: WebsiteAutoDeployment - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - steps: - - checkout: self - persistCredentials: true - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: NodeTool@0 - inputs: - versionSpec: '16.x' - displayName: 'Install Node.js' - - task: AzureCLI@2 - displayName: 'Convert notebooks to markdowns' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - source activate synapseml - sbt convertNotebooks - - bash: | - set -e - yarn install - cd website - yarn - yarn build - displayName: 'yarn install and build' - - bash: | - set -e - git config --global user.name "${GH_NAME}" - git config --global user.email "${GH_EMAIL}" - git checkout -b main - echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc - cd website - GIT_USER="${GH_NAME}" yarn deploy - condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) - env: - GH_NAME: $(gh-name) - GH_EMAIL: $(gh-email) - GH_TOKEN: $(gh-token) - displayName: 'yarn deploy' + - job: RTests + timeoutInMinutes: 60 + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + core: + PACKAGE: "core" + deep-learning: + PACKAGE: "deepLearning" + lightgbm: + PACKAGE: "lightgbm" + opencv: + PACKAGE: "opencv" + vw: + PACKAGE: "vw" + cognitive: + PACKAGE: "cognitive" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache_2.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: AzureCLI@2 + displayName: 'Prepare for tests' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 60 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" + source activate synapseml + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + sbt codegen + sbt publishM2 + SPARK_VERSION=3.4.1 + HADOOP_VERSION=3 + wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz + - task: AzureCLI@2 + displayName: 'Test R Code' + retryCountOnTaskFailure: 3 + timeoutInMinutes: 20 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" + source activate synapseml + timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/r-test-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + retryCountOnTaskFailure: 1 + displayName: 'Generate Codecov report' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml + - job: WebsiteSamplesTests + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - template: templates/publish.yml + - task: AzureCLI@2 + displayName: 'Test Website Samples' + timeoutInMinutes: 30 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + (sbt coverage testWebsiteDocs) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/website-test-result.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml -- job: UnitTests - cancelTimeoutInMinutes: 1 - timeoutInMinutes: 80 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - automl: - PACKAGE: "automl" - causal: - PACKAGE: "causal" - onnx: - PACKAGE: "onnx" - geospatial: - PACKAGE: "services.geospatial" - anomaly: - PACKAGE: "services.anomaly" - FLAKY: "true" - bing: - PACKAGE: "services.bing" - FLAKY: "true" - face: - PACKAGE: "services.face" - FLAKY: "true" - form: - PACKAGE: "services.form" - FLAKY: "true" - language: - PACKAGE: "services.language" - FLAKY: "true" - openai: - PACKAGE: "services.openai" - FLAKY: "true" - search: - PACKAGE: "services.search" - FFMPEG: "true" - FLAKY: "true" - speech: - PACKAGE: "services.speech" - FFMPEG: "true" - FLAKY: "true" - text: - PACKAGE: "services.text" - FLAKY: "true" - translate: - PACKAGE: "services.translate" - FLAKY: "true" - vision: - PACKAGE: "services.vision" - FLAKY: "true" - core: - PACKAGE: "core" - explainers1: - PACKAGE: "explainers.split1" - explainers2: - PACKAGE: "explainers.split2" - explainers3: - PACKAGE: "explainers.split3" - exploratory: - PACKAGE: "exploratory" - featurize: - PACKAGE: "featurize" - image: - PACKAGE: "image" - io1: - PACKAGE: "io.split1" - FLAKY: "true" - io2: - PACKAGE: "io.split2" - FLAKY: "true" - isolationforest: - PACKAGE: "isolationforest" - flaky: - PACKAGE: "flaky" #TODO fix flaky test so isolation is not needed - FLAKY: "true" - lightgbm1: - PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split - FLAKY: "true" - lightgbm2: - PACKAGE: "lightgbm.split2" - FLAKY: "true" - lightgbm3: - PACKAGE: "lightgbm.split3" - FLAKY: "true" - lightgbm4: - PACKAGE: "lightgbm.split4" - FLAKY: "true" - lightgbm5: - PACKAGE: "lightgbm.split5" - FLAKY: "true" - lightgbm6: - PACKAGE: "lightgbm.split6" - FLAKY: "true" - opencv: - PACKAGE: "opencv" - recommendation: - PACKAGE: "recommendation" - stages: - PACKAGE: "stages" - nn: - PACKAGE: "nn" - train: - PACKAGE: "train" - vw: - PACKAGE: "vw" - steps: - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - task: AzureCLI@2 - displayName: 'Setup repo' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests) - (${FFMPEG:-false} && sudo apt-get update && \ - sudo apt-get install ffmpeg libgstreamer1.0-0 \ - gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y) - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - - task: AzureCLI@2 - displayName: 'Unit Test' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 90 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - ulimit -c unlimited - export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT" - (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || - (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/test-reports/TEST-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/kv.yml - - template: templates/codecov.yml + - job: UnitTests + cancelTimeoutInMinutes: 1 + timeoutInMinutes: 80 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + automl: + PACKAGE: "automl" + causal: + PACKAGE: "causal" + onnx: + PACKAGE: "onnx" + geospatial: + PACKAGE: "services.geospatial" + anomaly: + PACKAGE: "services.anomaly" + FLAKY: "true" + bing: + PACKAGE: "services.bing" + FLAKY: "true" + face: + PACKAGE: "services.face" + FLAKY: "true" + form: + PACKAGE: "services.form" + FLAKY: "true" + language: + PACKAGE: "services.language" + FLAKY: "true" + openai: + PACKAGE: "services.openai" + FLAKY: "true" + search: + PACKAGE: "services.search" + FFMPEG: "true" + FLAKY: "true" + speech: + PACKAGE: "services.speech" + FFMPEG: "true" + FLAKY: "true" + text: + PACKAGE: "services.text" + FLAKY: "true" + translate: + PACKAGE: "services.translate" + FLAKY: "true" + vision: + PACKAGE: "services.vision" + FLAKY: "true" + core: + PACKAGE: "core" + explainers1: + PACKAGE: "explainers.split1" + explainers2: + PACKAGE: "explainers.split2" + explainers3: + PACKAGE: "explainers.split3" + exploratory: + PACKAGE: "exploratory" + featurize: + PACKAGE: "featurize" + image: + PACKAGE: "image" + io1: + PACKAGE: "io.split1" + FLAKY: "true" + io2: + PACKAGE: "io.split2" + FLAKY: "true" + isolationforest: + PACKAGE: "isolationforest" + flaky: + PACKAGE: "flaky" #TODO fix flaky test so isolation is not needed + FLAKY: "true" + lightgbm1: + PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split + FLAKY: "true" + lightgbm2: + PACKAGE: "lightgbm.split2" + FLAKY: "true" + lightgbm3: + PACKAGE: "lightgbm.split3" + FLAKY: "true" + lightgbm4: + PACKAGE: "lightgbm.split4" + FLAKY: "true" + lightgbm5: + PACKAGE: "lightgbm.split5" + FLAKY: "true" + lightgbm6: + PACKAGE: "lightgbm.split6" + FLAKY: "true" + opencv: + PACKAGE: "opencv" + recommendation: + PACKAGE: "recommendation" + stages: + PACKAGE: "stages" + nn: + PACKAGE: "nn" + train: + PACKAGE: "train" + vw: + PACKAGE: "vw" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - task: AzureCLI@2 + displayName: 'Setup repo' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests) + (${FFMPEG:-false} && sudo apt-get update && \ + sudo apt-get install ffmpeg libgstreamer1.0-0 \ + gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y) + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + - task: AzureCLI@2 + displayName: 'Unit Test' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 90 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + ulimit -c unlimited + export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT" + (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || + (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/test-reports/TEST-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/kv.yml + - template: templates/codecov.yml diff --git a/templates/checkout.yml b/templates/checkout.yml new file mode 100644 index 0000000000..977867297b --- /dev/null +++ b/templates/checkout.yml @@ -0,0 +1,6 @@ +steps: + - checkout: self + fetchDepth: 1 + clean: true + submodules: false + lfs: false diff --git a/templates/conda.yml b/templates/conda.yml index 8c36f95929..a9ad3d063f 100644 --- a/templates/conda.yml +++ b/templates/conda.yml @@ -2,7 +2,9 @@ steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" displayName: Add conda to PATH retryCountOnTaskFailure: 1 - - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) + - bash: | + mkdir -p $(CONDA_CACHE_DIR) + sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) displayName: Fix directory permissions - task: Cache@2 displayName: Use cached Anaconda environment @@ -20,3 +22,9 @@ steps: displayName: Create Anaconda environment retryCountOnTaskFailure: 1 condition: eq(variables.CONDA_CACHE_RESTORED, 'false') + - bash: | + echo "system usage:" + sudo df -h + echo "conda cache usage:" + sudo du -h --max-depth=1 $(CONDA_CACHE_DIR) + displayName: Display disk usage