From 4c99bdff6281bf50566bf776b88ba4d1d64f846c Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Wed, 27 Mar 2024 17:26:32 -0700 Subject: [PATCH 1/7] Add phalanx environment install command Port the installer/install.sh script to Python as the new phalanx environment install command. Add a strongly worded confirmation message before proceeding, which can be overridden with a command line flag for CI testing. Expand the Helm storage layer and add Argo CD and Kubernetes storage layers to support the installer. Use kubectl rather than the Python Kubernetes libraries to manipulate Kubernetes objects, since it's simpler and more straightforward for the type of actions the installer needs to take. Introduce a new way of passing around Vault credentials, and support authenticating to Vault with an AppRole, at least in the context of the installer. (The secrets and vault commands still require tokens.) Remove the old installer and switch GitHub Actions CI over to the new command. --- .github/workflows/ci.yaml | 42 ++-- docs/internals/api.rst | 12 ++ installer/install.sh | 312 ---------------------------- src/phalanx/cli.py | 106 ++++++++++ src/phalanx/exceptions.py | 48 ++++- src/phalanx/factory.py | 11 +- src/phalanx/github.py | 37 ++++ src/phalanx/models/vault.py | 42 ++++ src/phalanx/services/environment.py | 157 +++++++++++++- src/phalanx/storage/argocd.py | 206 ++++++++++++++++++ src/phalanx/storage/command.py | 13 +- src/phalanx/storage/config.py | 63 ++++++ src/phalanx/storage/helm.py | 86 +++++++- src/phalanx/storage/kubernetes.py | 80 +++++++ src/phalanx/storage/vault.py | 71 +++++-- tox.ini | 8 +- 16 files changed, 941 insertions(+), 353 deletions(-) delete mode 100755 installer/install.sh create mode 100644 src/phalanx/github.py create mode 100644 src/phalanx/storage/argocd.py create mode 100644 src/phalanx/storage/kubernetes.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b90aa8a0b2..99be62e916 100644 --- a/.github/workflows/ci.yaml 
+++ b/.github/workflows/ci.yaml @@ -71,10 +71,10 @@ jobs: cache-key-prefix: helm # The minikube job always runs, but it quickly does nothing if no files that - # would affect minikube were changed. This unfortunately requires a lot of + # would affect minikube were changed. This unfortunately requires a lot of # if conditionals on all the steps of the job, but we need the job to run so # that we can make it mandatory before merging, which in turn allows us to - # use automerge. + # use merge queues. minikube: name: Test deploy runs-on: ubuntu-latest @@ -99,7 +99,18 @@ jobs: - "environments/Chart.yaml" - "environments/templates/applications/infrastructure/*" - "environments/values-minikube.yaml" - - "installer/**" + - "src/phalanx/**" + + - name: Download installer dependencies + if: steps.filter.outputs.minikube == 'true' + run: | + curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.15.4/vault_1.15.4_linux_amd64.zip + unzip /tmp/vault.zip + sudo mv vault /usr/local/bin/vault + sudo chmod +x /usr/local/bin/vault + sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.8.6/argocd-linux-amd64 + sudo chmod +x /usr/local/bin/argocd + sudo apt-get install socat - name: Setup Minikube if: steps.filter.outputs.minikube == 'true' @@ -113,23 +124,18 @@ jobs: if: steps.filter.outputs.minikube == 'true' run: kubectl get nodes - - name: Download installer dependencies + - uses: lsst-sqre/run-tox@v1 if: steps.filter.outputs.minikube == 'true' - run: | - curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.15.4/vault_1.15.4_linux_amd64.zip - unzip /tmp/vault.zip - sudo mv vault /usr/local/bin/vault - sudo chmod +x /usr/local/bin/vault - sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.8.6/argocd-linux-amd64 - sudo chmod +x /usr/local/bin/argocd - sudo apt-get install socat - - - name: Run installer timeout-minutes: 15 - if: steps.filter.outputs.minikube == 
'true' - run: | - cd installer - ./install.sh minikube "${{ secrets.MINIKUBE_VAULT_ROLE_ID }}" "${{ secrets.MINIKUBE_VAULT_SECRET_ID }}" + with: + python-version: "3.12" + tox-envs: install + tox-posargs: >- + --force-noninteractive + --vault-role-id=${{ secrets.MINIKUBE_VAULT_ROLE_ID }} + --vault-secret-id=${{ secrets.MINIKUBE_VAULT_SECRET_ID }} + minikube + cache-key-prefix: test - name: Get final list of resources if: steps.filter.outputs.minikube == 'true' diff --git a/docs/internals/api.rst b/docs/internals/api.rst index 6166bcd1f8..a608ffd85b 100644 --- a/docs/internals/api.rst +++ b/docs/internals/api.rst @@ -25,6 +25,9 @@ This API is only intended for use within the Phalanx code itself. .. automodapi:: phalanx.factory :include-all-objects: +.. automodapi:: phalanx.github + :include-all-objects: + .. automodapi:: phalanx.models.applications :include-all-objects: @@ -55,12 +58,21 @@ This API is only intended for use within the Phalanx code itself. .. automodapi:: phalanx.services.vault :include-all-objects: +.. automodapi:: phalanx.storage.argocd + :include-all-objects: + +.. automodapi:: phalanx.storage.command + :include-all-objects: + .. automodapi:: phalanx.storage.config :include-all-objects: .. automodapi:: phalanx.storage.helm :include-all-objects: +.. automodapi:: phalanx.storage.kubernetes + :include-all-objects: + .. 
automodapi:: phalanx.storage.onepassword :include-all-objects: diff --git a/installer/install.sh b/installer/install.sh deleted file mode 100755 index 78059e2016..0000000000 --- a/installer/install.sh +++ /dev/null @@ -1,312 +0,0 @@ -#!/bin/bash -e - -################################################################################ -# install.sh - Install script for the Rubin Science Platform -################################################################################ - -# Usage: -# ./install.sh ENVIRONMENT=env [VAULT_ROLE_ID= VAULT_SECRET_ID= | VAULT_TOKEN= - -# Arguments -# - The environment variable is mandatory and should be provided as the first argument. -# - If two positional arguments are provided, assume they are VAULT_ROLE_ID and VAULT_SECRET_ID. -# - If named arguments are provided, parse them for ENVIRONMENT, VAULT_ROLE_ID, VAULT_SECRET_ID, and VAULT_TOKEN. - -# Environment Configuration: -# The environment configuration is retrieved from ../environments/values-${ENVIRONMENT}.yaml. - -# Usage Examples: -# Using authentication with an approle: -# ./install.sh ENVIRONMENT=myenv VAULT_ROLE_ID=your-vault-id VAULT_SECRET_ID=your-secret -# ./install.sh myenv your-vault-id your-secret # Uses VAULT_ROLE_ID and VAULT_SECRET_ID -# -# Using authentication with a token: -# ./install.sh ENVIRONMENT=myenv VAULT_TOKEN=your-vault-token -# ./install.sh ENVIRONMENT=myenv VAULT_TOKEN=your-vault-token -# ./install.sh myenv VAULT_TOKEN=your-vault-token - -# Script Dependencies: -# - yq: Used for parsing YAML files. -# - vault: Used for interacting with HashiCorp Vault. -# - kubectl: Kubernetes command-line tool. -# - helm: Kubernetes package manager. - -# Notes: -# - The script assumes that the specified environment exists, and is available under ../environments/ and has the required charts under ../applications/ -# - It creates or updates Argo CD and Vault secrets based on the provided credentials. 
- -# Exit codes: -# - 0: Success -# - 1: Error - -################################################################################ - -USAGE="Usage: ./install.sh ENVIRONMENT=env [VAULT_ROLE_ID= VAULT_SECRET_ID= | VAULT_TOKEN=]" - -unset ENVIRONMENT -unset VAULT_ROLE_ID -unset VAULT_TOKEN -unset VAULT_SECRET_ID - -# Function to display usage and exit -display_usage() { - echo "$USAGE" - exit 1 -} - -# Function to create Kubernetes secret -create_kubernetes_secret() { - local namespace="$1" - shift - kubectl create secret generic vault-credentials \ - --namespace "$namespace" \ - --dry-run=client -o yaml "$@" | kubectl apply -f - -} - -# Function to check for dependencies -check_dependencies() { - local dependencies=("yq" "vault" "kubectl" "helm") - - for cmd in "${dependencies[@]}"; do - if ! command -v "$cmd" > /dev/null 2>&1; then - echo "Error: $cmd not found. Please install $cmd and try again." - exit 1 - fi - done -} - -# Check that the dependencies are installed -check_dependencies - -# Extract environment -if [[ $1 == ENVIRONMENT=* ]]; then - ENVIRONMENT="${1#*=}" - shift -elif [[ $1 =~ ^[a-zA-Z0-9_-]+$ ]]; then - ENVIRONMENT="$1" - shift -else - display_usage -fi - -# Extract named arguments -for arg in "$@"; do - case "$arg" in - environment=*) - ENVIRONMENT="${arg#*=}" - ;; - VAULT_ROLE_ID=*|vault_role_id=*) - VAULT_ROLE_ID="${arg#*=}" - ;; - VAULT_SECRET_ID=*|vault_secret_id=*) - VAULT_SECRET_ID="${arg#*=}" - ;; - VAULT_TOKEN=*|vault_token=*) - VAULT_TOKEN="${arg#*=}" - ;; - *) - ;; - esac -done - - -# If VAULT_ROLE_ID and VAULT_SECRET_ID are not set from named arguments, check positional arguments -if [ -z "$VAULT_ROLE_ID" ] || [ -z "$VAULT_SECRET_ID" ]; then - # If two positional arguments are provided, assume they are VAULT_ROLE_ID and VAULT_SECRET_ID - if [ $# -ge 2 ]; then - VAULT_ROLE_ID=$1 - VAULT_SECRET_ID=$2 - shift 2 - fi -fi - -# Get environment configuration -config="../environments/values-${ENVIRONMENT}.yaml" - -echo "Getting Git branch and 
remote information..." -GIT_URL=$(git config --get remote.origin.url) -# Github runs in a detached head state, but sets GITHUB_REF, -# extract the branch from it. If we're there, use that branch. -# git branch --show-current will return empty in deatached head. -GIT_BRANCH=${GITHUB_HEAD_REF:-$(git branch --show-current)} - -echo "Logging on to Vault..." - -VAULT_ADDR="" -if grep '^vaultUrl:' "$config" >/dev/null; then - VAULT_ADDR=$(yq -r .vaultUrl "$config") -else - VAULT_ADDR=$(yq -r .vaultUrl ../environments/values.yaml) -fi - -export VAULT_ADDR=$VAULT_ADDR - -# Check if VAULT_ROLE_ID and VAULT_SECRET_ID are provided, if so generate VAULT_TOKEN -if [ -n "$VAULT_ROLE_ID" ] && [ -n "$VAULT_SECRET_ID" ]; then - # If VAULT_TOKEN is not provided, generate it using VAULT_ROLE_ID and VAULT_SECRET_ID - if [ -z "$VAULT_TOKEN" ]; then - VAULT_TOKEN=$(vault write auth/approle/login role_id="$VAULT_ROLE_ID" secret_id="$VAULT_SECRET_ID" | grep 'token ' | awk '{ print $2 }') - fi -fi - -# Check if VAULT_ROLE_ID and VAULT_SECRET_ID are not provided, but VAULT_TOKEN is -if [ -z "$VAULT_ROLE_ID" ] || [ -z "$VAULT_SECRET_ID" ]; then - # Check if VAULT_TOKEN is provided - if [ -z "$VAULT_TOKEN" ]; then - echo "Invalid arguments provided. Please provide either VAULT_ROLE_ID and VAULT_SECRET_ID or VAULT_TOKEN." - display_usage - fi -fi - -export VAULT_TOKEN=$VAULT_TOKEN - -VAULT_PATH_PREFIX=$(yq -r .vaultPathPrefix "$config") -ARGOCD_PASSWORD=$(vault kv get --field=admin.plaintext_password "$VAULT_PATH_PREFIX"/argocd) - -echo "Putting Vault credentials in a secret for vault-secrets-operator..." -# The namespace may not exist already, but don't error if it does. 
-kubectl create ns vault-secrets-operator || true - -# Create Kubernetes secret based on authentication method -if [ -n "$VAULT_ROLE_ID" ] && [ -n "$VAULT_SECRET_ID" ]; then - create_kubernetes_secret "vault-secrets-operator" \ - --from-literal=VAULT_ROLE_ID="$VAULT_ROLE_ID" \ - --from-literal=VAULT_SECRET_ID="$VAULT_SECRET_ID" -elif [ -n "$VAULT_TOKEN" ]; then - create_kubernetes_secret "vault-secrets-operator" \ - --from-literal=VAULT_TOKEN="$VAULT_TOKEN" -else - echo "Invalid arguments provided. Please provide either VAULT_ROLE_ID and VAULT_SECRET_ID or VAULT_TOKEN." - display_usage -fi - -# Argo CD depends a Vault-created secret for its credentials, so -# vault-secrets-operator has to be installed first. -echo "Updating or installing vault-secrets-operator..." -helm dependency update ../applications/vault-secrets-operator -helm upgrade vault-secrets-operator ../applications/vault-secrets-operator \ - --install \ - --values ../applications/vault-secrets-operator/values.yaml \ - --values "../applications/vault-secrets-operator/values-$ENVIRONMENT.yaml" \ - --set "vault-secrets-operator.vault.address=$VAULT_ADDR" \ - --create-namespace \ - --namespace vault-secrets-operator \ - --timeout 5m \ - --wait - -echo "Updating or installing Argo CD using Helm..." -helm dependency update ../applications/argocd -helm upgrade argocd ../applications/argocd \ - --install \ - --values ../applications/argocd/values.yaml \ - --values "../applications/argocd/values-$ENVIRONMENT.yaml" \ - --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ - --create-namespace \ - --namespace argocd \ - --timeout 5m \ - --wait - -echo "Logging in to Argo CD..." -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" - -echo "Creating the top-level Argo CD application..." 
-argocd app create science-platform \ - --repo "$GIT_URL" \ - --path environments --dest-namespace default \ - --dest-server https://kubernetes.default.svc \ - --upsert \ - --revision "$GIT_BRANCH" \ - --port-forward \ - --port-forward-namespace argocd \ - --helm-set "repoUrl=$GIT_URL" \ - --helm-set "targetRevision=$GIT_BRANCH" \ - --values values.yaml \ - --values "values-$ENVIRONMENT.yaml" - -echo "Syncing the top-level Argo CD application..." -argocd app sync science-platform \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 - -echo "Moving the top-level Argo CD application into infrastructure..." -argocd app set science-platform --project infrastructure \ - --port-forward \ - --port-forward-namespace argocd - -echo "Syncing Argo CD..." -timeout 30 argocd app sync argocd \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 || \ -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" && \ -timeout 30 argocd app sync argocd \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 - -echo "Waiting for Argo CD to finish syncing..." -kubectl -n argocd rollout status deployment/argocd-server -kubectl -n argocd rollout status deployment/argocd-repo-server -kubectl -n argocd rollout status statefulset/argocd-application-controller - -echo "Logging in to Argo CD..." -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" - -if [ "$(yq -r '.applications."ingress-nginx"' "$config")" != "false" ]; then - echo "Syncing ingress-nginx..." - argocd app sync ingress-nginx \ - --port-forward \ - --port-forward-namespace argocd -fi - -if [ "$(yq -r '.applications."cert-manager"' "$config")" != "false" ]; then - echo "Syncing cert-manager..." 
- argocd app sync cert-manager \ - --port-forward \ - --port-forward-namespace argocd && \ - - # Wait for the cert-manager's webhook to finish deploying by running - # kubectl, argocd's sync doesn't seem to wait for this to finish. - kubectl -n cert-manager rollout status deployment/cert-manager-webhook -fi - -if [ "$(yq -r .applications.postgres "$config")" == "true" ]; then - echo "Syncing postgres..." - argocd app sync postgres \ - --port-forward \ - --port-forward-namespace argocd -fi - -if [ "$(yq -r .applications.gafaelfawr "$config")" != "false" ]; then - echo "Syncing gafaelfawr..." - argocd app sync gafaelfawr \ - --port-forward \ - --port-forward-namespace argocd -fi - -echo "Syncing remaining applications..." -argocd app sync -l "argocd.argoproj.io/instance=science-platform" \ - --port-forward \ - --port-forward-namespace argocd - -echo '' -echo "You can now check on your Argo CD installation by running:" -echo "kubectl port-forward service/argocd-server -n argocd 8080:443" -echo "For the ArgoCD admin password:" -echo "vault kv get --field=admin.plaintext_password $VAULT_PATH_PREFIX/argocd" diff --git a/src/phalanx/cli.py b/src/phalanx/cli.py index eb7e518330..03b515aaa6 100644 --- a/src/phalanx/cli.py +++ b/src/phalanx/cli.py @@ -18,6 +18,11 @@ from .models.environments import EnvironmentConfig from .models.helm import HelmStarter from .models.secrets import ConditionalSecretConfig, StaticSecrets +from .models.vault import ( + VaultAppRoleCredentials, + VaultCredentials, + VaultTokenCredentials, +) __all__ = [ "help", @@ -29,6 +34,7 @@ "application_lint_all", "application_template", "environment", + "environment_install", "environment_lint", "environment_schema", "environment_template", @@ -46,6 +52,19 @@ "vault_export_secrets", ] +_INSTALL_WARNING = """\ +WARNING: This will install the entire {environment} Phalanx environment +into whatever Kubernetes cluster is currently configured as your default +cluster. 
+ +THIS WILL OVERWRITE THE APPLICATIONS IN YOUR CURRENT KUBERNETES CLUSTER. + +If you have not verified, with kubectl config current-context, that this is +the correct cluster immediately before running this command, answer no and +double-check the cluster before continuing. +""" +"""Warning message displayed by :command:`phalanx environment install`.""" + def _find_config() -> Path: """Find the root of the Phalanx configuration tree. @@ -334,6 +353,93 @@ def environment() -> None: """Commands for Phalanx environment configuration.""" +@environment.command("install") +@click.argument("environment") +@click.option( + "-c", + "--config", + type=click.Path(path_type=Path), + default=None, + help="Path to root of Phalanx configuration.", +) +@click.option( + "--git-branch", + default=None, + envvar="GITHUB_HEAD_REF", + help="Override Git branch for Argo CD.", +) +@click.option( + "--force-noninteractive", + default=False, + is_flag=True, + help="Force installation without a prompt.", +) +@click.option( + "--vault-role-id", + default=None, + envvar="VAULT_ROLE_ID", + help="Role ID for vault-secrets-operator.", +) +@click.option( + "--vault-secret-id", + default=None, + envvar="VAULT_SECRET_ID", + help="Secret ID for vault-secrets-operator.", +) +@click.option( + "--vault-token", + default=None, + envvar="VAULT_TOKEN", + help="Read-only token for vault-secrets-operator.", +) +def environment_install( + environment: str, + *, + config: Path | None, + force_noninteractive: bool = False, + git_branch: str | None = None, + vault_role_id: str | None = None, + vault_secret_id: str | None = None, + vault_token: str | None = None, +) -> None: + """Install Phalanx into an environment. + + Bootstrap Phalanx for an environment. Assumes that the currently enabled + Kubernetes configuration is the cluster into which to install Phalanx. + + The secrets tree for the environment must already be present in Vault. 
+ Read-only Vault credentials must be supplied by either setting the + environment variables VAULT_ROLE_ID and VAULT_SECRET_ID to the credentials + of a Vault AppRole, or setting VAULT_TOKEN to a read-only Vault token. + """ + if not config: + config = _find_config() + factory = Factory(config) + if vault_role_id and vault_secret_id: + vault_credentials: VaultCredentials = VaultAppRoleCredentials( + role_id=vault_role_id, secret_id=vault_secret_id + ) + elif vault_token: + vault_credentials = VaultTokenCredentials(token=vault_token) + else: + msg = ( + "Either VAULT_TOKEN or both VAULT_ROLE_ID and VAULT_SECRET_ID" + " must be set" + ) + raise click.UsageError(msg) + + # Prompt the user unless they specifically said not to. + if not force_noninteractive: + print(_INSTALL_WARNING.format(environment=environment)) + click.confirm( + "Are you certain you want to continue?", abort=True, default=False + ) + + # Do the installation. + environment_service = factory.create_environment_service() + environment_service.install(environment, vault_credentials, git_branch) + + @environment.command("lint") @click.argument("environment", required=False) @click.option( diff --git a/src/phalanx/exceptions.py b/src/phalanx/exceptions.py index 79628a701b..aa627b29c9 100644 --- a/src/phalanx/exceptions.py +++ b/src/phalanx/exceptions.py @@ -10,6 +10,8 @@ __all__ = [ "ApplicationExistsError", "CommandFailedError", + "CommandTimedOutError", + "GitRemoteError", "InvalidApplicationConfigError", "InvalidEnvironmentConfigError", "InvalidSecretConfigError", @@ -70,6 +72,43 @@ def __init__( self.stderr = exc.stderr +class CommandTimedOutError(Exception): + """Execution of a command timed out. + + Parameters + ---------- + command + Command being run. + args + Arguments to that command. + exc + Exception reporting the timeout. + + Attributes + ---------- + stdout + Standard output from the timed-out command. + stderr + Standard error from the timed-out command. 
+ """ + + def __init__( + self, + command: str, + args: Iterable[str], + exc: subprocess.TimeoutExpired, + ) -> None: + args_str = " ".join(args) + msg = f"{command} {args_str} timed out after {exc.timeout}s" + super().__init__(msg) + self.stdout = exc.stdout + self.stderr = exc.stderr + + +class GitRemoteError(Exception): + """Unable to get necessary information from a Git remote.""" + + class InvalidApplicationConfigError(Exception): """Configuration for an application is invalid. @@ -218,8 +257,13 @@ class VaultNotFoundError(Exception): Base URL of the Vault server. path Path that was not found. + key + If provided, key within that path that was not found. """ - def __init__(self, url: str, path: str) -> None: - msg = f"Vault secret {path} not found in server {url}" + def __init__(self, url: str, path: str, key: str | None = None) -> None: + if key: + msg = f"Vault key {key} not found in secret {path} on server {url}" + else: + msg = f"Vault secret {path} not found in server {url}" super().__init__(msg) diff --git a/src/phalanx/factory.py b/src/phalanx/factory.py index 4e03ed0cf9..8de5f8d1f4 100644 --- a/src/phalanx/factory.py +++ b/src/phalanx/factory.py @@ -8,8 +8,10 @@ from .services.environment import EnvironmentService from .services.secrets import SecretsService from .services.vault import VaultService +from .storage.argocd import ArgoCDStorage from .storage.config import ConfigStorage from .storage.helm import HelmStorage +from .storage.kubernetes import KubernetesStorage from .storage.onepassword import OnepasswordStorage from .storage.vault import VaultStorage @@ -59,8 +61,13 @@ def create_environment_service(self) -> EnvironmentService: Service for manipulating environments. 
""" config_storage = self.create_config_storage() - helm_storage = HelmStorage(config_storage) - return EnvironmentService(config_storage, helm_storage) + return EnvironmentService( + config_storage=config_storage, + argocd_storage=ArgoCDStorage(), + kubernetes_storage=KubernetesStorage(), + helm_storage=HelmStorage(config_storage), + vault_storage=VaultStorage(), + ) def create_secrets_service(self) -> SecretsService: """Create service for manipulating Phalanx secrets. diff --git a/src/phalanx/github.py b/src/phalanx/github.py new file mode 100644 index 0000000000..c0ffd1bc3a --- /dev/null +++ b/src/phalanx/github.py @@ -0,0 +1,37 @@ +"""Utility functions used when running under GitHub Actions. + +The utility functions in this module can all be called unconditionally. They +will detect whether the Phalanx command-line tool is being run under GitHub +Actions and suppress the GitHub-specific command output if not. +""" + +from __future__ import annotations + +import os +from collections.abc import Iterator +from contextlib import contextmanager + +__all__ = [ + "action_group", +] + + +@contextmanager +def action_group(title: str) -> Iterator[None]: + """Wrap a sequence of commands in a GitHub Actions group. + + Must be used as a context manager. Any output produced by code that runs + within that context manager will be wrapped into a GitHub Actions display + group with the given title. + + Parameters + ---------- + title + Title of display group. 
+ """ + in_github_actions = os.getenv("GITHUB_ACTIONS") == "true" + if in_github_actions: + print(f"::group::{title}", flush=True) + yield + if in_github_actions: + print("::endgroup::", flush=True) diff --git a/src/phalanx/models/vault.py b/src/phalanx/models/vault.py index c61c1b86ad..261f68f51e 100644 --- a/src/phalanx/models/vault.py +++ b/src/phalanx/models/vault.py @@ -2,6 +2,7 @@ from __future__ import annotations +from abc import ABC, abstractmethod from base64 import b64encode from datetime import datetime @@ -12,8 +13,11 @@ __all__ = [ "VaultAppRole", + "VaultAppRoleCredentials", "VaultAppRoleMetadata", + "VaultCredentials", "VaultToken", + "VaultTokenCredentials", "VaultTokenMetadata", ] @@ -62,6 +66,44 @@ def to_yaml(self) -> str: return yaml.dump(self.model_dump()) +class VaultCredentials(BaseModel, ABC): + """Credentials used for Vault access. + + Can hold either AppRole credentials or a simple token, but always holds + one or the other. + """ + + @abstractmethod + def to_secret_data(self) -> dict[str, str]: + """Construct the corresponding vault-secrets-operator secret.""" + + +class VaultAppRoleCredentials(VaultCredentials): + """Credentials for Vault access using an AppRole.""" + + role_id: str + """Unique identifier of the AppRole.""" + + secret_id: str + """Authentication credentials for the AppRole.""" + + def to_secret_data(self) -> dict[str, str]: + return { + "VAULT_ROLE_ID": self.role_id, + "VAULT_SECRET_ID": self.secret_id, + } + + +class VaultTokenCredentials(VaultCredentials): + """Credentials for Vault access using a token.""" + + token: str + """Vault token.""" + + def to_secret_data(self) -> dict[str, str]: + return {"VAULT_TOKEN": self.token} + + class VaultTokenMetadata(BaseModel): """Metadata about a new or existing Vault token.""" diff --git a/src/phalanx/services/environment.py b/src/phalanx/services/environment.py index f913c69daa..af6ae14ef7 100644 --- a/src/phalanx/services/environment.py +++ 
b/src/phalanx/services/environment.py @@ -2,8 +2,17 @@ from __future__ import annotations +from datetime import timedelta + +from ..exceptions import CommandFailedError, VaultNotFoundError +from ..github import action_group +from ..models.applications import Project +from ..models.vault import VaultCredentials +from ..storage.argocd import ArgoCDStorage from ..storage.config import ConfigStorage from ..storage.helm import HelmStorage +from ..storage.kubernetes import KubernetesStorage +from ..storage.vault import VaultStorage __all__ = ["EnvironmentService"] @@ -15,15 +24,161 @@ class EnvironmentService: ---------- config_storage Storage object for the Phalanx configuration. + argocd_storage + Interface to Argo CD actions. + kubernetes_storage + Interface to direct Kubernetes object manipulation. helm_storage Interface to Helm actions. + vault_storage + Factory class for Vault clients. """ def __init__( - self, config_storage: ConfigStorage, helm_storage: HelmStorage + self, + *, + config_storage: ConfigStorage, + argocd_storage: ArgoCDStorage, + kubernetes_storage: KubernetesStorage, + helm_storage: HelmStorage, + vault_storage: VaultStorage, ) -> None: self._config = config_storage + self._argocd = argocd_storage + self._kubernetes = kubernetes_storage self._helm = helm_storage + self._vault_storage = vault_storage + + def install( + self, + environment_name: str, + vault_credentials: VaultCredentials, + git_branch: str | None = None, + ) -> None: + """Install a Phalanx environment. + + Parameters + ---------- + environment_name + Environment to install. + vault_credentials + Credentials to use for Vault access. These will be installed in + the cluster as a ``Secret`` used by vault-secrets-operator. + git_branch + Git branch to point Argo CD at. If not given, defaults to the + current branch. + + Raises + ------ + CommandFailedError + Raised if one of the underlying commands fails. + VaultNotFoundError + Raised if a necessary secret was not found in Vault. 
+ """ + environment = self._config.load_environment(environment_name) + vault = self._vault_storage.get_vault_client( + environment, credentials=vault_credentials + ) + + # Get information about the local repository. + git_url = self._config.get_git_url() + if not git_branch: + git_branch = self._config.get_git_branch() + + # Get the plain-text Argo CD admin password from Vault. + argocd_secret = vault.get_application_secret("argocd") + argocd_password = argocd_secret.get("admin.plaintext_password") + if not argocd_password: + raise VaultNotFoundError( + vault.url, f"{vault.path}/argocd", "admin.plaintext_password" + ) + + # Add the dependency repositories of the applications we're installing + # directly with Helm, and refresh the Helm dependency cache. + with action_group("Update Helm dependencies"): + repo_urls = set() + for app_name in ("vault-secrets-operator", "argocd"): + app_urls = self._config.get_dependency_repositories(app_name) + repo_urls.update(app_urls) + for url in sorted(repo_urls): + self._helm.repo_add(url) + self._helm.repo_update() + + # Install vault-secrets-operator. Argo CD depends on this, so it has + # to be installed and configured with its Vault secret first. + with action_group("Install vault-secrets-operator"): + self._kubernetes.create_namespace( + "vault-secrets-operator", ignore_fail=True + ) + self._kubernetes.create_generic_secret( + "vault-credentials", + "vault-secrets-operator", + vault_credentials.to_secret_data(), + ) + self._helm.dependency_update("vault-secrets-operator") + self._helm.upgrade_application( + "vault-secrets-operator", + environment.name, + {"vault-secrets-operator.vault.address": vault.url}, + ) + + # Install Argo CD. + with action_group("Install Argo CD"): + self._helm.dependency_update("argocd") + self._helm.upgrade_application( + "argocd", + environment.name, + {"global.vaultSecretsPath": environment.vault_path_prefix}, + ) + + # Create and sync the top-level Argo CD application. 
+ with action_group("Install science-platform app-of-apps"): + self._argocd.login("admin", argocd_password.get_secret_value()) + self._argocd.create_environment( + environment.name, + "science-platform", + git_url=git_url, + git_branch=git_branch, + ) + self._argocd.sync("science-platform") + project = Project.infrastructure + self._argocd.set_project("science-platform", project) + + # Sync Argo CD and wait for it to finish syncing so that the pods + # don't restart in the middle of proxying another Argo CD operation. + with action_group("Sync Argo CD"): + try: + self._argocd.sync("argocd") + except CommandFailedError: + # As of Argo CD 2.10.5, the first execution always fails with + # a spurious error claiming the infrastructure project had not + # been created. This is transient; the second execution + # succeeds. + self._argocd.sync("argocd") + for deployment in ( + "deployment/argocd-server", + "deployment/argocd-repo-server", + "statefulset/argocd-application-controller", + ): + self._kubernetes.wait_for_rollout(deployment, "argocd") + + # Sync applications that others have dependencies on, if they're + # enabled. + with action_group("Sync infrastructure applications"): + for application in ( + "ingress-nginx", + "cert-manager", + "postgres", + "gafaelfawr", + ): + if application in environment.applications: + self._argocd.sync(application) + + # Sync everything else. + with action_group("Sync remaining applications"): + self._argocd.sync_all( + "science-platform", timeout=timedelta(minutes=5) + ) def lint(self, environment: str | None = None) -> bool: """Lint the Helm chart for environments. 
class ArgoCDStorage:
    """Interface to Argo CD operations.

    Calls the :command:`argocd` command-line client. Used primarily by the
    installer. Every invocation is made with ``--port-forward`` into the
    ``argocd`` namespace rather than against a server URL.
    """

    # Flags appended to every argocd invocation so that the client reaches the
    # Argo CD server through a port forward into the argocd namespace.
    _PORT_FORWARD_ARGS = (
        "--port-forward",
        "--port-forward-namespace",
        "argocd",
    )

    # Extra time the argocd process is given beyond the --timeout value passed
    # to it, so that Argo CD normally gets to report the timeout itself. The
    # margin presumably also has to absorb client start-up such as port
    # forward setup, hence more than a single second.
    _TIMEOUT_MARGIN = timedelta(seconds=5)

    def __init__(self) -> None:
        self._argocd = Command("argocd")

    def create_environment(
        self,
        environment: str,
        app_of_apps_name: str,
        *,
        git_url: str,
        git_branch: str,
    ) -> None:
        """Manually create an Argo CD application.

        Used only by the installer for installing the app of apps to bootstrap
        the environment. The application is created with ``--upsert``, so an
        existing application of the same name is updated in place.

        Parameters
        ----------
        environment
            Name of the environment. Selects the
            :file:`values-{environment}.yaml` file.
        app_of_apps_name
            Name of the app of apps Argo CD application.
        git_url
            URL to the Phalanx Git repository.
        git_branch
            Name of the branch in that repository from which to pull the Argo
            CD configuration.

        Raises
        ------
        CommandFailedError
            Raised if Argo CD fails.
        """
        self._run(
            "app",
            "create",
            app_of_apps_name,
            "--repo",
            git_url,
            "--path",
            "environments",
            "--dest-namespace",
            "argocd",
            "--dest-server",
            "https://kubernetes.default.svc",
            "--upsert",
            "--revision",
            git_branch,
            "--helm-set",
            f"repoUrl={git_url}",
            "--helm-set",
            f"targetRevision={git_branch}",
            "--values",
            "values.yaml",
            "--values",
            f"values-{environment}.yaml",
        )

    def login(self, username: str, password: str) -> None:
        """Authenticate to Argo CD.

        Authenticates using username and password authentication with port
        forwarding. This normally must be done before any other Argo CD
        operations.

        Parameters
        ----------
        username
            Username for authentication. (Usually this will be ``admin``.)
        password
            Password for that user.

        Raises
        ------
        CommandFailedError
            Raised if Argo CD fails.
        """
        # NOTE(review): the password is passed as a command-line argument and
        # is therefore visible in the process list of the host running the
        # installer while the command executes.
        self._run(
            "login",
            "--plaintext",
            "--username",
            username,
            "--password",
            password,
        )

    def set_project(self, application: str, project: Project) -> None:
        """Set the Argo CD project of an application.

        Parameters
        ----------
        application
            Application to change.
        project
            Project to move it into.

        Raises
        ------
        CommandFailedError
            Raised if Argo CD fails.
        """
        self._run("app", "set", application, "--project", project.value)

    def sync(
        self, application: str, *, timeout: timedelta = timedelta(seconds=30)
    ) -> None:
        """Sync a specific Argo CD application.

        Parameters
        ----------
        application
            Name of the application.
        timeout
            How long to wait for the sync to complete. The limit enforced on
            the :command:`argocd` process is slightly longer so that Argo CD
            can time out its own operation first.

        Raises
        ------
        CommandFailedError
            Raised if Argo CD fails.
        CommandTimedOutError
            Raised if the command timed out. The timeout is also passed to
            Argo CD as an option, so normally the command should fail and
            raise `~phalanx.exceptions.CommandFailedError` instead. This
            exception means the Argo CD timeout didn't work for some reason.
        """
        self._run(
            "app",
            "sync",
            application,
            "--timeout",
            str(int(timeout.total_seconds())),
            timeout=timeout + self._TIMEOUT_MARGIN,
        )

    def sync_all(
        self,
        app_of_apps_name: str,
        *,
        timeout: timedelta = timedelta(seconds=30),
    ) -> None:
        """Sync all Argo CD applications under an app of apps.

        Applications are selected by the
        ``argocd.argoproj.io/instance`` label.

        Parameters
        ----------
        app_of_apps_name
            Name of the parent app of apps.
        timeout
            How long to wait for the sync to complete. The limit enforced on
            the :command:`argocd` process is slightly longer so that Argo CD
            can time out its own operation first.

        Raises
        ------
        CommandFailedError
            Raised if Argo CD fails.
        CommandTimedOutError
            Raised if the command timed out. The timeout is also passed to
            Argo CD as an option, so normally the command should fail and
            raise `~phalanx.exceptions.CommandFailedError` instead. This
            exception means the Argo CD timeout didn't work for some reason.
        """
        self._run(
            "app",
            "sync",
            "-l",
            f"argocd.argoproj.io/instance={app_of_apps_name}",
            "--timeout",
            str(int(timeout.total_seconds())),
            timeout=timeout + self._TIMEOUT_MARGIN,
        )

    def _run(self, *args: str, timeout: timedelta | None = None) -> None:
        """Run :command:`argocd` with the standard port-forward flags.

        Parameters
        ----------
        *args
            Arguments to :command:`argocd`, without the port-forward flags.
        timeout
            If given, limit on the execution time of the process, passed
            through to the underlying command runner.
        """
        self._argocd.run(*args, *self._PORT_FORWARD_ARGS, timeout=timeout)
def get_git_branch(self) -> str:
    """Get the Git branch of the current repository.

    Returns
    -------
    str
        Name of the currently checked-out branch.

    Raises
    ------
    TypeError
        Raised by GitPython if ``HEAD`` is detached, in which case there
        is no active branch.
    """
    # active_branch is a GitPython Head object rather than a string, so
    # return its name to honor the declared return type.
    return Repo(str(self._path)).active_branch.name

def get_git_url(self) -> str:
    """Get the Git URL of the current repository.

    Assumes that the current repository is a cloned Git repository with a
    remote named ``origin`` and returns the URL of that origin,
    transformed to an ``https`` URL if necessary. This is used to get the
    URL of the repository for configuring Argo CD during installation of
    an environment.

    Returns
    -------
    str
        URL to the Git repository of the current config tree, suitable
        for Argo CD.

    Raises
    ------
    GitRemoteError
        Raised if the ``origin`` remote does not exist or if its URL is
        not in a recognized format.
    """
    repo = Repo(str(self._path))
    try:
        origin = repo.remote("origin")
    except ValueError as e:
        msg = 'Current repository has no remote named "origin"'
        raise GitRemoteError(msg) from e
    if not origin.url:
        raise GitRemoteError('Remote "origin" has no URL')

    # If the URL is not an https URL, accept a few forms of github.com
    # URLs that can be converted into one.
    parsed_url = urlparse(origin.url)
    if parsed_url.scheme == "ssh" and parsed_url.hostname == "github.com":
        # ssh://git@github.com/owner/repo: swap the scheme and drop the
        # user name from the network location.
        return parsed_url._replace(
            scheme="https", netloc=parsed_url.hostname
        ).geturl()
    elif parsed_url.scheme == "https":
        return origin.url
    elif parsed_url.scheme == "":
        # scp-style git@github.com:owner/repo, which has no URL scheme.
        match = re.match(r"git@github\.com:([^:/]+/[^:/]+)$", origin.url)
        if match:
            return "https://github.com/" + match.group(1)

    # If we fell through, we were unable to parse the URL.
    msg = (
        "Cannot determine Argo CD Git URL from origin URL of"
        f' "{origin.url}"'
    )
    raise GitRemoteError(msg)
def upgrade_application(
    self,
    application: str,
    environment: str,
    values: dict[str, str],
    *,
    timeout: timedelta = timedelta(seconds=60),
) -> None:
    """Install or upgrade an application using Helm.

    Runs :command:`helm upgrade --install --wait` on the application chart,
    installing it into a namespace named after the application and
    creating that namespace if needed. Assumes that :command:`helm
    dependency update` has already been run to download any third-party
    charts. Any output to standard error is passed along.

    This method bypasses Argo CD and should only be used by the installer
    to bootstrap the environment.

    Parameters
    ----------
    application
        Name of the application. Also used as the Helm release name and
        the namespace.
    environment
        Name of the environment in which to install that application
        chart, used to select the :file:`values-{environment}.yaml` file
        to add.
    values
        Extra key/value pairs to set. They are joined with commas into a
        single ``--set`` argument.
    timeout
        Fail if the operation takes longer than this. The enforced timeout
        in Python will be one second longer to allow Helm to time out its
        own command first.

    Raises
    ------
    CommandFailedError
        Raised if Helm fails.
    CommandTimedOutError
        Raised if the command timed out. The timeout is also passed to
        Helm as an option, so normally the command should fail and raise
        `~phalanx.exceptions.CommandFailedError` instead. This exception
        means the Helm timeout didn't work for some reason.
    """
    chart_path = self._config.get_application_chart_path(application)
    overrides = ",".join(
        f"{key}={value}" for key, value in values.items()
    )
    helm_timeout = f"{int(timeout.total_seconds())}s"

    # The values files are given relative to the parent of the chart
    # directory, which is used as the working directory below.
    command = [
        "upgrade",
        application,
        str(chart_path),
        "--install",
        "--values",
        f"{application}/values.yaml",
        "--values",
        f"{application}/values-{environment}.yaml",
        "--set",
        overrides,
        "--create-namespace",
        "--namespace",
        application,
        "--timeout",
        helm_timeout,
        "--wait",
    ]
    self._helm.run(
        *command,
        cwd=chart_path.parent,
        timeout=timeout + timedelta(seconds=1),
    )
class KubernetesStorage:
    """Storage layer for direct Kubernetes operations.

    Used primarily by the installer. All operations shell out to
    :command:`kubectl` rather than using one of the Python Kubernetes
    libraries, since that seemed simpler at the time.
    """

    def __init__(self) -> None:
        self._kubectl = Command("kubectl")

    def create_namespace(
        self, namespace: str, *, ignore_fail: bool = False
    ) -> None:
        """Create a Kubernetes namespace.

        Parameters
        ----------
        namespace
            Namespace to create.
        ignore_fail
            If `True`, ignore failures, such as when the namespace already
            exists.

        Raises
        ------
        CommandFailedError
            Raised if the namespace creation fails, and ``ignore_fail`` was
            not set to `True`.
        """
        command = ("create", "ns", namespace)
        self._kubectl.run(*command, ignore_fail=ignore_fail)

    def create_generic_secret(
        self, name: str, namespace: str, keys: dict[str, str]
    ) -> None:
        """Create a generic Kubernetes ``Secret`` resource.

        Each key and value pair is handed to :command:`kubectl` as a
        ``--from-literal`` command-line argument.

        Parameters
        ----------
        name
            Name of the secret.
        namespace
            Namespace of the secret.
        keys
            Key and value pairs to put into the secret.

        Raises
        ------
        CommandFailedError
            Raised if :command:`kubectl` fails.
        """
        literals = [
            f"--from-literal={key}={value}" for key, value in keys.items()
        ]
        self._kubectl.run(
            "create",
            "secret",
            "generic",
            name,
            "--namespace",
            namespace,
            *literals,
        )

    def wait_for_rollout(self, name: str, namespace: str) -> None:
        """Wait for a Kubernetes rollout to complete.

        Parameters
        ----------
        name
            Name of the rollout. This should be the type of object (usually
            either ``deployment`` or ``statefulset``), followed by a slash
            and the name of the object.
        namespace
            Namespace in which the rollout is happening.

        Raises
        ------
        CommandFailedError
            Raised if :command:`kubectl` fails.
        """
        command = ("-n", namespace, "rollout", "status", name)
        self._kubectl.run(*command)
def __init__(
    self,
    url: str,
    path: str,
    credentials: VaultCredentials | None = None,
) -> None:
    # Set up an hvac client for the given server and, if explicit
    # credentials were provided, authenticate with them. With no
    # credentials, nothing further is done here and authentication is left
    # to the hvac client's defaults.
    self.url = url

    # Only the part of the path after the first slash is kept; the leading
    # component is discarded (presumably it names the secrets mount).
    _discarded, remainder = path.split("/", 1)
    self.path = remainder

    client = hvac.Client(url)
    client.secrets.kv.default_kv_version = 2
    self._vault = client

    if isinstance(credentials, VaultAppRoleCredentials):
        self._vault.auth.approle.login(
            role_id=credentials.role_id,
            secret_id=credentials.secret_id,
        )
    elif isinstance(credentials, VaultTokenCredentials):
        self._vault.token = credentials.token
""" try: - r = self._vault.secrets.kv.list_secrets(self._path) + r = self._vault.secrets.kv.list_secrets(self.path) except InvalidPath as e: - raise VaultNotFoundError(self._url, self._path) from e + raise VaultNotFoundError(self.url, self.path) from e return r["data"]["keys"] def list_token_accessors(self) -> list[str]: @@ -314,7 +341,7 @@ def store_application_secret( values Secret key and value pairs. """ - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" secret = {k: v.get_secret_value() for k, v in values.items()} self._vault.secrets.kv.create_or_update_secret(path, secret) @@ -332,7 +359,7 @@ def update_application_secret( value New value for that secret key. """ - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" self._vault.secrets.kv.patch(path, {key: value.get_secret_value()}) @@ -340,7 +367,11 @@ class VaultStorage: """Create Vault clients for specific environments.""" def get_vault_client( - self, env: EnvironmentBaseConfig, path_prefix: str | None = None + self, + env: EnvironmentBaseConfig, + path_prefix: str | None = None, + *, + credentials: VaultCredentials | None = None, ) -> VaultClient: """Return a Vault client configured for the given environment. @@ -351,14 +382,24 @@ def get_vault_client( path_prefix Path prefix within Vault for application secrets. If given, this overrides the path prefix in the environment configuration. + credentials + Credentials to use for authentication. If this is not set, fall + back on the default library behavior of getting the token from + the environment or the user's home directory. Returns ------- VaultClient Vault client configured to manage secrets for that environment. + + Raises + ------ + ValueError + Raised if ``vaultUrl`` is not set for the environment or if both + a Vault AppRole and a Vault token were provided. 
""" if not path_prefix: path_prefix = env.vault_path_prefix if not env.vault_url: raise ValueError("vaultUrl not set for this environment") - return VaultClient(str(env.vault_url), path_prefix) + return VaultClient(str(env.vault_url), path_prefix, credentials) diff --git a/tox.ini b/tox.ini index 99fc150b99..87733c00bd 100644 --- a/tox.ini +++ b/tox.ini @@ -32,8 +32,14 @@ depends = commands = coverage report +[testenv:install] +description = Run the installer (used for CI) +commands = phalanx environment install {posargs} +passenv = + GITHUB_* + [testenv:lint] -description = Lint codebase by running pre-commit (Black, isort, Flake8). +description = Lint codebase by running pre-commit skip_install = true deps = pre-commit From 4bb4d10b7a61441c3a21fcc6a2ad6b129f25dd69 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Fri, 5 Apr 2024 14:24:07 -0700 Subject: [PATCH 2/7] Document the new Phalanx installer Remove documentation for the old install script and document the new installation process. Fix an ordering problem in the documentation for how to set up a new environment by putting assembly of the configuration for that environment before setting up secrets management. 
--- .gitignore | 2 - README.md | 2 +- applications/argocd/secrets.yaml | 6 +- docs/about/repository.rst | 13 +-- docs/admin/create-environment.rst | 55 +++++++++ docs/admin/hostnames.rst | 6 + docs/admin/index.rst | 1 + docs/admin/installation.rst | 105 +++++++++--------- docs/admin/migrating-secrets.rst | 2 +- docs/admin/requirements.rst | 8 +- docs/applications/argocd/bootstrap.rst | 2 +- docs/applications/argocd/upgrade.rst | 25 +++-- .../vault-secrets-operator/bootstrap.rst | 35 +++++- pyproject.toml | 1 - 14 files changed, 170 insertions(+), 93 deletions(-) create mode 100644 docs/admin/create-environment.rst diff --git a/.gitignore b/.gitignore index 424a99fcb7..a553144547 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -/installer/secrets/ -/installer/docker-creds /applications/*/charts/*.tgz /applications-expanded/ **/Chart.lock diff --git a/README.md b/README.md index 8d82cd2a83..d32c7ff799 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Phalanx This is the Argo CD repository for the Rubin Science Platform. -It stores the root Argo CD application, deployment configuration for the other applications, the installer, and other helper scripts. +It stores the root Argo CD application, deployment configuration for the other applications, and a command-line tool to manage Phalanx environments. See [phalanx.lsst.io](https://phalanx.lsst.io/) for full documentation. diff --git a/applications/argocd/secrets.yaml b/applications/argocd/secrets.yaml index 165335a81b..774814ab49 100644 --- a/applications/argocd/secrets.yaml +++ b/applications/argocd/secrets.yaml @@ -2,9 +2,9 @@ description: >- Admin password for Argo CD. This password is normally not used because Argo CD is configured to use Keycloak, Google, or GitHub authentication, - but it is used by the installer (which cannot use external authentication) - and is useful as a fallback if external authentication is not working for - some reason. This secret can be changed at any time. 
+ but it is used during installation and is useful as a fallback if external + authentication is not working for some reason. This secret can be changed + at any time. generate: type: password "admin.password": diff --git a/docs/about/repository.rst b/docs/about/repository.rst index 89c25a23aa..ad5104650b 100644 --- a/docs/about/repository.rst +++ b/docs/about/repository.rst @@ -55,17 +55,6 @@ Each environment then has a file named :file:`values-{environment}.yaml` that de The templates directory also contains the Argo CD ``AppProject`` resources, which are used to classify the applications into groups for access control. -installer directory -------------------- - -:bdg-link-primary-line:`Browse installer/ on GitHub ` - -This directory contains a script named `install.sh `__. -The arguments to this are the name of the environment, the Vault RoleID, and the Vault SecretID (see :ref:`secrets` for more details on Vault). -This installer script is the entry point for setting up a new environment. -It can also be run on an existing environment to update it. -See the :ref:`environment bootstrapping documentation ` for details. - charts directory ---------------- @@ -136,7 +125,7 @@ In one check, Pre-commit regenerates Helm chart documentation for applications w See the `.pre-commit-config.yaml `__ file for configuration details. Learn how to set up Pre-commit in your local editing environment in :doc:`local-environment-setup`. -Second, GitHub Actions runs a CI workflow (`.github/workflows/ci.yaml `__). +Second, GitHub Actions runs a CI workflow (`.github/workflows/ci.yaml `__). This workflow has four key jobs: - Linting with Pre-commit_, mirroring the local editing environment. 
diff --git a/docs/admin/create-environment.rst b/docs/admin/create-environment.rst new file mode 100644 index 0000000000..d586bb3399 --- /dev/null +++ b/docs/admin/create-environment.rst @@ -0,0 +1,55 @@ +################################ +Create a new Phalanx environment +################################ + +Each separate installation of Phalanx is called an environment. +An environment has a hostname, Vault server and path to its secrets, and a set of Phalanx applications that should be installed in that environment. + +Each Phalanx environment must be installed in a separate Kubernetes cluster. +Two Phalanx environments cannot coexist in the same cluster. + +Before starting this process, ensure that you have met the :doc:`requirements to run Phalanx ` and that you have decided on your :doc:`handling of hostnames and TLS `. + +Creating an environment +======================= + +To create a new Phalanx environment, take the following steps: + +.. rst-class:: open + +#. Fork the `Phalanx repository`_ if this work is separate from the SQuaRE-managed environments. + +#. Create a new :file:`values-{environment}.yaml` file in `environments `__. + Start with a template copied from an existing environment that's similar to the new environment. + Edit it so that ``name``, ``fqdn``, ``vaultUrl``, and ``vaultPathPrefix`` at the top match your new environment. + You may omit ``vaultUrl`` for SQuaRE-managed environments. + See :doc:`secrets-setup` for more information about the latter two settings and additional settings you may need. + If the environment will be hosted on Google Kubernetes Engine, also fill out ``gcp.projectId``, ``gcp.region``, and ``gcp.clusterName`` with metadata about where the environment will be hosted. + Enable the applications this environment should include. + +#. Do what DNS setup you can. + If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. 
+ If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. + +#. Decide on your approach to user home directory storage. + The Notebook Aspect (the ``nublado`` application) requires a POSIX file system. + The most frequently used method of providing that file system is NFS mounts, but you may instead want to use persistent volume claims or a different file system that's mounted on the Kubernetes cluster nodes and exposed to pods via ``hostPath``. + Whatever storage you choose, you will need to configure appropriate mount points in :px-app:`nublado` when you configure each application in the next step. + +#. For each enabled application, create a corresponding :file:`values-{environment}.yaml` file in the relevant directory under `applications `__. + Customization will vary from application to application. + The following applications have special bootstrapping considerations: + + - :px-app-bootstrap:`argocd` + - :px-app-bootstrap:`gafaelfawr` + - :px-app-bootstrap:`nublado` + - :px-app-bootstrap:`portal` + - :px-app-bootstrap:`squareone` + +#. Add the URL of your new environment to :file:`docs/documenteer.toml` under ``sphinx.linkcheck.ignore``. + The Argo CD URL of your environment will be unreachable, so you need to tell Sphinx's link checking to ignore it. + +Next steps +========== + +- Define the secrets for your new environment and store them in Vault: :doc:`secrets-setup` diff --git a/docs/admin/hostnames.rst b/docs/admin/hostnames.rst index c1a55eb5df..3045c95f00 100644 --- a/docs/admin/hostnames.rst +++ b/docs/admin/hostnames.rst @@ -24,7 +24,13 @@ To use the first approach, you must have the following: * An :abbr:`AWS (Amazon Web Services)` account in which you can create two Route 53 hosted domains. You must use this domain for the hostname of the Science Platform installation.
+ * The ability to delegate to that Route 53 hosted domain from some public DNS domain. This means either registering a domain via Amazon, registering a domain elsewhere and pointing it to Amazon's Route 53 DNS servers, or creating a subdomain of an existing public domain by adding ``NS`` records to that domain for a subdomain hosted on Route 53. If neither of those requirements sound familiar, you almost certainly want to use the second option and purchase a commercial certificate. + +Next steps +========== + +- Define the configuration for your new Phalanx environment: :doc:`create-environment` diff --git a/docs/admin/index.rst b/docs/admin/index.rst index a7ac396f4a..21c7ef39a5 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -17,6 +17,7 @@ Administrators operate infrastructure, manage secrets, and are involved in the d requirements hostnames + create-environment secrets-setup installation diff --git a/docs/admin/installation.rst b/docs/admin/installation.rst index 0768857eae..92e9924eb8 100644 --- a/docs/admin/installation.rst +++ b/docs/admin/installation.rst @@ -2,84 +2,81 @@ Installing a Phalanx environment ################################ -Each separate installation of Phalanx is called an environment. -An environment has a hostname, Vault server and path to its secrets, and a set of Phalanx applications that should be installed in that environment. - +Once you have :doc:`created the configuration for your new environment ` and :doc:`set up secrets `, you are ready to do the installation. Before starting this process, ensure that you have met the :doc:`requirements to run Phalanx `. -Then, set up the required secrets for your new environment as documented in :doc:`secrets-setup`. If you are setting up an environment that will be running a 1Password Connect server for itself, you will need to take special bootstrapping steps. See :px-app-bootstrap:`onepassword-connect` for more information. 
-Creating an environment -======================= +Installing Phalanx +================== -To create a new Phalanx environment, take the following steps: +Follow these steps to install Phalanx. +These can be run repeatedly to reinstall Phalanx over an existing deployment. -.. rst-class:: open +#. Create a Vault AppRole that will be used by Vault Secrets Operator. + Set the ``VAULT_TOKEN`` environment variable to a token with the ability to create new AppRoles (for SQuaRE clusters, use the admin token), and then run: -#. Fork the `Phalanx repository`_ if this work is separate from the SQuaRE-managed environments. + .. prompt:: bash -#. Create a new :file:`values-{environment}.yaml` file in `environments `__. - Start with a template copied from an existing environment that's similar to the new environment. - Edit it so that ``name``, ``fqdn``, ``vaultUrl``, and ``vaultPathPrefix`` at the top match your new environment. - You may omit ``vaultUrl`` for SQuaRE-managed environments. - See :doc:`secrets-setup` for more information about the latter two settings and additional settings you may need. - If the environment will be hosted on Google Kubernetes Engine, also fill out ``gcp.projectId``, ``gcp.region``, and ``gcp.clusterName`` with metadata about where the environment will be hosted. - Enable the applications this environment should include. + phalanx vault create-read-approle -#. Decide on your approach to TLS certificates. - See :doc:`hostnames` for more details. - This may require DNS configuration in Route 53 if this is the first deployment in a new domain and you are using Let's Encrypt for certificates. + Unset ``VAULT_TOKEN`` when this command finishes. -#. Do what DNS setup you can. - If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. 
- If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. + Be aware that this will invalidate any existing AppRole for that environment. -#. Decide on your approach to user home directory storage. - The Notebook Aspect (the ``nublado`` application) requires a POSIX file system. - The most frequently used method of providing that file system is NFS mounts, but you may instead want to use persistent volume claims or a different file system that's mounted on the Kubernetes cluster nodes and exposed to pods via ``hostPath``. - Whatever storage you choose, you will need to configure appropriate mount points in :px-app:`nublado` when you configure each application in the next step. +#. Set the environment variables ``VAULT_ROLE_ID`` and ``VAULT_SECRET_ID`` to the Role ID and Secret ID printed out by that command. -#. For each enabled application, create a corresponding :file:`values-{environment}.yaml` file in the relevant directory under `applications `__. - Customization will vary from application to application. - The following applications have special bootstrapping considerations: +#. Ensure that your default Kubernetes cluster for :command:`kubectl` and :command:`helm` is set to point to the Kubernetes cluster into which you want to install the Phalanx environment. + You can verify this with :command:`kubectl config current-context`. - - :px-app-bootstrap:`argocd` - - :px-app-bootstrap:`gafaelfawr` - - :px-app-bootstrap:`nublado` - - :px-app-bootstrap:`portal` - - :px-app-bootstrap:`squareone` +#. Start the install: -#. Add the URL of your new environment to :file:`docs/documenteer.toml` under ``phinx.linkcheck.ignore``. - The Argo CD URL of your environment will be unreachable, so you need to tell Sphinx valid link checking to ignore it. + .. 
prompt:: bash -Installing Phalanx -================== + phalanx environment install -Once you have defined a Phalanx environment, follow these steps to install it. -These can be run repeatedly to reinstall Phalanx over an existing deployment. + You will be prompted to confirm that you want to proceed. -#. Create a Vault AppRole that will be used by Vault Secrets Operator. +#. If the installation is using a dynamically-assigned IP address, you will need to set up the A record (and AAAA record if using IPv6) in DNS once that address has been assigned. + Wait until the ``ingress-nginx`` application has been installed, which happens after Argo CD has been installed but before most applications are synced. + Then, wait for it to be assigned an external IP address. + Obtain that IP address with :command:`kubectl get -n ingress-nginx service` (look for the external IP). + Then, set the A record in DNS for your environment to that address. + For installations that are intended to be long-lived and that can reliably request the same address, add that IP address to the :file:`values-{environment}.yaml` file in :file:`applications/ingress-nginx` for your environment. + The setting to use is ``ingress-nginx.controller.service.loadBalancerIP``. + This ensures that ingress-nginx will always request that address. - .. prompt:: bash +#. If you are deploying on Google Cloud Platform, consider converting the dynamically-assigned IP address to a static IP. + You can do this in the GCP console under :menuselection:`VPC Network -> IP addresses`. - phalanx vault create-read-approle +#. Debug any problems during installation. + The most common source of problems are errors or missing configuration in the :file:`values-{environment}.yaml` files you created for each application. + You can safely run the installer repeatedly as you debug and fix issues. - Be aware that this will invalidate any existing AppRole for that environment. 
+Using a Vault token rather than AppRole +======================================= -#. Run the installer script at `installer/install.sh `__. +The default and recommended installation approach is to use a Vault AppRole for vault-secrets-operator to authenticate to Vault. +However, using a read-only Vault token is still supported. - .. prompt:: bash +To use a Vault token instead of an AppRole, create an appropriate read-only token with access to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. +Skip step 1 in the normal installation process, since you don't need to create an AppRole. +In step 2, set ``VAULT_TOKEN`` to the read-only token and do not set ``VAULT_ROLE_ID`` or ``VAULT_SECRET_ID``. +Then continue the regular installation process. - installer/install.sh +Troubleshooting tools +===================== - ```` and ```` are the Role ID and Secret ID of the Vault AppRole created in the previous step. +The tools to use for troubleshooting will vary depending on how far the installer has gotten. - Debug any problems. - The most common source of problems are errors or missing configuration in the :file:`values-{environment}.yaml` files you created for each application. - You can safely run the installer repeatedly as you debug and fix issues. +- If something fails before Argo CD is installed, you will need to use :command:`kubectl` to look around in Kubernetes, retrieve logs, and look at error messages. + +- If Argo CD is installed and working, but ingress-nginx fails, you can additionally use the :command:`argocd` command-line tool. + The installer will have created login credentials for Argo CD as the admin user for you, so you shouldn't need to do that again. + Pass the flags ``--port-forward --port-forward-namespace argocd`` to :command:`argocd` to proxy to the Argo CD server without needing to have the ingress working. -#.
If the installation is using a dynamically-assigned IP address, while the installer is running, wait until the ingress-nginx-controller service comes up and has an external IP address. - Then, set the A record for your endpoint to that address (or set an A record with that IP address for the ingress and a CNAME from the endpoint to the A record). - For installations that are intended to be long-lived, it is worth capturing this IP address at this point and modifying the ``ingress-nginx`` configuration to use it statically should you ever need to reinstall the instance. +- If the ingress was successfully installed and you've created the DNS record for your environment, you can use the Argo CD web UI the same as you would with a fully-installed cluster. + If your Argo CD authentication configuration is working (see :doc:`/applications/argocd/authentication`), you can log in as you normally would. + If it is not, you will need to use the admin password. + You can get this from Vault in the ``admin.plaintext_password`` key of the ``argocd`` secret. diff --git a/docs/admin/migrating-secrets.rst b/docs/admin/migrating-secrets.rst index 10c20d9197..5ae14d5a6a 100644 --- a/docs/admin/migrating-secrets.rst +++ b/docs/admin/migrating-secrets.rst @@ -3,7 +3,7 @@ Migrating to the new secrets management system ############################################## We introduced a new command-line-driven secrets management system for Phalanx environments in September of 2023. -This page documents how to migrate to the new system from the older scripts in :file:`installer`. +This page documents how to migrate to the new system. These instructions assume that, if you are using 1Password for static secrets, you have already set up a 1Password vault and enabled the :px-app:`1Password Connect server ` for this environment. If you have not yet done this, see :doc:`/applications/onepassword-connect/add-new-environment`. 
diff --git a/docs/admin/requirements.rst b/docs/admin/requirements.rst index e024ae6b29..5d8611273e 100644 --- a/docs/admin/requirements.rst +++ b/docs/admin/requirements.rst @@ -54,7 +54,7 @@ For installing an environment, you will also need the following tools: .. warning:: - Although the Argo CD command-line client must be installed to use the Phalanx installer, do not use it to create applications. + Although the Argo CD command-line client must be installed to install Phalanx in a new environment, do not use it to create applications. All Argo CD applications should be managed through Phalanx and the ``science-platform`` app of apps. - The Vault command-line client. @@ -62,3 +62,9 @@ For installing an environment, you will also need the following tools: To see the version currently used for testing, search for ``vault_`` in `.github/workflows/ci.yaml `__. - Git 2.22 or later. + +Next steps +========== + +- Decide on how you plan to handle hostnames and TLS: :doc:`hostnames` +- Create the configuration for your new Phalanx environment: :doc:`create-environment` diff --git a/docs/applications/argocd/bootstrap.rst b/docs/applications/argocd/bootstrap.rst index ad9c177cec..131550eb51 100644 --- a/docs/applications/argocd/bootstrap.rst +++ b/docs/applications/argocd/bootstrap.rst @@ -9,7 +9,7 @@ Authentication Initial installation of the Rubin Science Platform is done using Argo CD and a static password for the ``admin`` account. You can then log on to the ``admin`` account using that password to manage the resulting environment. -The password is available as the ``admin.plaintext_password`` key in Vault secret for the ``argocd`` application, and in the ``Secret`` resource named ``argocd-secret`` in the ``argocd`` namespace after installation of the environment. +The password is available as the ``admin.plaintext_password`` key in Vault secret for the ``argocd`` application. 
As part of bootstrapping a new environment, you should also configure per-user authentication. To do this, follow the instructions in :doc:`authentication`. diff --git a/docs/applications/argocd/upgrade.rst b/docs/applications/argocd/upgrade.rst index e309b99a3f..70d247f601 100644 --- a/docs/applications/argocd/upgrade.rst +++ b/docs/applications/argocd/upgrade.rst @@ -71,11 +71,13 @@ Only use this process if the automatic upgrade failed or if there are documented .. code-block:: sh - cd phalanx/installer - helm upgrade --install argocd argo/argo-cd --version $VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + cd applications + helm upgrade --install argocd argocd \ + --values argocd/values.yaml --values argocd/values-$ENVIRONMENT.yaml \ + --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ + --namespace argocd --wait --timeout 900s - Replace ``$VERSION`` with the Helm chart version (**not** the Argo CD application version) that you want to install. + Replace ``$ENVIRONMENT`` with the name of the Phalanx environment you're attempting to repair, and ``$VAULT_PATH_PREFIX`` with the Vault path prefix (from :file:`environments/values-{environment}.yaml`) for that environment. If all goes well, you can now view the UI at ``/argo-cd`` and confirm that everything still looks correct. @@ -106,17 +108,18 @@ You can then recreate the namespace, reinstall Argo CD, and restore the backup: .. 
code-block:: sh - kubectl create namespace argocd - cd phalanx/installer - helm upgrade --install argocd argo/argo-cd --version $HELM_VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + cd applications + helm upgrade --install argocd argocd \ + --values argocd/values.yaml --values argocd/values-$ENVIRONMENT.yaml \ + --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ + --namespace argocd --create-namespace --wait --timeout 900s chmod 644 ~/.kube/config docker run -i -v ~/.kube:/home/argocd/.kube --rm \ argoproj/argocd:$VERSION argocd-util import -n argocd - < backup.yaml chmod 600 ~/.kube/config -Replace ``$HELM_VERSION`` with the version of the Helm chart you want to use and ``$VERSION`` with the corresponding Argo CD version (as shown via ``helm search repo``). +Replace ``$ENVIRONMENT`` with the name of the Phalanx environment you're attempting to repair, and ``$VAULT_PATH_PREFIX`` with the Vault path prefix (from :file:`environments/values-{environment}.yaml`) for that environment. This should hopefully restore Argo CD to a working state. -If it doesn't, you'll need to reinstall it using the more extended process used by the cluster installer. -See `installer/install.sh `__ for the commands to run. +If it doesn't, you'll need to reinstall it using the more extended process used by :command:`phalanx environment install`. +See :doc:`/admin/installation` for that process. diff --git a/docs/applications/vault-secrets-operator/bootstrap.rst b/docs/applications/vault-secrets-operator/bootstrap.rst index e41195add0..d6403209fc 100644 --- a/docs/applications/vault-secrets-operator/bootstrap.rst +++ b/docs/applications/vault-secrets-operator/bootstrap.rst @@ -5,7 +5,10 @@ Bootstrapping vault-secrets-operator #################################### Vault Secrets Operator is the only component of the Science Platform whose secret has to be manually created, so that it can create the secrets for all other applications. 
-This will be done automatically by the `install script `__. +This will be done automatically by the installer. + +AppRole authentication +====================== When using the newer, recommended :ref:`secrets management system `, the secret created by the installer will look like this: @@ -21,9 +24,30 @@ When using the newer, recommended :ref:`secrets management system type: Opaque -This secret will normally be created by either the installer or :command:`phalanx vault create-read-approle`. +This secret will normally be created by either the installer or by piping :command:`phalanx vault create-read-approle --as-secret vault-credentials` into :command:`kubectl apply`. +This is the default configuration of vault-secrets-operator. + +Token authentication +==================== -Using a regular Vault token is still supported, in which case the secret will look like this: +Using a regular Vault token is still supported, but requires special per-environment configuration for vault-secrets-operator. +Put the following into :file:`applications/vault-secrets-operator/values-{environment}.yaml`: + +.. code-block:: yaml + + vault-secrets-operator: + environmentVars: + - name: "VAULT_TOKEN" + valueFrom: + secretKeyRef: + name: "vault-secrets-operator" + key: "VAULT_TOKEN" + - name: "VAULT_TOKEN_LEASE_DURATION" + value: "31536000" + vault: + authMethod: "token" + +In this case, the created secret will look like: .. code-block:: yaml @@ -36,6 +60,5 @@ Using a regular Vault token is still supported, in which case the secret will lo VAULT_TOKEN: type: Opaque -This secret will be created by the installer when given a ``VAULT_TOKEN`` parameter. - -In either case, the Vault token or AppRole must have read access to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. +This secret will be created by the installer when ``VAULT_TOKEN`` is set in the environment instead of ``VAULT_ROLE_ID`` and ``VAULT_SECRET_ID``. 
+This Vault token must have raed access (and should not have write access) to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. diff --git a/pyproject.toml b/pyproject.toml index 98ec5c2ac6..7e1896adf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,6 @@ python_files = [ [tool.ruff] exclude = [ "docs/**", - "installer/**", ] line-length = 79 target-version = "py312" From fd17468aa7194b618716113a024f68accc52b6a9 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 9 Apr 2024 08:28:42 -0700 Subject: [PATCH 3/7] Rephrase vault-secrets-operator bootstrapping Be a bit clearer about how the secret for vault-secrets-operator is created. --- .../vault-secrets-operator/bootstrap.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/applications/vault-secrets-operator/bootstrap.rst b/docs/applications/vault-secrets-operator/bootstrap.rst index d6403209fc..ec62a8a0d5 100644 --- a/docs/applications/vault-secrets-operator/bootstrap.rst +++ b/docs/applications/vault-secrets-operator/bootstrap.rst @@ -4,13 +4,15 @@ Bootstrapping vault-secrets-operator #################################### -Vault Secrets Operator is the only component of the Science Platform whose secret has to be manually created, so that it can create the secrets for all other applications. -This will be done automatically by the installer. +Because it is the application that manages all of the other secrets in Phalanx, the secret for vault-secrets-operator itself, containing its Vault credentials, requires special handling. +It is normally created as the first step of a Phalanx bootstrap by the :doc:`installer `. + +This secret (``vault-credentials`` in the ``vault-secrets-operator`` namespace) exists only as a normal ``Secret`` resource and is not managed by Argo CD, so it will not appear in the Argo CD dashboard for the vault-secrets-operator application. 
AppRole authentication ====================== -When using the newer, recommended :ref:`secrets management system `, the secret created by the installer will look like this: +When using the newer, recommended :ref:`secrets management system `, vault-secrets-operator's secret looks like this: .. code-block:: yaml @@ -43,7 +45,7 @@ Put the following into :file:`applications/vault-secrets-operator/values-{enviro name: "vault-secrets-operator" key: "VAULT_TOKEN" - name: "VAULT_TOKEN_LEASE_DURATION" - value: "31536000" + value: "31536000" # One year vault: authMethod: "token" @@ -61,4 +63,4 @@ In this case, the created secret will look like: type: Opaque This secret will be created by the installer when ``VAULT_TOKEN`` is set in the environment instead of ``VAULT_ROLE_ID`` and ``VAULT_SECRET_ID``. -This Vault token must have raed access (and should not have write access) to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. +This Vault token must have read access (and should not have write access) to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. From fd283a660b1a24779d5d21d452035bde37888953 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 9 Apr 2024 08:38:46 -0700 Subject: [PATCH 4/7] Improve some docstrings Add better explanations of the GitHub-specific output code and the command execution layer. --- src/phalanx/github.py | 3 ++- src/phalanx/storage/command.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/phalanx/github.py b/src/phalanx/github.py index c0ffd1bc3a..9af9e53749 100644 --- a/src/phalanx/github.py +++ b/src/phalanx/github.py @@ -2,7 +2,8 @@ The utility functions in this module can all be called unconditionally. They will detect whether the Phalanx command-line tool is being run under GitHub -Actions and suppress the GitHub-specific command output if not. 
+Actions and, if so, add additional GitHub-specific markers to the output to +improve display in GitHub Actions logs. """ from __future__ import annotations diff --git a/src/phalanx/storage/command.py b/src/phalanx/storage/command.py index 941d0c402b..75f88b7f7f 100644 --- a/src/phalanx/storage/command.py +++ b/src/phalanx/storage/command.py @@ -33,7 +33,7 @@ def __init__(self, command: str) -> None: def capture( self, *args: str, cwd: Path | None = None ) -> subprocess.CompletedProcess: - """Run Helm, checking for errors and capturing the output. + """Run the command, checking for errors and capturing the output. This method should only be called by subclasses, which should provide a higher-level interface used by the rest of the program. @@ -81,8 +81,10 @@ def run( ) -> None: """Run the command with the provided arguments. - This method should only be called by subclasses, which should provide - a higher-level interface used by the rest of the program. + Standard output and standard error are not redirected and will go to + the standard output and error of the caller. This method should only + be called by subclasses, which should provide a higher-level interface + used by the rest of the program. Parameters ---------- From a2534d73d2175e0e9e180c64994a4528e0fafcf8 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 9 Apr 2024 12:42:38 -0700 Subject: [PATCH 5/7] Increase sync timeout in installer Wait for a minute instead of 30 seconds for the sync of each infrastructure application. 
--- src/phalanx/services/environment.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/phalanx/services/environment.py b/src/phalanx/services/environment.py index af6ae14ef7..79256c7e62 100644 --- a/src/phalanx/services/environment.py +++ b/src/phalanx/services/environment.py @@ -172,7 +172,9 @@ def install( "gafaelfawr", ): if application in environment.applications: - self._argocd.sync(application) + self._argocd.sync( + application, timeout=timedelta(minutes=1) + ) # Sync everything else. with action_group("Sync remaining applications"): From c820e883203ede1279f9bfff7e1eced4e103dd0b Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 9 Apr 2024 13:12:53 -0700 Subject: [PATCH 6/7] Fix obtaining the current Git branch When running in the merge queue, we do fall back on Git operations to determine the current branch. Return the branch name, not a Head object. --- src/phalanx/storage/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phalanx/storage/config.py b/src/phalanx/storage/config.py index 8f90cc76d7..86e3340a39 100644 --- a/src/phalanx/storage/config.py +++ b/src/phalanx/storage/config.py @@ -322,7 +322,7 @@ def get_git_branch(self) -> str: str Branch name. """ - return Repo(str(self._path)).active_branch + return Repo(str(self._path)).active_branch.name def get_git_url(self) -> str: """Get the Git URL of the current repository. From eb44ffd4bef5d371026c595a83798d3f40bde0db Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 9 Apr 2024 13:28:46 -0700 Subject: [PATCH 7/7] Further increase installer timeouts We're still getting timeouts in GitHub Actions. Increase the timeouts for Helm and Argo CD operations even further. 
--- src/phalanx/services/environment.py | 4 +--- src/phalanx/storage/argocd.py | 2 +- src/phalanx/storage/helm.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/phalanx/services/environment.py b/src/phalanx/services/environment.py index 79256c7e62..af6ae14ef7 100644 --- a/src/phalanx/services/environment.py +++ b/src/phalanx/services/environment.py @@ -172,9 +172,7 @@ def install( "gafaelfawr", ): if application in environment.applications: - self._argocd.sync( - application, timeout=timedelta(minutes=1) - ) + self._argocd.sync(application) # Sync everything else. with action_group("Sync remaining applications"): diff --git a/src/phalanx/storage/argocd.py b/src/phalanx/storage/argocd.py index d14f6d6ec0..cd4c4c5231 100644 --- a/src/phalanx/storage/argocd.py +++ b/src/phalanx/storage/argocd.py @@ -136,7 +136,7 @@ def set_project(self, application: str, project: Project) -> None: ) def sync( - self, application: str, *, timeout: timedelta = timedelta(seconds=30) + self, application: str, *, timeout: timedelta = timedelta(minutes=2) ) -> None: """Sync a specific Argo CD application. diff --git a/src/phalanx/storage/helm.py b/src/phalanx/storage/helm.py index fbb497978c..274d236b3d 100644 --- a/src/phalanx/storage/helm.py +++ b/src/phalanx/storage/helm.py @@ -357,7 +357,7 @@ def upgrade_application( environment: str, values: dict[str, str], *, - timeout: timedelta = timedelta(seconds=60), + timeout: timedelta = timedelta(minutes=2), ) -> None: """Install or upgrade an application using Helm.