diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b90aa8a0b2..99be62e916 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -71,10 +71,10 @@ jobs: cache-key-prefix: helm # The minikube job always runs, but it quickly does nothing if no files that - # would affect minikube were changed. This unfortunately requires a lot of + # would affect minikube were changed. This unfortunately requires a lot of # if conditionals on all the steps of the job, but we need the job to run so # that we can make it mandatory before merging, which in turn allows us to - # use automerge. + # use merge queues. minikube: name: Test deploy runs-on: ubuntu-latest @@ -99,7 +99,18 @@ jobs: - "environments/Chart.yaml" - "environments/templates/applications/infrastructure/*" - "environments/values-minikube.yaml" - - "installer/**" + - "src/phalanx/**" + + - name: Download installer dependencies + if: steps.filter.outputs.minikube == 'true' + run: | + curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.15.4/vault_1.15.4_linux_amd64.zip + unzip /tmp/vault.zip + sudo mv vault /usr/local/bin/vault + sudo chmod +x /usr/local/bin/vault + sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.8.6/argocd-linux-amd64 + sudo chmod +x /usr/local/bin/argocd + sudo apt-get install socat - name: Setup Minikube if: steps.filter.outputs.minikube == 'true' @@ -113,23 +124,18 @@ jobs: if: steps.filter.outputs.minikube == 'true' run: kubectl get nodes - - name: Download installer dependencies + - uses: lsst-sqre/run-tox@v1 if: steps.filter.outputs.minikube == 'true' - run: | - curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.15.4/vault_1.15.4_linux_amd64.zip - unzip /tmp/vault.zip - sudo mv vault /usr/local/bin/vault - sudo chmod +x /usr/local/bin/vault - sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.8.6/argocd-linux-amd64 - sudo chmod +x 
/usr/local/bin/argocd - sudo apt-get install socat - - - name: Run installer timeout-minutes: 15 - if: steps.filter.outputs.minikube == 'true' - run: | - cd installer - ./install.sh minikube "${{ secrets.MINIKUBE_VAULT_ROLE_ID }}" "${{ secrets.MINIKUBE_VAULT_SECRET_ID }}" + with: + python-version: "3.12" + tox-envs: install + tox-posargs: >- + --force-noninteractive + --vault-role-id=${{ secrets.MINIKUBE_VAULT_ROLE_ID }} + --vault-secret-id=${{ secrets.MINIKUBE_VAULT_SECRET_ID }} + minikube + cache-key-prefix: test - name: Get final list of resources if: steps.filter.outputs.minikube == 'true' diff --git a/.gitignore b/.gitignore index 424a99fcb7..a553144547 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -/installer/secrets/ -/installer/docker-creds /applications/*/charts/*.tgz /applications-expanded/ **/Chart.lock diff --git a/README.md b/README.md index 8d82cd2a83..d32c7ff799 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Phalanx This is the Argo CD repository for the Rubin Science Platform. -It stores the root Argo CD application, deployment configuration for the other applications, the installer, and other helper scripts. +It stores the root Argo CD application, deployment configuration for the other applications, and a command-line tool to manage Phalanx environments. See [phalanx.lsst.io](https://phalanx.lsst.io/) for full documentation. diff --git a/applications/argocd/secrets.yaml b/applications/argocd/secrets.yaml index 165335a81b..774814ab49 100644 --- a/applications/argocd/secrets.yaml +++ b/applications/argocd/secrets.yaml @@ -2,9 +2,9 @@ description: >- Admin password for Argo CD. This password is normally not used because Argo CD is configured to use Keycloak, Google, or GitHub authentication, - but it is used by the installer (which cannot use external authentication) - and is useful as a fallback if external authentication is not working for - some reason. This secret can be changed at any time. 
+ but it is used during installation and is useful as a fallback if external + authentication is not working for some reason. This secret can be changed + at any time. generate: type: password "admin.password": diff --git a/docs/about/repository.rst b/docs/about/repository.rst index 89c25a23aa..ad5104650b 100644 --- a/docs/about/repository.rst +++ b/docs/about/repository.rst @@ -55,17 +55,6 @@ Each environment then has a file named :file:`values-{environment}.yaml` that de The templates directory also contains the Argo CD ``AppProject`` resources, which are used to classify the applications into groups for access control. -installer directory -------------------- - -:bdg-link-primary-line:`Browse installer/ on GitHub ` - -This directory contains a script named `install.sh `__. -The arguments to this are the name of the environment, the Vault RoleID, and the Vault SecretID (see :ref:`secrets` for more details on Vault). -This installer script is the entry point for setting up a new environment. -It can also be run on an existing environment to update it. -See the :ref:`environment bootstrapping documentation ` for details. - charts directory ---------------- @@ -136,7 +125,7 @@ In one check, Pre-commit regenerates Helm chart documentation for applications w See the `.pre-commit-config.yaml `__ file for configuration details. Learn how to set up Pre-commit in your local editing environment in :doc:`local-environment-setup`. -Second, GitHub Actions runs a CI workflow (`.github/workflows/ci.yaml `__). +Second, GitHub Actions runs a CI workflow (`.github/workflows/ci.yaml `__). This workflow has four key jobs: - Linting with Pre-commit_, mirroring the local editing environment. 
diff --git a/docs/admin/create-environment.rst b/docs/admin/create-environment.rst new file mode 100644 index 0000000000..d586bb3399 --- /dev/null +++ b/docs/admin/create-environment.rst @@ -0,0 +1,55 @@ +################################ +Create a new Phalanx environment +################################ + +Each separate installation of Phalanx is called an environment. +An environment has a hostname, Vault server and path to its secrets, and a set of Phalanx applications that should be installed in that environment. + +Each Phalanx environment must be installed in a separate Kubernetes cluster. +Two Phalanx environments cannot coexist in the same cluster. + +Before starting this process, ensure that you have met the :doc:`requirements to run Phalanx ` and that you have decided on your :doc:`handling of hostnames and TLS `. + +Creating an environment +======================= + +To create a new Phalanx environment, take the following steps: + +.. rst-class:: open + +#. Fork the `Phalanx repository`_ if this work is separate from the SQuaRE-managed environments. + +#. Create a new :file:`values-{environment}.yaml` file in `environments `__. + Start with a template copied from an existing environment that's similar to the new environment. + Edit it so that ``name``, ``fqdn``, ``vaultUrl``, and ``vaultPathPrefix`` at the top match your new environment. + You may omit ``vaultUrl`` for SQuaRE-managed environments. + See :doc:`secrets-setup` for more information about the latter two settings and additional settings you may need. + If the environment will be hosted on Google Kubernetes Engine, also fill out ``gcp.projectId``, ``gcp.region``, and ``gcp.clusterName`` with metadata about where the environment will be hosted. + Enable the applications this environment should include. + +#. Do what DNS setup you can. + If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. 
+ If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. + +#. Decide on your approach to user home directory storage. + The Notebook Aspect (the ``nublado`` application) requires a POSIX file system. + The most frequently used method of providing that file system is NFS mounts, but you may instead want to use persistent volume claims or a different file system that's mounted on the Kubernetes cluster nodes and exposed to pods via ``hostPath``. + Whatever storage you choose, you will need to configure appropriate mount points in :px-app:`nublado` when you configure each application in the next step. + +#. For each enabled application, create a corresponding :file:`values-{environment}.yaml` file in the relevant directory under `applications `__. + Customization will vary from application to application. + The following applications have special bootstrapping considerations: + + - :px-app-bootstrap:`argocd` + - :px-app-bootstrap:`gafaelfawr` + - :px-app-bootstrap:`nublado` + - :px-app-bootstrap:`portal` + - :px-app-bootstrap:`squareone` + +#. Add the URL of your new environment to :file:`docs/documenteer.toml` under ``sphinx.linkcheck.ignore``. + The Argo CD URL of your environment will be unreachable, so you need to tell the Sphinx link checker to ignore it. + +Next steps +========== + +- Define the secrets for your new environment and store them in Vault: :doc:`secrets-setup` diff --git a/docs/admin/hostnames.rst b/docs/admin/hostnames.rst index c1a55eb5df..3045c95f00 100644 --- a/docs/admin/hostnames.rst +++ b/docs/admin/hostnames.rst @@ -24,7 +24,13 @@ To use the first approach, you must have the following: * An :abbr:`AWS (Amazon Web Services)` account in which you can create two Route 53 hosted domains. You must use this domain for the hostname of the Science Platform installation. 
+ * The ability to delegate to that Route 53 hosted domain from some public DNS domain. This means either registering a domain via Amazon, registering a domain elsewhere and pointing it to Amazon's Route 53 DNS servers, or creating a subdomain of an existing public domain by adding ``NS`` records to that domain for a subdomain hosted on Route 53. If neither of those requirements sound familiar, you almost certainly want to use the second option and purchase a commercial certificate. + +Next steps +========== + +- Define the configuration for your new Phalanx environment: :doc:`create-environment` diff --git a/docs/admin/index.rst b/docs/admin/index.rst index a7ac396f4a..21c7ef39a5 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -17,6 +17,7 @@ Administrators operate infrastructure, manage secrets, and are involved in the d requirements hostnames + create-environment secrets-setup installation diff --git a/docs/admin/installation.rst b/docs/admin/installation.rst index 0768857eae..92e9924eb8 100644 --- a/docs/admin/installation.rst +++ b/docs/admin/installation.rst @@ -2,84 +2,81 @@ Installing a Phalanx environment ################################ -Each separate installation of Phalanx is called an environment. -An environment has a hostname, Vault server and path to its secrets, and a set of Phalanx applications that should be installed in that environment. - +Once you have :doc:`created the configuration for your new environment ` and :doc:`set up secrets `, you are ready to do the installation. Before starting this process, ensure that you have met the :doc:`requirements to run Phalanx `. -Then, set up the required secrets for your new environment as documented in :doc:`secrets-setup`. If you are setting up an environment that will be running a 1Password Connect server for itself, you will need to take special bootstrapping steps. See :px-app-bootstrap:`onepassword-connect` for more information. 
-Creating an environment -======================= +Installing Phalanx +================== -To create a new Phalanx environment, take the following steps: +Follow these steps to install Phalanx. +These can be run repeatedly to reinstall Phalanx over an existing deployment. -.. rst-class:: open +#. Create a Vault AppRole that will be used by Vault Secrets Operator. + Set the ``VAULT_TOKEN`` environment variable to a token with the ability to create new AppRoles (for SQuaRE clusters, use the admin token), and then run: -#. Fork the `Phalanx repository`_ if this work is separate from the SQuaRE-managed environments. + .. prompt:: bash -#. Create a new :file:`values-{environment}.yaml` file in `environments `__. - Start with a template copied from an existing environment that's similar to the new environment. - Edit it so that ``name``, ``fqdn``, ``vaultUrl``, and ``vaultPathPrefix`` at the top match your new environment. - You may omit ``vaultUrl`` for SQuaRE-managed environments. - See :doc:`secrets-setup` for more information about the latter two settings and additional settings you may need. - If the environment will be hosted on Google Kubernetes Engine, also fill out ``gcp.projectId``, ``gcp.region``, and ``gcp.clusterName`` with metadata about where the environment will be hosted. - Enable the applications this environment should include. + phalanx vault create-read-approle -#. Decide on your approach to TLS certificates. - See :doc:`hostnames` for more details. - This may require DNS configuration in Route 53 if this is the first deployment in a new domain and you are using Let's Encrypt for certificates. + Unset ``VAULT_TOKEN`` when this command finishes. -#. Do what DNS setup you can. - If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. 
- If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. + Be aware that this will invalidate any existing AppRole for that environment. -#. Decide on your approach to user home directory storage. - The Notebook Aspect (the ``nublado`` application) requires a POSIX file system. - The most frequently used method of providing that file system is NFS mounts, but you may instead want to use persistent volume claims or a different file system that's mounted on the Kubernetes cluster nodes and exposed to pods via ``hostPath``. - Whatever storage you choose, you will need to configure appropriate mount points in :px-app:`nublado` when you configure each application in the next step. +#. Set the environment variables ``VAULT_ROLE_ID`` and ``VAULT_SECRET_ID`` to the Role ID and Secret ID printed out by that command. -#. For each enabled application, create a corresponding :file:`values-{environment}.yaml` file in the relevant directory under `applications `__. - Customization will vary from application to application. - The following applications have special bootstrapping considerations: +#. Ensure that your default Kubernetes cluster for :command:`kubectl` and :command:`helm` is set to point to the Kubernetes cluster into which you want to install the Phalanx environment. + You can verify this with :command:`kubectl config current-context`. - - :px-app-bootstrap:`argocd` - - :px-app-bootstrap:`gafaelfawr` - - :px-app-bootstrap:`nublado` - - :px-app-bootstrap:`portal` - - :px-app-bootstrap:`squareone` +#. Start the install: -#. Add the URL of your new environment to :file:`docs/documenteer.toml` under ``phinx.linkcheck.ignore``. - The Argo CD URL of your environment will be unreachable, so you need to tell Sphinx valid link checking to ignore it. + .. 
prompt:: bash -Installing Phalanx -================== + phalanx environment install -Once you have defined a Phalanx environment, follow these steps to install it. -These can be run repeatedly to reinstall Phalanx over an existing deployment. + You will be prompted to confirm that you want to proceed. -#. Create a Vault AppRole that will be used by Vault Secrets Operator. +#. If the installation is using a dynamically-assigned IP address, you will need to set up the A record (and AAAA record if using IPv6) in DNS once that address has been assigned. + Wait until the ``ingress-nginx`` application has been installed, which happens after Argo CD has been installed but before most applications are synced. + Then, wait for it to be assigned an external IP address. + Obtain that IP address with :command:`kubectl get -n ingress-nginx service` (look for the external IP). + Then, set the A record in DNS for your environment to that address. + For installations that are intended to be long-lived and that can reliably request the same address, add that IP address to the :file:`values-{environment}.yaml` file in :file:`applications/ingress-nginx` for your environment. + The setting to use is ``ingress-nginx.controller.service.loadBalancerIP``. + This ensures that ingress-nginx will always request that address. - .. prompt:: bash +#. If you are deploying on Google Cloud Platform, consider converting the dynamically-assigned IP address to a static IP. + You can do this in the GCP console under :menuselection:`VPC Network -> IP addresses`. - phalanx vault create-read-approle +#. Debug any problems during installation. + The most common sources of problems are errors or missing configuration in the :file:`values-{environment}.yaml` files you created for each application. + You can safely run the installer repeatedly as you debug and fix issues. - Be aware that this will invalidate any existing AppRole for that environment. 
+Using a Vault token rather than AppRole +======================================= -#. Run the installer script at `installer/install.sh `__. +The default and recommended installation approach is to use a Vault AppRole for vault-secrets-operator to authenticate to Vault. +However, using a read-only Vault token is still supported. - .. prompt:: bash +To use a Vault token instead of an AppRole, create an appropriate read-only token with access to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. +Skip step 1 in the normal installation process, since you don't need to create an AppRole. +In step 2, set ``VAULT_TOKEN`` to the read-only token and do not set ``VAULT_ROLE_ID`` or ``VAULT_SECRET_ID``. +Then continue the regular installation process. - installer/install.sh +Troubleshooting tools +===================== - ```` and ```` are the Role ID and Secret ID of the Vault AppRole created in the previous step. +The tools to use for troubleshooting will vary depending on how far the installer has gotten. - Debug any problems. - The most common source of problems are errors or missing configuration in the :file:`values-{environment}.yaml` files you created for each application. - You can safely run the installer repeatedly as you debug and fix issues. +- If something fails before Argo CD is installed, you will need to use :command:`kubectl` to look around in Kubernetes, retrieve logs, and look at error messages. + +- If Argo CD is installed and working, but ingress-nginx fails, you can additionally use the :command:`argocd` command-line tool. + The installer will have created login credentials for Argo CD as the admin user for you, so you shouldn't need to do that again. + Pass the flags ``--port-forward --port-forward-namespace argocd`` to :command:`argocd` to proxy to the Argo CD server without needing to have the ingress working. -#. 
If the installation is using a dynamically-assigned IP address, while the installer is running, wait until the ingress-nginx-controller service comes up and has an external IP address. - Then, set the A record for your endpoint to that address (or set an A record with that IP address for the ingress and a CNAME from the endpoint to the A record). - For installations that are intended to be long-lived, it is worth capturing this IP address at this point and modifying the ``ingress-nginx`` configuration to use it statically should you ever need to reinstall the instance. +- If the ingress was successfully installed and you've created the DNS record for your environment, you can use the Argo CD web UI the same as you would with a fully-installed cluster. + If your Argo CD authentication configuration is working (see :doc:`/applications/argocd/authentication`), you can log in as you normally would. + If it is not, you will need to use the admin password. + You can get this from Vault in the ``admin.plaintext_password`` key of the ``argocd`` secret. diff --git a/docs/admin/migrating-secrets.rst b/docs/admin/migrating-secrets.rst index 10c20d9197..5ae14d5a6a 100644 --- a/docs/admin/migrating-secrets.rst +++ b/docs/admin/migrating-secrets.rst @@ -3,7 +3,7 @@ Migrating to the new secrets management system ############################################## We introduced a new command-line-driven secrets management system for Phalanx environments in September of 2023. -This page documents how to migrate to the new system from the older scripts in :file:`installer`. +This page documents how to migrate to the new system. These instructions assume that, if you are using 1Password for static secrets, you have already set up a 1Password vault and enabled the :px-app:`1Password Connect server ` for this environment. If you have not yet done this, see :doc:`/applications/onepassword-connect/add-new-environment`. 
diff --git a/docs/admin/requirements.rst b/docs/admin/requirements.rst index e024ae6b29..5d8611273e 100644 --- a/docs/admin/requirements.rst +++ b/docs/admin/requirements.rst @@ -54,7 +54,7 @@ For installing an environment, you will also need the following tools: .. warning:: - Although the Argo CD command-line client must be installed to use the Phalanx installer, do not use it to create applications. + Although the Argo CD command-line client must be installed to install Phalanx in a new environment, do not use it to create applications. All Argo CD applications should be managed through Phalanx and the ``science-platform`` app of apps. - The Vault command-line client. @@ -62,3 +62,9 @@ For installing an environment, you will also need the following tools: To see the version currently used for testing, search for ``vault_`` in `.github/workflows/ci.yaml `__. - Git 2.22 or later. + +Next steps +========== + +- Decide on how you plan to handle hostnames and TLS: :doc:`hostnames` +- Create the configuration for your new Phalanx environment: :doc:`create-environment` diff --git a/docs/applications/argocd/bootstrap.rst b/docs/applications/argocd/bootstrap.rst index ad9c177cec..131550eb51 100644 --- a/docs/applications/argocd/bootstrap.rst +++ b/docs/applications/argocd/bootstrap.rst @@ -9,7 +9,7 @@ Authentication Initial installation of the Rubin Science Platform is done using Argo CD and a static password for the ``admin`` account. You can then log on to the ``admin`` account using that password to manage the resulting environment. -The password is available as the ``admin.plaintext_password`` key in Vault secret for the ``argocd`` application, and in the ``Secret`` resource named ``argocd-secret`` in the ``argocd`` namespace after installation of the environment. +The password is available as the ``admin.plaintext_password`` key in the Vault secret for the ``argocd`` application. 
As part of bootstrapping a new environment, you should also configure per-user authentication. To do this, follow the instructions in :doc:`authentication`. diff --git a/docs/applications/argocd/upgrade.rst b/docs/applications/argocd/upgrade.rst index e309b99a3f..70d247f601 100644 --- a/docs/applications/argocd/upgrade.rst +++ b/docs/applications/argocd/upgrade.rst @@ -71,11 +71,13 @@ Only use this process if the automatic upgrade failed or if there are documented .. code-block:: sh - cd phalanx/installer - helm upgrade --install argocd argo/argo-cd --version $VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + cd applications + helm upgrade --install argocd argocd \ + --values argocd/values.yaml --values argocd/values-$ENVIRONMENT.yaml \ + --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ + --namespace argocd --wait --timeout 900s - Replace ``$VERSION`` with the Helm chart version (**not** the Argo CD application version) that you want to install. + Replace ``$ENVIRONMENT`` with the name of the Phalanx environment you're attempting to repair, and ``$VAULT_PATH_PREFIX`` with the Vault path prefix (from :file:`environments/values-{environment}.yaml`) for that environment. If all goes well, you can now view the UI at ``/argo-cd`` and confirm that everything still looks correct. @@ -106,17 +108,18 @@ You can then recreate the namespace, reinstall Argo CD, and restore the backup: .. 
code-block:: sh - kubectl create namespace argocd - cd phalanx/installer - helm upgrade --install argocd argo/argo-cd --version $HELM_VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + cd applications + helm upgrade --install argocd argocd \ + --values argocd/values.yaml --values argocd/values-$ENVIRONMENT.yaml \ + --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ + --namespace argocd --create-namespace --wait --timeout 900s chmod 644 ~/.kube/config docker run -i -v ~/.kube:/home/argocd/.kube --rm \ argoproj/argocd:$VERSION argocd-util import -n argocd - < backup.yaml chmod 600 ~/.kube/config -Replace ``$HELM_VERSION`` with the version of the Helm chart you want to use and ``$VERSION`` with the corresponding Argo CD version (as shown via ``helm search repo``). +Replace ``$ENVIRONMENT`` with the name of the Phalanx environment you're attempting to repair, and ``$VAULT_PATH_PREFIX`` with the Vault path prefix (from :file:`environments/values-{environment}.yaml`) for that environment. This should hopefully restore Argo CD to a working state. -If it doesn't, you'll need to reinstall it using the more extended process used by the cluster installer. -See `installer/install.sh `__ for the commands to run. +If it doesn't, you'll need to reinstall it using the more extended process used by :command:`phalanx environment install`. +See :doc:`/admin/installation` for that process. diff --git a/docs/applications/vault-secrets-operator/bootstrap.rst b/docs/applications/vault-secrets-operator/bootstrap.rst index e41195add0..ec62a8a0d5 100644 --- a/docs/applications/vault-secrets-operator/bootstrap.rst +++ b/docs/applications/vault-secrets-operator/bootstrap.rst @@ -4,10 +4,15 @@ Bootstrapping vault-secrets-operator #################################### -Vault Secrets Operator is the only component of the Science Platform whose secret has to be manually created, so that it can create the secrets for all other applications. 
-This will be done automatically by the `install script `__. +Because it is the application that manages all of the other secrets in Phalanx, the secret for vault-secrets-operator itself, containing its Vault credentials, requires special handling. +It is normally created as the first step of a Phalanx bootstrap by the :doc:`installer `. -When using the newer, recommended :ref:`secrets management system `, the secret created by the installer will look like this: +This secret (``vault-credentials`` in the ``vault-secrets-operator`` namespace) exists only as a normal ``Secret`` resource and is not managed by Argo CD, so it will not appear in the Argo CD dashboard for the vault-secrets-operator application. + +AppRole authentication +====================== + +When using the newer, recommended :ref:`secrets management system `, vault-secrets-operator's secret looks like this: .. code-block:: yaml @@ -21,9 +26,30 @@ When using the newer, recommended :ref:`secrets management system type: Opaque -This secret will normally be created by either the installer or :command:`phalanx vault create-read-approle`. +This secret will normally be created by either the installer or by piping :command:`phalanx vault create-read-approle --as-secret vault-credentials` into :command:`kubectl apply`. +This is the default configuration of vault-secrets-operator. + +Token authentication +==================== + +Using a regular Vault token is still supported, but requires special per-environment configuration for vault-secrets-operator. +Put the following into :file:`applications/vault-secrets-operator/values-{environment}.yaml`: -Using a regular Vault token is still supported, in which case the secret will look like this: +.. 
code-block:: yaml + + vault-secrets-operator: + environmentVars: + - name: "VAULT_TOKEN" + valueFrom: + secretKeyRef: + name: "vault-secrets-operator" + key: "VAULT_TOKEN" + - name: "VAULT_TOKEN_LEASE_DURATION" + value: "31536000" # One year + vault: + authMethod: "token" + +In this case, the created secret will look like: .. code-block:: yaml @@ -36,6 +62,5 @@ Using a regular Vault token is still supported, in which case the secret will lo VAULT_TOKEN: type: Opaque -This secret will be created by the installer when given a ``VAULT_TOKEN`` parameter. - -In either case, the Vault token or AppRole must have read access to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. +This secret will be created by the installer when ``VAULT_TOKEN`` is set in the environment instead of ``VAULT_ROLE_ID`` and ``VAULT_SECRET_ID``. +This Vault token must have read access (and should not have write access) to the Vault path configured in :file:`environments/values-{environment}.yaml` for your environment. diff --git a/docs/internals/api.rst b/docs/internals/api.rst index 6166bcd1f8..a608ffd85b 100644 --- a/docs/internals/api.rst +++ b/docs/internals/api.rst @@ -25,6 +25,9 @@ This API is only intended for use within the Phalanx code itself. .. automodapi:: phalanx.factory :include-all-objects: +.. automodapi:: phalanx.github + :include-all-objects: + .. automodapi:: phalanx.models.applications :include-all-objects: @@ -55,12 +58,21 @@ This API is only intended for use within the Phalanx code itself. .. automodapi:: phalanx.services.vault :include-all-objects: +.. automodapi:: phalanx.storage.argocd + :include-all-objects: + +.. automodapi:: phalanx.storage.command + :include-all-objects: + .. automodapi:: phalanx.storage.config :include-all-objects: .. automodapi:: phalanx.storage.helm :include-all-objects: +.. automodapi:: phalanx.storage.kubernetes + :include-all-objects: + .. 
automodapi:: phalanx.storage.onepassword :include-all-objects: diff --git a/installer/install.sh b/installer/install.sh deleted file mode 100755 index 78059e2016..0000000000 --- a/installer/install.sh +++ /dev/null @@ -1,312 +0,0 @@ -#!/bin/bash -e - -################################################################################ -# install.sh - Install script for the Rubin Science Platform -################################################################################ - -# Usage: -# ./install.sh ENVIRONMENT=env [VAULT_ROLE_ID= VAULT_SECRET_ID= | VAULT_TOKEN= - -# Arguments -# - The environment variable is mandatory and should be provided as the first argument. -# - If two positional arguments are provided, assume they are VAULT_ROLE_ID and VAULT_SECRET_ID. -# - If named arguments are provided, parse them for ENVIRONMENT, VAULT_ROLE_ID, VAULT_SECRET_ID, and VAULT_TOKEN. - -# Environment Configuration: -# The environment configuration is retrieved from ../environments/values-${ENVIRONMENT}.yaml. - -# Usage Examples: -# Using authentication with an approle: -# ./install.sh ENVIRONMENT=myenv VAULT_ROLE_ID=your-vault-id VAULT_SECRET_ID=your-secret -# ./install.sh myenv your-vault-id your-secret # Uses VAULT_ROLE_ID and VAULT_SECRET_ID -# -# Using authentication with a token: -# ./install.sh ENVIRONMENT=myenv VAULT_TOKEN=your-vault-token -# ./install.sh ENVIRONMENT=myenv VAULT_TOKEN=your-vault-token -# ./install.sh myenv VAULT_TOKEN=your-vault-token - -# Script Dependencies: -# - yq: Used for parsing YAML files. -# - vault: Used for interacting with HashiCorp Vault. -# - kubectl: Kubernetes command-line tool. -# - helm: Kubernetes package manager. - -# Notes: -# - The script assumes that the specified environment exists, and is available under ../environments/ and has the required charts under ../applications/ -# - It creates or updates Argo CD and Vault secrets based on the provided credentials. 
- -# Exit codes: -# - 0: Success -# - 1: Error - -################################################################################ - -USAGE="Usage: ./install.sh ENVIRONMENT=env [VAULT_ROLE_ID= VAULT_SECRET_ID= | VAULT_TOKEN=]" - -unset ENVIRONMENT -unset VAULT_ROLE_ID -unset VAULT_TOKEN -unset VAULT_SECRET_ID - -# Function to display usage and exit -display_usage() { - echo "$USAGE" - exit 1 -} - -# Function to create Kubernetes secret -create_kubernetes_secret() { - local namespace="$1" - shift - kubectl create secret generic vault-credentials \ - --namespace "$namespace" \ - --dry-run=client -o yaml "$@" | kubectl apply -f - -} - -# Function to check for dependencies -check_dependencies() { - local dependencies=("yq" "vault" "kubectl" "helm") - - for cmd in "${dependencies[@]}"; do - if ! command -v "$cmd" > /dev/null 2>&1; then - echo "Error: $cmd not found. Please install $cmd and try again." - exit 1 - fi - done -} - -# Check that the dependencies are installed -check_dependencies - -# Extract environment -if [[ $1 == ENVIRONMENT=* ]]; then - ENVIRONMENT="${1#*=}" - shift -elif [[ $1 =~ ^[a-zA-Z0-9_-]+$ ]]; then - ENVIRONMENT="$1" - shift -else - display_usage -fi - -# Extract named arguments -for arg in "$@"; do - case "$arg" in - environment=*) - ENVIRONMENT="${arg#*=}" - ;; - VAULT_ROLE_ID=*|vault_role_id=*) - VAULT_ROLE_ID="${arg#*=}" - ;; - VAULT_SECRET_ID=*|vault_secret_id=*) - VAULT_SECRET_ID="${arg#*=}" - ;; - VAULT_TOKEN=*|vault_token=*) - VAULT_TOKEN="${arg#*=}" - ;; - *) - ;; - esac -done - - -# If VAULT_ROLE_ID and VAULT_SECRET_ID are not set from named arguments, check positional arguments -if [ -z "$VAULT_ROLE_ID" ] || [ -z "$VAULT_SECRET_ID" ]; then - # If two positional arguments are provided, assume they are VAULT_ROLE_ID and VAULT_SECRET_ID - if [ $# -ge 2 ]; then - VAULT_ROLE_ID=$1 - VAULT_SECRET_ID=$2 - shift 2 - fi -fi - -# Get environment configuration -config="../environments/values-${ENVIRONMENT}.yaml" - -echo "Getting Git branch and 
remote information..." -GIT_URL=$(git config --get remote.origin.url) -# Github runs in a detached head state, but sets GITHUB_REF, -# extract the branch from it. If we're there, use that branch. -# git branch --show-current will return empty in deatached head. -GIT_BRANCH=${GITHUB_HEAD_REF:-$(git branch --show-current)} - -echo "Logging on to Vault..." - -VAULT_ADDR="" -if grep '^vaultUrl:' "$config" >/dev/null; then - VAULT_ADDR=$(yq -r .vaultUrl "$config") -else - VAULT_ADDR=$(yq -r .vaultUrl ../environments/values.yaml) -fi - -export VAULT_ADDR=$VAULT_ADDR - -# Check if VAULT_ROLE_ID and VAULT_SECRET_ID are provided, if so generate VAULT_TOKEN -if [ -n "$VAULT_ROLE_ID" ] && [ -n "$VAULT_SECRET_ID" ]; then - # If VAULT_TOKEN is not provided, generate it using VAULT_ROLE_ID and VAULT_SECRET_ID - if [ -z "$VAULT_TOKEN" ]; then - VAULT_TOKEN=$(vault write auth/approle/login role_id="$VAULT_ROLE_ID" secret_id="$VAULT_SECRET_ID" | grep 'token ' | awk '{ print $2 }') - fi -fi - -# Check if VAULT_ROLE_ID and VAULT_SECRET_ID are not provided, but VAULT_TOKEN is -if [ -z "$VAULT_ROLE_ID" ] || [ -z "$VAULT_SECRET_ID" ]; then - # Check if VAULT_TOKEN is provided - if [ -z "$VAULT_TOKEN" ]; then - echo "Invalid arguments provided. Please provide either VAULT_ROLE_ID and VAULT_SECRET_ID or VAULT_TOKEN." - display_usage - fi -fi - -export VAULT_TOKEN=$VAULT_TOKEN - -VAULT_PATH_PREFIX=$(yq -r .vaultPathPrefix "$config") -ARGOCD_PASSWORD=$(vault kv get --field=admin.plaintext_password "$VAULT_PATH_PREFIX"/argocd) - -echo "Putting Vault credentials in a secret for vault-secrets-operator..." -# The namespace may not exist already, but don't error if it does. 
-kubectl create ns vault-secrets-operator || true - -# Create Kubernetes secret based on authentication method -if [ -n "$VAULT_ROLE_ID" ] && [ -n "$VAULT_SECRET_ID" ]; then - create_kubernetes_secret "vault-secrets-operator" \ - --from-literal=VAULT_ROLE_ID="$VAULT_ROLE_ID" \ - --from-literal=VAULT_SECRET_ID="$VAULT_SECRET_ID" -elif [ -n "$VAULT_TOKEN" ]; then - create_kubernetes_secret "vault-secrets-operator" \ - --from-literal=VAULT_TOKEN="$VAULT_TOKEN" -else - echo "Invalid arguments provided. Please provide either VAULT_ROLE_ID and VAULT_SECRET_ID or VAULT_TOKEN." - display_usage -fi - -# Argo CD depends a Vault-created secret for its credentials, so -# vault-secrets-operator has to be installed first. -echo "Updating or installing vault-secrets-operator..." -helm dependency update ../applications/vault-secrets-operator -helm upgrade vault-secrets-operator ../applications/vault-secrets-operator \ - --install \ - --values ../applications/vault-secrets-operator/values.yaml \ - --values "../applications/vault-secrets-operator/values-$ENVIRONMENT.yaml" \ - --set "vault-secrets-operator.vault.address=$VAULT_ADDR" \ - --create-namespace \ - --namespace vault-secrets-operator \ - --timeout 5m \ - --wait - -echo "Updating or installing Argo CD using Helm..." -helm dependency update ../applications/argocd -helm upgrade argocd ../applications/argocd \ - --install \ - --values ../applications/argocd/values.yaml \ - --values "../applications/argocd/values-$ENVIRONMENT.yaml" \ - --set "global.vaultSecretsPath=$VAULT_PATH_PREFIX" \ - --create-namespace \ - --namespace argocd \ - --timeout 5m \ - --wait - -echo "Logging in to Argo CD..." -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" - -echo "Creating the top-level Argo CD application..." 
-argocd app create science-platform \ - --repo "$GIT_URL" \ - --path environments --dest-namespace default \ - --dest-server https://kubernetes.default.svc \ - --upsert \ - --revision "$GIT_BRANCH" \ - --port-forward \ - --port-forward-namespace argocd \ - --helm-set "repoUrl=$GIT_URL" \ - --helm-set "targetRevision=$GIT_BRANCH" \ - --values values.yaml \ - --values "values-$ENVIRONMENT.yaml" - -echo "Syncing the top-level Argo CD application..." -argocd app sync science-platform \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 - -echo "Moving the top-level Argo CD application into infrastructure..." -argocd app set science-platform --project infrastructure \ - --port-forward \ - --port-forward-namespace argocd - -echo "Syncing Argo CD..." -timeout 30 argocd app sync argocd \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 || \ -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" && \ -timeout 30 argocd app sync argocd \ - --port-forward \ - --port-forward-namespace argocd \ - --timeout 30 - -echo "Waiting for Argo CD to finish syncing..." -kubectl -n argocd rollout status deployment/argocd-server -kubectl -n argocd rollout status deployment/argocd-repo-server -kubectl -n argocd rollout status statefulset/argocd-application-controller - -echo "Logging in to Argo CD..." -argocd login \ - --plaintext \ - --port-forward \ - --port-forward-namespace argocd \ - --username admin \ - --password "$ARGOCD_PASSWORD" - -if [ "$(yq -r '.applications."ingress-nginx"' "$config")" != "false" ]; then - echo "Syncing ingress-nginx..." - argocd app sync ingress-nginx \ - --port-forward \ - --port-forward-namespace argocd -fi - -if [ "$(yq -r '.applications."cert-manager"' "$config")" != "false" ]; then - echo "Syncing cert-manager..." 
- argocd app sync cert-manager \ - --port-forward \ - --port-forward-namespace argocd && \ - - # Wait for the cert-manager's webhook to finish deploying by running - # kubectl, argocd's sync doesn't seem to wait for this to finish. - kubectl -n cert-manager rollout status deployment/cert-manager-webhook -fi - -if [ "$(yq -r .applications.postgres "$config")" == "true" ]; then - echo "Syncing postgres..." - argocd app sync postgres \ - --port-forward \ - --port-forward-namespace argocd -fi - -if [ "$(yq -r .applications.gafaelfawr "$config")" != "false" ]; then - echo "Syncing gafaelfawr..." - argocd app sync gafaelfawr \ - --port-forward \ - --port-forward-namespace argocd -fi - -echo "Syncing remaining applications..." -argocd app sync -l "argocd.argoproj.io/instance=science-platform" \ - --port-forward \ - --port-forward-namespace argocd - -echo '' -echo "You can now check on your Argo CD installation by running:" -echo "kubectl port-forward service/argocd-server -n argocd 8080:443" -echo "For the ArgoCD admin password:" -echo "vault kv get --field=admin.plaintext_password $VAULT_PATH_PREFIX/argocd" diff --git a/pyproject.toml b/pyproject.toml index 98ec5c2ac6..7e1896adf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,6 @@ python_files = [ [tool.ruff] exclude = [ "docs/**", - "installer/**", ] line-length = 79 target-version = "py312" diff --git a/src/phalanx/cli.py b/src/phalanx/cli.py index eb7e518330..03b515aaa6 100644 --- a/src/phalanx/cli.py +++ b/src/phalanx/cli.py @@ -18,6 +18,11 @@ from .models.environments import EnvironmentConfig from .models.helm import HelmStarter from .models.secrets import ConditionalSecretConfig, StaticSecrets +from .models.vault import ( + VaultAppRoleCredentials, + VaultCredentials, + VaultTokenCredentials, +) __all__ = [ "help", @@ -29,6 +34,7 @@ "application_lint_all", "application_template", "environment", + "environment_install", "environment_lint", "environment_schema", "environment_template", @@ -46,6 
+52,19 @@ "vault_export_secrets", ] +_INSTALL_WARNING = """\ +WARNING: This will install the entire {environment} Phalanx environment +into whatever Kubernetes cluster is currently configured as your default +cluster. + +THIS WILL OVERWRITE THE APPLICATIONS IN YOUR CURRENT KUBERNETES CLUSTER. + +If you have not verified, with kubectl config current-context, that this is +the correct cluster immediately before running this command, answer no and +double-check the cluster before continuing. +""" +"""Warning message displayed by :command:`phalanx environment install`.""" + def _find_config() -> Path: """Find the root of the Phalanx configuration tree. @@ -334,6 +353,93 @@ def environment() -> None: """Commands for Phalanx environment configuration.""" +@environment.command("install") +@click.argument("environment") +@click.option( + "-c", + "--config", + type=click.Path(path_type=Path), + default=None, + help="Path to root of Phalanx configuration.", +) +@click.option( + "--git-branch", + default=None, + envvar="GITHUB_HEAD_REF", + help="Override Git branch for Argo CD.", +) +@click.option( + "--force-noninteractive", + default=False, + is_flag=True, + help="Force installation without a prompt.", +) +@click.option( + "--vault-role-id", + default=None, + envvar="VAULT_ROLE_ID", + help="Role ID for vault-secrets-operator.", +) +@click.option( + "--vault-secret-id", + default=None, + envvar="VAULT_SECRET_ID", + help="Secret ID for vault-secrets-operator.", +) +@click.option( + "--vault-token", + default=None, + envvar="VAULT_TOKEN", + help="Read-only token for vault-secrets-operator.", +) +def environment_install( + environment: str, + *, + config: Path | None, + force_noninteractive: bool = False, + git_branch: str | None = None, + vault_role_id: str | None = None, + vault_secret_id: str | None = None, + vault_token: str | None = None, +) -> None: + """Install Phalanx into an environment. + + Bootstrap Phalanx for an environment. 
Assumes that the currently enabled + Kubernetes configuration is the cluster into which to install Phalanx. + + The secrets tree for the environment must already be present in Vault. + Read-only Vault credentials must be supplied by either setting the + environment variables VAULT_ROLE_ID and VAULT_SECRET_ID to the credentials + of a Vault AppRole, or setting VAULT_TOKEN to a read-only Vault token. + """ + if not config: + config = _find_config() + factory = Factory(config) + if vault_role_id and vault_secret_id: + vault_credentials: VaultCredentials = VaultAppRoleCredentials( + role_id=vault_role_id, secret_id=vault_secret_id + ) + elif vault_token: + vault_credentials = VaultTokenCredentials(token=vault_token) + else: + msg = ( + "Either VAULT_TOKEN or both VAULT_ROLE_ID and VAULT_SECRET_ID" + " must be set" + ) + raise click.UsageError(msg) + + # Prompt the user unless they specifically said not to. + if not force_noninteractive: + print(_INSTALL_WARNING.format(environment=environment)) + click.confirm( + "Are you certain you want to continue?", abort=True, default=False + ) + + # Do the installation. + environment_service = factory.create_environment_service() + environment_service.install(environment, vault_credentials, git_branch) + + @environment.command("lint") @click.argument("environment", required=False) @click.option( diff --git a/src/phalanx/exceptions.py b/src/phalanx/exceptions.py index 79628a701b..aa627b29c9 100644 --- a/src/phalanx/exceptions.py +++ b/src/phalanx/exceptions.py @@ -10,6 +10,8 @@ __all__ = [ "ApplicationExistsError", "CommandFailedError", + "CommandTimedOutError", + "GitRemoteError", "InvalidApplicationConfigError", "InvalidEnvironmentConfigError", "InvalidSecretConfigError", @@ -70,6 +72,43 @@ def __init__( self.stderr = exc.stderr +class CommandTimedOutError(Exception): + """Execution of a command timed out. + + Parameters + ---------- + command + Command being run. + args + Arguments to that command.
+ exc + Exception reporting the timeout. + + Attributes + ---------- + stdout + Standard output from the timed-out command. + stderr + Standard error from the timed-out command. + """ + + def __init__( + self, + command: str, + args: Iterable[str], + exc: subprocess.TimeoutExpired, + ) -> None: + args_str = " ".join(args) + msg = f"{command} {args_str} timed out after {exc.timeout}s" + super().__init__(msg) + self.stdout = exc.stdout + self.stderr = exc.stderr + + +class GitRemoteError(Exception): + """Unable to get necessary information from a Git remote.""" + + class InvalidApplicationConfigError(Exception): """Configuration for an application is invalid. @@ -218,8 +257,13 @@ class VaultNotFoundError(Exception): Base URL of the Vault server. path Path that was not found. + key + If provided, key within that path that was not found. """ - def __init__(self, url: str, path: str) -> None: - msg = f"Vault secret {path} not found in server {url}" + def __init__(self, url: str, path: str, key: str | None = None) -> None: + if key: + msg = f"Vault key {key} not found in secret {path} on server {url}" + else: + msg = f"Vault secret {path} not found in server {url}" super().__init__(msg) diff --git a/src/phalanx/factory.py b/src/phalanx/factory.py index 4e03ed0cf9..8de5f8d1f4 100644 --- a/src/phalanx/factory.py +++ b/src/phalanx/factory.py @@ -8,8 +8,10 @@ from .services.environment import EnvironmentService from .services.secrets import SecretsService from .services.vault import VaultService +from .storage.argocd import ArgoCDStorage from .storage.config import ConfigStorage from .storage.helm import HelmStorage +from .storage.kubernetes import KubernetesStorage from .storage.onepassword import OnepasswordStorage from .storage.vault import VaultStorage @@ -59,8 +61,13 @@ def create_environment_service(self) -> EnvironmentService: Service for manipulating environments.
""" config_storage = self.create_config_storage() - helm_storage = HelmStorage(config_storage) - return EnvironmentService(config_storage, helm_storage) + return EnvironmentService( + config_storage=config_storage, + argocd_storage=ArgoCDStorage(), + kubernetes_storage=KubernetesStorage(), + helm_storage=HelmStorage(config_storage), + vault_storage=VaultStorage(), + ) def create_secrets_service(self) -> SecretsService: """Create service for manipulating Phalanx secrets. diff --git a/src/phalanx/github.py b/src/phalanx/github.py new file mode 100644 index 0000000000..9af9e53749 --- /dev/null +++ b/src/phalanx/github.py @@ -0,0 +1,38 @@ +"""Utility functions used when running under GitHub Actions. + +The utility functions in this module can all be called unconditionally. They +will detect whether the Phalanx command-line tool is being run under GitHub +Actions and, if so, add additional GitHub-specific markers to the output to +improve display in GitHub Actions logs. +""" + +from __future__ import annotations + +import os +from collections.abc import Iterator +from contextlib import contextmanager + +__all__ = [ + "action_group", +] + + +@contextmanager +def action_group(title: str) -> Iterator[None]: + """Wrap a sequence of commands in a GitHub Actions group. + + Must be used as a context manager. Any output produced by code that runs + within that context manager will be wrapped into a GitHub Actions display + group with the given title. + + Parameters + ---------- + title + Title of display group. 
+ """ + in_github_actions = os.getenv("GITHUB_ACTIONS") == "true" + if in_github_actions: + print(f"::group::{title}", flush=True) + yield + if in_github_actions: + print("::endgroup::", flush=True) diff --git a/src/phalanx/models/vault.py b/src/phalanx/models/vault.py index c61c1b86ad..261f68f51e 100644 --- a/src/phalanx/models/vault.py +++ b/src/phalanx/models/vault.py @@ -2,6 +2,7 @@ from __future__ import annotations +from abc import ABC, abstractmethod from base64 import b64encode from datetime import datetime @@ -12,8 +13,11 @@ __all__ = [ "VaultAppRole", + "VaultAppRoleCredentials", "VaultAppRoleMetadata", + "VaultCredentials", "VaultToken", + "VaultTokenCredentials", "VaultTokenMetadata", ] @@ -62,6 +66,44 @@ def to_yaml(self) -> str: return yaml.dump(self.model_dump()) +class VaultCredentials(BaseModel, ABC): + """Credentials used for Vault access. + + Can hold either AppRole credentials or a simple token, but always holds + one or the other. + """ + + @abstractmethod + def to_secret_data(self) -> dict[str, str]: + """Construct the corresponding vault-secrets-operator secret.""" + + +class VaultAppRoleCredentials(VaultCredentials): + """Credentials for Vault access using an AppRole.""" + + role_id: str + """Unique identifier of the AppRole.""" + + secret_id: str + """Authentication credentials for the AppRole.""" + + def to_secret_data(self) -> dict[str, str]: + return { + "VAULT_ROLE_ID": self.role_id, + "VAULT_SECRET_ID": self.secret_id, + } + + +class VaultTokenCredentials(VaultCredentials): + """Credentials for Vault access using a token.""" + + token: str + """Vault token.""" + + def to_secret_data(self) -> dict[str, str]: + return {"VAULT_TOKEN": self.token} + + class VaultTokenMetadata(BaseModel): """Metadata about a new or existing Vault token.""" diff --git a/src/phalanx/services/environment.py b/src/phalanx/services/environment.py index f913c69daa..af6ae14ef7 100644 --- a/src/phalanx/services/environment.py +++ 
b/src/phalanx/services/environment.py @@ -2,8 +2,17 @@ from __future__ import annotations +from datetime import timedelta + +from ..exceptions import CommandFailedError, VaultNotFoundError +from ..github import action_group +from ..models.applications import Project +from ..models.vault import VaultCredentials +from ..storage.argocd import ArgoCDStorage from ..storage.config import ConfigStorage from ..storage.helm import HelmStorage +from ..storage.kubernetes import KubernetesStorage +from ..storage.vault import VaultStorage __all__ = ["EnvironmentService"] @@ -15,15 +24,161 @@ class EnvironmentService: ---------- config_storage Storage object for the Phalanx configuration. + argocd_storage + Interface to Argo CD actions. + kubernetes_storage + Interface to direct Kubernetes object manipulation. helm_storage Interface to Helm actions. + vault_storage + Factory class for Vault clients. """ def __init__( - self, config_storage: ConfigStorage, helm_storage: HelmStorage + self, + *, + config_storage: ConfigStorage, + argocd_storage: ArgoCDStorage, + kubernetes_storage: KubernetesStorage, + helm_storage: HelmStorage, + vault_storage: VaultStorage, ) -> None: self._config = config_storage + self._argocd = argocd_storage + self._kubernetes = kubernetes_storage self._helm = helm_storage + self._vault_storage = vault_storage + + def install( + self, + environment_name: str, + vault_credentials: VaultCredentials, + git_branch: str | None = None, + ) -> None: + """Install a Phalanx environment. + + Parameters + ---------- + environment_name + Environment to install. + vault_credentials + Credentials to use for Vault access. These will be installed in + the cluster as a ``Secret`` used by vault-secrets-operator. + git_branch + Git branch to point Argo CD at. If not given, defaults to the + current branch. + + Raises + ------ + CommandFailedError + Raised if one of the underlying commands fails. + VaultNotFoundError + Raised if a necessary secret was not found in Vault. 
+ """ + environment = self._config.load_environment(environment_name) + vault = self._vault_storage.get_vault_client( + environment, credentials=vault_credentials + ) + + # Get information about the local repository. + git_url = self._config.get_git_url() + if not git_branch: + git_branch = self._config.get_git_branch() + + # Get the plain-text Argo CD admin password from Vault. + argocd_secret = vault.get_application_secret("argocd") + argocd_password = argocd_secret.get("admin.plaintext_password") + if not argocd_password: + raise VaultNotFoundError( + vault.url, f"{vault.path}/argocd", "admin.plaintext_password" + ) + + # Add the dependency repositories of the applications we're installing + # directly with Helm, and refresh the Helm dependency cache. + with action_group("Update Helm dependencies"): + repo_urls = set() + for app_name in ("vault-secrets-operator", "argocd"): + app_urls = self._config.get_dependency_repositories(app_name) + repo_urls.update(app_urls) + for url in sorted(repo_urls): + self._helm.repo_add(url) + self._helm.repo_update() + + # Install vault-secrets-operator. Argo CD depends on this, so it has + # to be installed and configured with its Vault secret first. + with action_group("Install vault-secrets-operator"): + self._kubernetes.create_namespace( + "vault-secrets-operator", ignore_fail=True + ) + self._kubernetes.create_generic_secret( + "vault-credentials", + "vault-secrets-operator", + vault_credentials.to_secret_data(), + ) + self._helm.dependency_update("vault-secrets-operator") + self._helm.upgrade_application( + "vault-secrets-operator", + environment.name, + {"vault-secrets-operator.vault.address": vault.url}, + ) + + # Install Argo CD. + with action_group("Install Argo CD"): + self._helm.dependency_update("argocd") + self._helm.upgrade_application( + "argocd", + environment.name, + {"global.vaultSecretsPath": environment.vault_path_prefix}, + ) + + # Create and sync the top-level Argo CD application. 
+ with action_group("Install science-platform app-of-apps"): + self._argocd.login("admin", argocd_password.get_secret_value()) + self._argocd.create_environment( + environment.name, + "science-platform", + git_url=git_url, + git_branch=git_branch, + ) + self._argocd.sync("science-platform") + project = Project.infrastructure + self._argocd.set_project("science-platform", project) + + # Sync Argo CD and wait for it to finish syncing so that the pods + # don't restart in the middle of proxying another Argo CD operation. + with action_group("Sync Argo CD"): + try: + self._argocd.sync("argocd") + except CommandFailedError: + # As of Argo CD 2.10.5, the first execution always fails with + # a spurious error claiming the infrastructure project had not + # been created. This is transient; the second execution + # succeeds. + self._argocd.sync("argocd") + for deployment in ( + "deployment/argocd-server", + "deployment/argocd-repo-server", + "statefulset/argocd-application-controller", + ): + self._kubernetes.wait_for_rollout(deployment, "argocd") + + # Sync applications that others have dependencies on, if they're + # enabled. + with action_group("Sync infrastructure applications"): + for application in ( + "ingress-nginx", + "cert-manager", + "postgres", + "gafaelfawr", + ): + if application in environment.applications: + self._argocd.sync(application) + + # Sync everything else. + with action_group("Sync remaining applications"): + self._argocd.sync_all( + "science-platform", timeout=timedelta(minutes=5) + ) def lint(self, environment: str | None = None) -> bool: """Lint the Helm chart for environments. 
diff --git a/src/phalanx/storage/argocd.py b/src/phalanx/storage/argocd.py new file mode 100644 index 0000000000..cd4c4c5231 --- /dev/null +++ b/src/phalanx/storage/argocd.py @@ -0,0 +1,206 @@ +"""Interface to Argo CD operations.""" + +from __future__ import annotations + +from datetime import timedelta + +from ..models.applications import Project +from .command import Command + +__all__ = ["ArgoCDStorage"] + + +class ArgoCDStorage: + """Interface to Argo CD operations. + + Calls the :command:`argocd` command-line client. Used primarily by the + installer. + """ + + def __init__(self) -> None: + self._argocd = Command("argocd") + + def create_environment( + self, + environment: str, + app_of_apps_name: str, + *, + git_url: str, + git_branch: str, + ) -> None: + """Manually create an Argo CD application. + + Used only by the installer for installing the app of apps to bootstrap + the environment. + + Parameters + ---------- + environment + Name of the environment. + app_of_apps_name + Name of the app of apps Argo CD application. + git_url + URL to the Phalanx Git repository. + git_branch + Name of the branch in that repository from which to pull the Argo + CD configuration. + + Raises + ------ + CommandFailedError + Raised if Argo CD fails. + """ + self._argocd.run( + "app", + "create", + app_of_apps_name, + "--repo", + git_url, + "--path", + "environments", + "--dest-namespace", + "argocd", + "--dest-server", + "https://kubernetes.default.svc", + "--upsert", + "--revision", + git_branch, + "--helm-set", + f"repoUrl={git_url}", + "--helm-set", + f"targetRevision={git_branch}", + "--values", + "values.yaml", + "--values", + f"values-{environment}.yaml", + "--port-forward", + "--port-forward-namespace", + "argocd", + ) + + def login(self, username: str, password: str) -> None: + """Authenticate to Argo CD. + + Authenticates using username and password authentication with port + forwarding. This normally must be done before any other Argo CD + operations. 
+ + Parameters + ---------- + username + Username for authentication. (Usually this will be ``admin``.) + password + Password for that user. + + Raises + ------ + CommandFailedError + Raised if Argo CD fails. + """ + self._argocd.run( + "login", + "--plaintext", + "--username", + username, + "--password", + password, + "--port-forward", + "--port-forward-namespace", + "argocd", + ) + + def set_project(self, application: str, project: Project) -> None: + """Set the Argo CD project of an application. + + Parameters + ---------- + application + Application to change. + project + Project to move it into. + + Raises + ------ + CommandFailedError + Raised if Argo CD fails. + """ + self._argocd.run( + "app", + "set", + application, + "--project", + project.value, + "--port-forward", + "--port-forward-namespace", + "argocd", + ) + + def sync( + self, application: str, *, timeout: timedelta = timedelta(minutes=2) + ) -> None: + """Sync a specific Argo CD application. + + Parameters + ---------- + application + Name of the application. + timeout + How long to wait for the sync to complete. + + Raises + ------ + CommandFailedError + Raised if Argo CD fails. + CommandTimedOutError + Raised if the command timed out. The timeout is also passed to + Argo CD as an option, so normally the command should fail and + raise `~phalanx.exceptions.CommandFailedError` instead. This + exception means the Argo CD timeout didn't work for some reason. + """ + self._argocd.run( + "app", + "sync", + application, + "--timeout", + str(int(timeout.total_seconds())), + "--port-forward", + "--port-forward-namespace", + "argocd", + ) + + def sync_all( + self, + app_of_apps_name: str, + *, + timeout: timedelta = timedelta(seconds=30), + ) -> None: + """Sync all Argo CD applications under an app of apps. + + Parameters + ---------- + app_of_apps_name + Name of the parent app of apps. + timeout + How long to wait for the sync to complete. 
+ + Raises + ------ + CommandFailedError + Raised if Argo CD fails. + CommandTimedOutError + Raised if the command timed out. The timeout is also passed to + Argo CD as an option, so normally the command should fail and + raise `~phalanx.exceptions.CommandFailedError` instead. This + exception means the Argo CD timeout didn't work for some reason. + """ + self._argocd.run( + "app", + "sync", + "-l", + f"argocd.argoproj.io/instance={app_of_apps_name}", + "--timeout", + str(int(timeout.total_seconds())), + "--port-forward", + "--port-forward-namespace", + "argocd", + ) diff --git a/src/phalanx/storage/command.py b/src/phalanx/storage/command.py index 2e96b80bb7..75f88b7f7f 100644 --- a/src/phalanx/storage/command.py +++ b/src/phalanx/storage/command.py @@ -3,9 +3,10 @@ from __future__ import annotations import subprocess +from datetime import timedelta from pathlib import Path -from ..exceptions import CommandFailedError +from ..exceptions import CommandFailedError, CommandTimedOutError __all__ = ["Command"] @@ -32,7 +33,7 @@ def __init__(self, command: str) -> None: def capture( self, *args: str, cwd: Path | None = None ) -> subprocess.CompletedProcess: - """Run Helm, checking for errors and capturing the output. + """Run the command, checking for errors and capturing the output. This method should only be called by subclasses, which should provide a higher-level interface used by the rest of the program. @@ -76,11 +77,14 @@ def run( cwd: Path | None = None, ignore_fail: bool = False, quiet: bool = False, + timeout: timedelta | None = None, ) -> None: """Run the command with the provided arguments. - This method should only be called by subclasses, which should provide - a higher-level interface used by the rest of the program. + Standard output and standard error are not redirected and will go to + the standard output and error of the caller. 
This method should only + be called by subclasses, which should provide a higher-level interface + used by the rest of the program. Parameters ---------- @@ -94,12 +98,19 @@ def run( quiet If `True`, discard standard output. Standard error is still displayed on the process standard error stream. + timeout + If given, the command will be terminated and a + `~phalanx.exceptions.CommandTimedOutError` will be raised if + execution time exceeds this timeout. Raises ------ CommandFailedError Raised if the command failed and ``ignore_fail`` was not set to `True`. + CommandTimedOutError + Raised if ``timeout`` was given and the command took longer than + that to complete. subprocess.SubprocessError Raised if the command could not be executed at all. """ @@ -110,3 +121,5 @@ def run( subprocess.run(cmdline, check=check, cwd=cwd, stdout=stdout) except subprocess.CalledProcessError as e: raise CommandFailedError(self._command, args, e) from e + except subprocess.TimeoutExpired as e: + raise CommandTimedOutError(self._command, args, e) from e diff --git a/src/phalanx/storage/config.py b/src/phalanx/storage/config.py index 8ba05a5d02..86e3340a39 100644 --- a/src/phalanx/storage/config.py +++ b/src/phalanx/storage/config.py @@ -8,6 +8,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Self +from urllib.parse import urlparse import yaml from git import Diff @@ -17,6 +18,7 @@ from ..constants import HELM_DOCLINK_ANNOTATION from ..exceptions import ( ApplicationExistsError, + GitRemoteError, InvalidApplicationConfigError, InvalidSecretConfigError, UnknownEnvironmentError, @@ -312,6 +314,67 @@ def get_environment_chart_path(self) -> Path: """ return self._path / "environments" + def get_git_branch(self) -> str: + """Get the Git branch of the current repository. + + Returns + ------- + str + Branch name. + """ + return Repo(str(self._path)).active_branch.name + + def get_git_url(self) -> str: + """Get the Git URL of the current repository. 
+ + Assumes that the current repository is a cloned Git repository with a + remote named ``origin`` and returns the URL of that origin, + transformed to an ``https`` URL if necessary. This is used to get the + URL of the repository for configuring Argo CD during installation of + an environment. + + Returns + ------- + str + URL to the Git repository of the current config tree, suitable + for Argo CD. + + Raises + ------ + GitRemoteError + Raised if the ``origin`` remote does not exist or if its URL is + not in a recognized format. + """ + repo = Repo(str(self._path)) + try: + origin = repo.remote("origin") + except ValueError as e: + msg = 'Current repository has no remote named "origin"' + raise GitRemoteError(msg) from e + if not origin.url: + raise GitRemoteError('Remote "origin" has no URL') + + # If the URL is not an https URL, accept a few forms of github.com + # URLs that can be converted into one. + parsed_url = urlparse(origin.url) + if parsed_url.scheme == "ssh" and parsed_url.hostname == "github.com": + return parsed_url._replace( + scheme="https", netloc=parsed_url.hostname + ).geturl() + elif parsed_url.scheme == "https": + return origin.url + elif parsed_url.scheme == "": + match = re.match(r"git@github\.com:([^:/]+/[^:/]+)$", origin.url) + if match: + return "https://github.com/" + match.group(1) + + # If we fell through, we were unable to parse the URL. + msg = ( + "Cannot determine Argo CD Git URL from origin URL of" + f' "{origin.url}"' + ) + raise GitRemoteError(msg) + def get_modified_applications(self, branch: str) -> dict[str, list[str]]: """Get all modified application and environment pairs. 
diff --git a/src/phalanx/storage/helm.py b/src/phalanx/storage/helm.py index 8bde6c0a41..274d236b3d 100644 --- a/src/phalanx/storage/helm.py +++ b/src/phalanx/storage/helm.py @@ -3,6 +3,7 @@ from __future__ import annotations import sys +from datetime import timedelta from urllib.parse import urlparse from ..exceptions import CommandFailedError @@ -38,6 +39,11 @@ def create(self, application: str, starter: HelmStarter) -> None: Name of the new application. starter Name of the Helm starter template to use. + + Raises + ------ + CommandFailedError + Raised if Helm fails. """ starter_path = self._config.get_starter_path(starter) application_path = self._config.get_application_chart_path(application) @@ -56,7 +62,8 @@ def dependency_update( Tell Helm to update any third-party chart dependencies for an application and store them in the :file:`charts` subdirectory. This is - a prerequisite for :command:`helm lint` or :command:`helm template`. + a prerequisite for `lint_application`, `template_application`, or + `upgrade_application`. Assumes that remote repositories have already been refreshed with `repo_update` and tells Helm to skip that. @@ -67,6 +74,11 @@ def dependency_update( Application whose dependencies should be updated. quiet Whether to suppress Helm's standard output. + + Raises + ------ + CommandFailedError + Raised if Helm fails. """ application_path = self._config.get_application_chart_path(application) self._helm.run( @@ -210,6 +222,8 @@ def repo_add(self, url: str, *, quiet: bool = False) -> None: Raises ------ + CommandFailedError + Raised if Helm fails. ValueError Raised if the Helm repository URL is invalid. """ @@ -231,6 +245,11 @@ def repo_update(self, *, quiet: bool = False) -> None: ---------- quiet Whether to suppress Helm's standard output. + + Raises + ------ + CommandFailedError + Raised if Helm fails. 
""" self._helm.run("repo", "update", quiet=quiet) @@ -332,6 +351,71 @@ def template_environment(self, environment: str) -> str: sys.stderr.write(result.stderr) return result.stdout + def upgrade_application( + self, + application: str, + environment: str, + values: dict[str, str], + *, + timeout: timedelta = timedelta(minutes=2), + ) -> None: + """Install or upgrade an application using Helm. + + Runs :command:`helm upgrade --install` to install an application chart + in the given environment. Assumes that :command:`helm dependency + update` has already been run to download any third-party charts. Any + output to standard error is passed along. + + This method bypasses Argo CD and should only be used by the installer + to bootstrap the environment. + + Parameters + ---------- + application + Name of the application. + environment + Name of the environment in which to lint that application chart, + used to select the :file:`values-{environment}.yaml` file to add. + values + Extra key/value pairs to set. + timeout + Fail if the operation takes longer than this. The enforced timeout + in Python will be one second longer to allow Helm to time out its + own command first. + + Raises + ------ + CommandFailedError + Raised if Helm fails. + CommandTimedOutError + Raised if the command timed out. The timeout is also passed to + Helm as an option, so normally the command should fail and raise + `~phalanx.exceptions.CommandFailedError` instead. This exception + means the Helm timeout didn't work for some reason. 
+ """ + application_path = self._config.get_application_chart_path(application) + set_arg = ",".join(f"{k}={v}" for k, v in values.items()) + self._helm.run( + "upgrade", + application, + str(application_path), + "--install", + "--values", + f"{application}/values.yaml", + "--values", + f"{application}/values-{environment}.yaml", + "--set", + set_arg, + "--create-namespace", + "--namespace", + application, + "--timeout", + f"{int(timeout.total_seconds())}s", + "--wait", + cwd=application_path.parent, + timeout=timeout + timedelta(seconds=1), + ) + def _print_lint_output( self, application: str | None, environment: str, output: str | None ) -> None: diff --git a/src/phalanx/storage/kubernetes.py b/src/phalanx/storage/kubernetes.py new file mode 100644 index 0000000000..a93dbf1d30 --- /dev/null +++ b/src/phalanx/storage/kubernetes.py @@ -0,0 +1,80 @@ +"""Storage layer for direct Kubernetes operations.""" + +from __future__ import annotations + +from .command import Command + +__all__ = ["KubernetesStorage"] + + +class KubernetesStorage: + """Storage layer for direct Kubernetes operations. + + Used primarily by the installer. This uses :command:`kubectl` directly + rather than one of the Python Kubernetes libraries since it seemed simpler + at the time. + """ + + def __init__(self) -> None: + self._kubectl = Command("kubectl") + + def create_namespace( + self, namespace: str, *, ignore_fail: bool = False + ) -> None: + """Create a Kubernetes namespace. + + Parameters + ---------- + namespace + Namespace to create. + ignore_fail + If `True`, ignore failures, such as when the namespace already + exists. + + Raises + ------ + CommandFailedError + Raised if the namespace creation fails, and ``ignore_fail`` was + not set to `True`. + """ + self._kubectl.run("create", "ns", namespace, ignore_fail=ignore_fail) + + def create_generic_secret( + self, name: str, namespace: str, keys: dict[str, str] + ) -> None: + """Create a generic Kubernetes ``Secret`` resource. 
+ + Parameters + ---------- + name + Name of the secret. + namespace + Namespace of the secret. + keys + Key and value pairs to put into the secret. + """ + args = [ + "create", + "secret", + "generic", + name, + "--namespace", + namespace, + ] + for key, value in keys.items(): + args.append(f"--from-literal={key}={value}") + self._kubectl.run(*args) + + def wait_for_rollout(self, name: str, namespace: str) -> None: + """Wait for a Kubernetes rollout to complete. + + Parameters + ---------- + name + Name of the rollout. This should be the type of object (usually + either ``deployment`` or ``statefulset``), followed by a slash and + the name of the object. + namespace + Namespace in which the rollout is happening. + """ + self._kubectl.run("-n", namespace, "rollout", "status", name) diff --git a/src/phalanx/storage/vault.py b/src/phalanx/storage/vault.py index 97282775f7..4eedd9af36 100644 --- a/src/phalanx/storage/vault.py +++ b/src/phalanx/storage/vault.py @@ -12,8 +12,11 @@ from ..models.environments import EnvironmentBaseConfig from ..models.vault import ( VaultAppRole, + VaultAppRoleCredentials, VaultAppRoleMetadata, + VaultCredentials, VaultToken, + VaultTokenCredentials, VaultTokenMetadata, ) @@ -26,9 +29,16 @@ class VaultClient: This client is specific to a particular Phalanx environment. It is created using the metadata of a Phalanx environment by `VaultStorage`. - The Vault authentication token is taken from either the ``VAULT_TOKEN`` - environment variable or a :file:`.vault-token` file in the user's home - directory. + If ``credentials`` is not given, token authentication is + used and the token is taken from the ``VAULT_TOKEN`` environment + variable or a :file:`.vault-token` file in the user's home directory. + + Attributes + ---------- + url + URL of the configured Vault server. + path + Prefix path within Vault where secrets are stored.
Parameters ---------- @@ -37,13 +47,30 @@ class VaultClient: path Path within that Vault server where secrets for an environment are stored. + credentials + Credentials to use for authentication. If this is not set, fall back + on the default library behavior of getting the token from the + environment or the user's home directory. """ - def __init__(self, url: str, path: str) -> None: - self._url = url - _, self._path = path.split("/", 1) + def __init__( + self, + url: str, + path: str, + credentials: VaultCredentials | None = None, + ) -> None: + self.url = url + _, self.path = path.split("/", 1) self._vault = hvac.Client(url) self._vault.secrets.kv.default_kv_version = 2 + match credentials: + case VaultAppRoleCredentials(): + self._vault.auth.approle.login( + role_id=credentials.role_id, + secret_id=credentials.secret_id, + ) + case VaultTokenCredentials(): + self._vault.token = credentials.token def create_approle(self, name: str, policies: list[str]) -> VaultAppRole: """Create a new Vault AppRole for secret access. @@ -137,7 +164,7 @@ def delete_application_secret(self, application: str) -> None: application Name of the application. """ - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" with suppress(InvalidPath): self._vault.secrets.kv.delete_latest_version_of_secret(path) @@ -159,13 +186,13 @@ def get_application_secret(self, application: str) -> dict[str, SecretStr]: VaultNotFoundError Raised if the requested secret was not found in Vault. 
""" - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" try: r = self._vault.secrets.kv.read_secret( path, raise_on_deleted_version=True ) except InvalidPath as e: - raise VaultNotFoundError(self._url, path) from e + raise VaultNotFoundError(self.url, path) from e return {k: SecretStr(v) for k, v in r["data"]["data"].items()} def get_approle(self, name: str) -> VaultAppRoleMetadata | None: @@ -264,9 +291,9 @@ def list_application_secrets(self) -> list[str]: Raised if the path for application secrets does not exist. """ try: - r = self._vault.secrets.kv.list_secrets(self._path) + r = self._vault.secrets.kv.list_secrets(self.path) except InvalidPath as e: - raise VaultNotFoundError(self._url, self._path) from e + raise VaultNotFoundError(self.url, self.path) from e return r["data"]["keys"] def list_token_accessors(self) -> list[str]: @@ -314,7 +341,7 @@ def store_application_secret( values Secret key and value pairs. """ - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" secret = {k: v.get_secret_value() for k, v in values.items()} self._vault.secrets.kv.create_or_update_secret(path, secret) @@ -332,7 +359,7 @@ def update_application_secret( value New value for that secret key. """ - path = f"{self._path}/{application}" + path = f"{self.path}/{application}" self._vault.secrets.kv.patch(path, {key: value.get_secret_value()}) @@ -340,7 +367,11 @@ class VaultStorage: """Create Vault clients for specific environments.""" def get_vault_client( - self, env: EnvironmentBaseConfig, path_prefix: str | None = None + self, + env: EnvironmentBaseConfig, + path_prefix: str | None = None, + *, + credentials: VaultCredentials | None = None, ) -> VaultClient: """Return a Vault client configured for the given environment. @@ -351,14 +382,24 @@ def get_vault_client( path_prefix Path prefix within Vault for application secrets. If given, this overrides the path prefix in the environment configuration. 
+ credentials + Credentials to use for authentication. If this is not set, fall + back on the default library behavior of getting the token from + the environment or the user's home directory. Returns ------- VaultClient Vault client configured to manage secrets for that environment. + + Raises + ------ + ValueError + Raised if ``vaultUrl`` is not set for the environment or if both + a Vault AppRole and a Vault token were provided. """ if not path_prefix: path_prefix = env.vault_path_prefix if not env.vault_url: raise ValueError("vaultUrl not set for this environment") - return VaultClient(str(env.vault_url), path_prefix) + return VaultClient(str(env.vault_url), path_prefix, credentials) diff --git a/tox.ini b/tox.ini index 99fc150b99..87733c00bd 100644 --- a/tox.ini +++ b/tox.ini @@ -32,8 +32,14 @@ depends = commands = coverage report +[testenv:install] +description = Run the installer (used for CI) +commands = phalanx environment install {posargs} +passenv = + GITHUB_* + [testenv:lint] -description = Lint codebase by running pre-commit (Black, isort, Flake8). +description = Lint codebase by running pre-commit skip_install = true deps = pre-commit