From 9bf4d8e94ddf79759613b0ffde96dedfbe8c1973 Mon Sep 17 00:00:00 2001 From: mck Date: Mon, 22 Jul 2024 10:50:00 +0200 Subject: [PATCH] Make collect-info multi-arch compatible, providing arm64 support Additional adjustments to integration tests: - Had to remove the macos step, as the macos runners are now arm64 mX based and don't support docker. - Upgraded cass-operator, Cassandra, and DSE versions used in the tests. - Reduced DSE tests to single node. - Had to use `--force-conflicts --server-side` on cass-operator, see https://github.com/k8ssandra/k8ssandra/issues/1380 - Added iproute2 to test images (to get `ip` command) - A darwin arm64 `collect-info` is built if the local machine is such and `rustc` is on the path, and this is used when running `ds-collector`, on the assumption this is a mac mX machine running integration tests. A clear message is printed if this assumption is wrong and how to remedy it. - The use of `df` command is made macos friendly in `ds-collector` (for the above point). Also added the licence header to ds-collector script. 
--- .github/workflows/ds-collector-ci.yml | 39 +++++++++++-------- .gitignore | 2 +- Makefile | 7 +++- README.md | 15 +++++-- ds-collector-tests/cluster-dse-k8s.make | 5 +-- .../cluster-one-node-vanilla-ssh-docker.make | 1 - ds-collector-tests/cluster-vanilla-k8s.make | 1 - .../cluster-vanilla-ssh-docker.make | 1 - ds-collector-tests/integration-bastion.docker | 2 +- .../integration-cassandra.docker | 2 +- .../example-cassdc-minimal-dse.yaml | 8 ++-- ds-collector/README.md | 6 +-- ds-collector/ds-collector | 30 +++++++++++--- ds-collector/rust-commands/README.md | 36 ++++++++++++++++- 14 files changed, 110 insertions(+), 45 deletions(-) diff --git a/.github/workflows/ds-collector-ci.yml b/.github/workflows/ds-collector-ci.yml index 891c098..db92e1d 100644 --- a/.github/workflows/ds-collector-ci.yml +++ b/.github/workflows/ds-collector-ci.yml @@ -25,26 +25,29 @@ jobs: - name: Test ds-collector working-directory: ds-collector-tests run: | + sudo apt-get install -y binfmt-support qemu qemu-user-static echo "Testing ds-collector" make -f cluster-vanilla-ssh-docker.make - test-collector-ssh-docker-macos: - runs-on: macos-11 - - steps: - - uses: docker-practice/actions-setup-docker@1.0.11 - - uses: actions/checkout@v2 - - name: Set up JDK 1.8 - uses: actions/setup-java@v1 - with: - java-version: 1.8 - - - name: Test ds-collector - working-directory: ds-collector-tests - run: | - brew install coreutils - echo "Testing ds-collector" - make -f cluster-one-node-vanilla-ssh-docker.make +# TODO – fix when gha offers any arm64 runner (that can run docker) +# +# test-collector-ssh-docker-macos: +# runs-on: macos-14 +# +# steps: +# - uses: docker-practice/actions-setup-docker@1.0.11 +# - uses: actions/checkout@v2 +# - name: Set up JDK 1.8 +# uses: actions/setup-java@v1 +# with: +# java-version: 1.8 +# +# - name: Test ds-collector +# working-directory: ds-collector-tests +# run: | +# brew install coreutils +# echo "Testing ds-collector" +# make -f 
cluster-one-node-vanilla-ssh-docker.make test-collector-k8s-cassandra: runs-on: ubuntu-latest @@ -59,6 +62,7 @@ jobs: - name: Test ds-collector working-directory: ds-collector-tests run: | + sudo apt-get install -y binfmt-support qemu qemu-user-static echo "Testing ds-collector" make -f cluster-vanilla-k8s.make @@ -75,6 +79,7 @@ jobs: - name: Test ds-collector working-directory: ds-collector-tests run: | + sudo apt-get install -y binfmt-support qemu qemu-user-static echo "Testing ds-collector" make -f cluster-dse-k8s.make diff --git a/.gitignore b/.gitignore index bfe0736..e27272c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,5 @@ hs_err_pid* ds-collector.tar.gz ds-collector.*.tar.gz collector/ -ds-collector/collect-info +ds-collector/collect-info* ds-collector/logs diff --git a/Makefile b/Makefile index 1bc8108..27c1e4b 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,11 @@ collector: check-env generate-key @cp -R ds-collector/ collector @rm -f collector/collect-info - @cd collector ; docker run --rm -v $$PWD:/volume -w /volume -t clux/muslrust rustc --target x86_64-unknown-linux-musl rust-commands/*.rs ; cd - - @test -f collector/collect-info + @cd collector ; if ( command -v rustc >/dev/null 2>&1 ) && [ "aarch64-apple-darwin" = "$(rustc -vV | grep host | cut -d' ' -f2)" ] ; then rustc rust-commands/*.rs ; mv collect-info collect-info.aarch64-apple-darwin ; fi ; cd - + @cd collector ; docker run --rm --platform linux/arm64 -v /usr/bin/qemu-aarch64-static:/usr/bin/qemu-aarch64-static -v $$PWD:/volume -w /volume -t clux/muslrust rustc --target aarch64-unknown-linux-musl rust-commands/*.rs ; mv collect-info collect-info.aarch64-unknown-linux-musl ; cd - + @cd collector ; docker run --rm --platform linux/amd64 -v $$PWD:/volume -w /volume -t clux/muslrust rustc --target x86_64-unknown-linux-musl rust-commands/*.rs ; mv collect-info collect-info.x86_64-unknown-linux-musl ; cd - + @test -f collector/collect-info.aarch64-unknown-linux-musl + @test -f 
collector/collect-info.x86_64-unknown-linux-musl @rm -f collector/collector.hosts @rm -f collector/collector.conf @mv collector/collector.hosts.in collector/collector.hosts diff --git a/README.md b/README.md index c1e1205..26b6203 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ -# DataStax Diagnostic Collector for Apache Cassandra™ and DataStax Enterprise (DSE) ™ +# Diagnostic Collector for Apache Cassandra™, DSE™, HCD™, … -A script for collecting a diagnostic snapshot from each node in a Cassandra based cluster. +A script for collecting a diagnostic snapshot (support bundle) from each node in a Cassandra based cluster. -The code for the collector script is in the _ds-collector/_ directory. It must first be built into a collector tarball. +The code for the collector script is in the _ds-collector/_ directory. + +Then _ds-collector/_ code must first be built into a `ds-collector*.tar.gz` tarball. + +The built `ds-collector*.tar.gz` tarball is then extracted onto a bastion or jumpbox that has access to the nodes in the cluster. Once extracted, the configuration file (collector.conf) can be edited to match any cluster deployment customisations (e.g. non-default port numbers, non-default log location, etc). The ds-collector script can then be executed; first in test mode and then in collection mode. -This collector tarball is then extracted onto a bastion or jumpbox that has access to the nodes in the cluster. Once extracted, the configuration file (collector.conf) can be edited to match any cluster deployment customisations (e.g. non-default port numbers, non-default log location, etc). The ds-collector script can then be executed; first in test mode and then in collection mode. 
# Pre-configuring the Collector Configuration When building the collector, it can be instructed to pre-configure the collector.conf by setting the following variables: @@ -20,6 +23,7 @@ export is_k8s=true If no variables are set, then the collector will be pre-configured to assume Apache Cassandra running on hosts which can be accessed via SSH. + # Building the Collector Build the collector using the following make command syntax. You will need make and Docker. @@ -31,6 +35,7 @@ make This will generate a _.tar.gz_ tarball with the `issueId` set in the packaged configuration file. The archive will named in the format `ds-collector.$ISSUE.tar.gz`. + # Building the Collector with automatic s3 upload ability If the collector is built with the following variables defined, all collected diagnostic snapshots will be encrypted and uploaded to a specific AWS S3 bucket. Encryption will use a one-off built encryption key that is created locally. @@ -50,6 +55,7 @@ This will then generate a .tar.gz tarball as described above, additionally with In addition to the _.tar.gz_ tarball, an encryption key is now generated. The encryption key must be placed in the same directory as the extracted collector tarball for it to execute. If the tarball is being sent to someone else, it is recommeneded to send the encryption key via a different (and preferably secured) medium. + # Storing Encryption keys within the AWS Secrets Manager The collector build process also supports storing and retrieving keys from the AWS secrets manager, to use this feature, 2 additional environment variables must be provided before the script is run. @@ -69,6 +75,7 @@ When the collector is built, it will also upload the generated encryption key to Please be careful with the encryption keys. They should only be stored in a secure vault (such as the AWS Secrets Manager), and temporarily on the jumpbox or bastion where and while the collector script is being executed. 
The encryption key ensures the diagnostic snapshots are secured when transferred over the network and stored in the AWS S3 bucket. + # Executing the Collector Script against a Cluster Instructions for execution of the Collector script are found in `ds-collector/README.md`. These instructions are also bundled into the built collector tarball. diff --git a/ds-collector-tests/cluster-dse-k8s.make b/ds-collector-tests/cluster-dse-k8s.make index 1ed16af..4d9def8 100755 --- a/ds-collector-tests/cluster-dse-k8s.make +++ b/ds-collector-tests/cluster-dse-k8s.make @@ -30,7 +30,6 @@ setup: tar -xvf ../ds-collector.TEST-cluster-dse-k8s-*.tar.gz rm collector/collector.conf cp TEST-cluster-dse-k8s-*_secret.key collector/ || true - test -f collector/collect-info # setup k8s cluster cp k8s-manifests/01-kind-config.yaml /tmp/datastax/01-kind-config.yaml kind create cluster --name ds-collector-cluster-dse-k8s --config /tmp/datastax/01-kind-config.yaml @@ -46,7 +45,7 @@ setup: true # Note if you change the cass-operator version, you may also want to change the DSE version in the example-cassdc-minimal-dse.yaml file - kubectl apply -k github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.10.3 + kubectl apply --force-conflicts --server-side -k github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.22.0 while (! kubectl -n cass-operator get pod | grep -q "cass-operator-") || kubectl -n cass-operator get pod | grep -q "0/1" ; do kubectl -n cass-operator get pod ; echo "waiting 10s…" ; sleep 10 ; done kubectl -n cass-operator apply -f k8s-manifests/example-cassdc-minimal-dse.yaml while (! 
kubectl -n cass-operator get pod | grep -q "cluster2-dc1-default-sts-0") || kubectl -n cass-operator get pod | grep -q "0/2" || kubectl -n cass-operator get pod | grep -q "1/2" ; do kubectl -n cass-operator get pod ; echo "waiting 60s…" ; sleep 60 ; done @@ -54,6 +53,6 @@ setup: teardown: kubectl delete cassdcs --all-namespaces --all - kubectl delete -k github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.10.3 + kubectl delete -k github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.22.0 kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/v1.7.1/cert-manager.yaml kind delete cluster --name ds-collector-cluster-dse-k8s diff --git a/ds-collector-tests/cluster-one-node-vanilla-ssh-docker.make b/ds-collector-tests/cluster-one-node-vanilla-ssh-docker.make index 4f47cea..6850b55 100755 --- a/ds-collector-tests/cluster-one-node-vanilla-ssh-docker.make +++ b/ds-collector-tests/cluster-one-node-vanilla-ssh-docker.make @@ -47,7 +47,6 @@ setup: tar -xvf ../ds-collector.TEST-cluster-one-node-vanilla-ssh-docker-*.tar.gz rm collector/collector.conf cp TEST-cluster-one-node-vanilla-ssh-docker-*_secret.key collector/ || true - test -f collector/collect-info # setup single node docker cluster and bastion docker-compose up --build -d cassandra-00 bastion docker-compose ps diff --git a/ds-collector-tests/cluster-vanilla-k8s.make b/ds-collector-tests/cluster-vanilla-k8s.make index 44d0550..7d44e8c 100755 --- a/ds-collector-tests/cluster-vanilla-k8s.make +++ b/ds-collector-tests/cluster-vanilla-k8s.make @@ -27,7 +27,6 @@ setup: tar -xvf ../ds-collector.TEST-cluster-vanilla-k8s-*.tar.gz rm collector/collector.conf cp TEST-cluster-vanilla-k8s-*_secret.key collector/ || true - test -f collector/collect-info # setup k8s cluster cp k8s-manifests/01-kind-config.yaml /tmp/datastax/01-kind-config.yaml kind create cluster --name ds-collector-cluster-vanilla-k8s --config /tmp/datastax/01-kind-config.yaml diff --git 
a/ds-collector-tests/cluster-vanilla-ssh-docker.make b/ds-collector-tests/cluster-vanilla-ssh-docker.make index ac8bad3..7aa1d3f 100755 --- a/ds-collector-tests/cluster-vanilla-ssh-docker.make +++ b/ds-collector-tests/cluster-vanilla-ssh-docker.make @@ -47,7 +47,6 @@ setup: tar -xvf ../ds-collector.TEST-cluster-vanilla-ssh-docker-*.tar.gz rm collector/collector.conf cp TEST-cluster-vanilla-ssh-docker-*_secret.key collector/ || true - test -f collector/collect-info # setup docker cluster and bastion docker-compose up --build -d docker-compose ps diff --git a/ds-collector-tests/integration-bastion.docker b/ds-collector-tests/integration-bastion.docker index a03232f..c43dd44 100644 --- a/ds-collector-tests/integration-bastion.docker +++ b/ds-collector-tests/integration-bastion.docker @@ -1,7 +1,7 @@ FROM ubuntu:latest ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y openssh-server locales sshpass procps ethtool netcat net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl xxd +RUN apt-get update && apt-get install -y openssh-server locales sshpass procps ethtool netcat-traditional net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl xxd iproute2 RUN mkdir /var/run/sshd diff --git a/ds-collector-tests/integration-cassandra.docker b/ds-collector-tests/integration-cassandra.docker index d2fa7d4..5f1ea03 100644 --- a/ds-collector-tests/integration-cassandra.docker +++ b/ds-collector-tests/integration-cassandra.docker @@ -1,7 +1,7 @@ FROM cassandra:3.11 ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y openssh-server wait-for-it netcat sudo procps ethtool lsof netcat net-tools sysstat pciutils ntp ntpstat numactl lvm2 +RUN apt-get update && apt-get install -y openssh-server wait-for-it netcat sudo procps ethtool lsof netcat net-tools sysstat pciutils ntp ntpstat numactl lvm2 iproute2 RUN mkdir /var/run/sshd RUN echo 'root:root' | chpasswd RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' 
/etc/ssh/sshd_config diff --git a/ds-collector-tests/k8s-manifests/example-cassdc-minimal-dse.yaml b/ds-collector-tests/k8s-manifests/example-cassdc-minimal-dse.yaml index 9ba0007..38ad607 100644 --- a/ds-collector-tests/k8s-manifests/example-cassdc-minimal-dse.yaml +++ b/ds-collector-tests/k8s-manifests/example-cassdc-minimal-dse.yaml @@ -7,10 +7,10 @@ metadata: spec: clusterName: cluster2 serverType: dse - serverVersion: "6.8.21" + serverVersion: "6.8.39" managementApiAuth: insecure: {} - size: 3 + size: 1 storageConfig: cassandraDataVolumeClaimSpec: storageClassName: server-storage @@ -25,6 +25,6 @@ spec: max_heap_size: "800M" max_direct_memory: "800M" additional-jvm-opts: - # As the database comes up for the first time, set system keyspaces to RF=3 + # As the database comes up for the first time, set system keyspaces to RF=1 - "-Ddse.system_distributed_replication_dc_names=dc1" - - "-Ddse.system_distributed_replication_per_dc=3" + - "-Ddse.system_distributed_replication_per_dc=1" diff --git a/ds-collector/README.md b/ds-collector/README.md index f4cdba2..c0c819f 100644 --- a/ds-collector/README.md +++ b/ds-collector/README.md @@ -1,7 +1,7 @@ -DataStax Diagnostic Collector for Apache Cassandra™ and DataStax Enterprise (DSE)™ -=============================================================================================== +Diagnostic Collector for Apache Cassandra™, DSE™, HCD™, … +============================================================================== -The Diagnostic Collector bundle is used to collect diagnostic snapshots over all nodes in an Apache Cassandra or DataStax Enterprise cluster. +The Diagnostic Collector bundle is used to collect diagnostic snapshots (support bundles) over all nodes in an Apache Cassandra, or Cassandra based product, cluster. It can be run on Linux or Mac server that has ssh/docker/k8s access to the nodes in the cluster. It cannot be directly run on a node in the cluster. 
diff --git a/ds-collector/ds-collector b/ds-collector/ds-collector index c4ca354..ca0ccf3 100755 --- a/ds-collector/ds-collector +++ b/ds-collector/ds-collector @@ -1,8 +1,20 @@ #!/bin/bash # -# ds-collector # -# Collect artifacts and ship for analysis +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# ds-collector :: Collect artifacts and ship for analysis # # The following are expected to be installed on the Cassandra/DSE nodes: # blockdev @@ -31,7 +43,7 @@ # uptime # # On a debian/ubuntu server these can be installed by running: -# `apt-get install -y procps ethtool lsof net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl` +# `apt-get install -y procps ethtool iproute2 lsof net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl` # @@ -165,7 +177,7 @@ list_cassandra_nodes() { required_basedir_space="$((${node_count} * 500000))" # detect if df supports --portability DF_OPT="" - ( df --help | grep -q "\-\-portability" ) && DF_OPT="--portability" + ( ( df --help 2>/dev/null ) | grep -q "\-\-portability" ) && DF_OPT="--portability" [ $(df $DF_OPT "$baseDir" | tail -n +2 | awk '{print $4}') -ge ${required_basedir_space} ] || { echo >&2 "A diagnostic collection of ${node_count} nodes requires at least $((${required_basedir_space} / 1000000))GB free at $baseDir"; exit 1; } # also grab the cluster name @@ -966,7 +978,15 @@ get_info() { node_push "$0" "$baseDir/$targetFile" node_push "${script_directory}/${prometheus}" "$baseDir/${prometheus}" node_push 
"${script_directory}/$dstat" "$baseDir/$dstat" - node_push "${script_directory}/collect-info" "$baseDir/collect-info" + collect_info_binary="collect-info.x86_64-unknown-linux-musl" + arch="$(node_connect 'arch')" + if ( command -v rustc >/dev/null 2>&1 ) && [ "aarch64-apple-darwin" == "$(rustc -vV | grep host | cut -d' ' -f2)" ] && test -f "${script_directory}/collect-info.aarch64-apple-darwin" ; then + echo "Jumpbox/Bastion is aarch64-apple-darwin and collector was built on aarch64-apple-darwin. Assuming nodes are aarch64-apple-darwin too. Is this is not true, delete the collect-info.aarch64-apple-darwin file and execute again." + collect_info_binary="collect-info.aarch64-apple-darwin" + elif [[ ${arch} =~ "arm" ]] || [[ ${arch} =~ "aarch64" ]] ; then + collect_info_binary="collect-info.aarch64-unknown-linux-musl" + fi + node_push "${script_directory}/${collect_info_binary}" "${baseDir}/collect-info" node_connect "mkdir -p $baseDir/etc" for f in ${script_directory}/etc/*; do diff --git a/ds-collector/rust-commands/README.md b/ds-collector/rust-commands/README.md index a0f1a4c..a87f596 100644 --- a/ds-collector/rust-commands/README.md +++ b/ds-collector/rust-commands/README.md @@ -1,4 +1,38 @@ -## Compile rust binaries for Linux + +The following is done for you in the top directory `Makefile`. Bundled `ds-collector.*.tar.gz` tarballs should already contain multiple `collect-info` binaries to cover your architecture. The following instructions are normally not required. 
+ + ## Compile rust binaries for Linux amd64 and arm64 docker run -v $PWD:/volume -w /volume -t clux/muslrust rustc --target x86_64-unknown-linux-musl *.rs + +On Mac aarch: + + docker run --platform linux/arm64 -v $PWD:/volume -w /volume -t clux/muslrust rustc --target aarch64-unknown-linux-musl *.rs + + mv collect-info ../collect-info.aarch64-unknown-linux-musl + + docker run --platform linux/amd64 -v $PWD:/volume -w /volume -t clux/muslrust rustc --target x86_64-unknown-linux-musl *.rs + + mv collect-info ../collect-info.x86_64-unknown-linux-musl + + +On Linux (or old Mac): + + sudo apt-get install -y qemu binfmt-support qemu-user-static + + docker run --platform linux/arm64 -v /usr/bin/qemu-aarch64-static:/usr/bin/qemu-aarch64-static -v $PWD:/volume -w /volume -t clux/muslrust rustc --target aarch64-unknown-linux-musl *.rs + + mv collect-info ../collect-info.aarch64-unknown-linux-musl + + docker run --platform linux/amd64 -v $PWD:/volume -w /volume -t clux/muslrust rustc --target x86_64-unknown-linux-musl *.rs + + mv collect-info ../collect-info.x86_64-unknown-linux-musl + + +On Mac aarch, to run integration tests on same Mac aarch: + + rustc *.rs + + mv collect-info ../collect-info.aarch64-apple-darwin \ No newline at end of file