#!/usr/bin/env bash

set -eEx

: "${CEPH_DEV_FOLDER:=${PWD}}"
CLUSTER_SPEC=${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml
DEFAULT_NS="rook-ceph"
KUBECTL="minikube kubectl --"
export ROOK_CLUSTER_NS="${ROOK_CLUSTER_NS:=$DEFAULT_NS}" ## CephCluster namespace

# We build a local Ceph image that contains the latest code plus the
# changes from the PR. This image is used by the Docker daemon running
# inside minikube to start the different Ceph pods.
LOCAL_CEPH_IMG="local/ceph"

on_error() {
    echo "Error (exit code: $1) on line: $2. Deleting minikube cluster..."
    minikube delete
}

setup_minikube_env() {
    # If minikube is already running, start from a clean slate
    if minikube status > /dev/null 2>&1; then
        echo "Minikube is running"
        minikube stop
        minikube delete
    else
        echo "Minikube is not running"
    fi

    rm -rf ~/.minikube
    minikube start --memory="6144" --disk-size=20g --extra-disks=4 --driver kvm2

    # Point the Docker CLI at the Docker daemon running inside minikube
    eval "$(minikube docker-env -p minikube)"
}

build_ceph_image() {
    # Extract the Ceph image referenced by the cluster spec
    CURR_CEPH_IMG=$(grep -E '^\s*image:\s+' "$CLUSTER_SPEC" | sed 's/.*image: *\([^ ]*\)/\1/')

    cd "${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci"
    mkdir -p tmp_build/rook
    mkdir -p tmp_build/orchestrator
    cp ./../../orchestrator/*.py tmp_build/orchestrator
    cp ../*.py tmp_build/rook
    cp -r ../../../../../src/python-common/ceph/ tmp_build/

    # Tag the local image with the name used in the cluster spec so the
    # Docker daemon inside minikube uses this image instead of pulling
    # it from the registry.
    docker build --tag "${LOCAL_CEPH_IMG}" .
    docker tag "${LOCAL_CEPH_IMG}" "${CURR_CEPH_IMG}"

    # cleanup
    rm -rf tmp_build
    cd "${CEPH_DEV_FOLDER}"
}

create_rook_cluster() {
    $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/crds.yaml
    $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/common.yaml
    $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/operator.yaml
    $KUBECTL create -f "$CLUSTER_SPEC"
    $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/toolbox.yaml
}

is_operator_ready() {
    local phase
    phase=$($KUBECTL get cephclusters.ceph.rook.io -n "$ROOK_CLUSTER_NS" -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}')
    echo "PHASE: $phase"
    [[ "$phase" == "Ready" ]]
}

wait_for_rook_operator() {
    local max_attempts=10
    local sleep_interval=20
    local attempts=0

    $KUBECTL rollout status deployment rook-ceph-operator -n "$ROOK_CLUSTER_NS" --timeout=180s

    while ! is_operator_ready; do
        echo "Waiting for rook operator to be ready..."
        sleep $sleep_interval

        # Log the current cluster state and pods for debugging
        PHASE=$($KUBECTL get cephclusters.ceph.rook.io -n "$ROOK_CLUSTER_NS" -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}')
        echo "PHASE: $PHASE"
        $KUBECTL -n "$ROOK_CLUSTER_NS" get pods

        attempts=$((attempts + 1))
        if [ $attempts -ge $max_attempts ]; then
            echo "Maximum number of attempts ($max_attempts) reached. Exiting..."
            $KUBECTL -n "$ROOK_CLUSTER_NS" get pods | grep operator | awk '{print $1}' | xargs $KUBECTL -n "$ROOK_CLUSTER_NS" logs
            return 1
        fi
    done
}

wait_for_ceph_cluster() {
    local max_attempts=10
    local sleep_interval=20
    local attempts=0

    $KUBECTL rollout status deployment rook-ceph-tools -n "$ROOK_CLUSTER_NS" --timeout=90s

    while ! $KUBECTL get cephclusters.ceph.rook.io -n "$ROOK_CLUSTER_NS" -o jsonpath='{.items[?(@.kind == "CephCluster")].status.ceph.health}' | grep -q "HEALTH_OK"; do
        echo "Waiting for Ceph cluster to enter HEALTH_OK state"
        sleep $sleep_interval
        attempts=$((attempts + 1))
        if [ $attempts -ge $max_attempts ]; then
            echo "Maximum number of attempts ($max_attempts) reached. Exiting..."
            return 1
        fi
    done
    echo "Ceph cluster installed and running"

    # Wait a bit longer to cover any subtle change in the cluster state
    sleep 20
}

configure_libvirt() {
    if sudo usermod -aG libvirt "$(id -un)"; then
        echo "User added to libvirt group successfully."
        sudo systemctl enable --now libvirtd
        sudo systemctl restart libvirtd
        sleep 30 # wait some time for the libvirtd service to restart
        newgrp libvirt # re-evaluate group membership (note: this spawns a new shell)
    else
        echo "Error adding user to libvirt group."
        return 1
    fi
}

recreate_default_network() {
    # Destroy any existing KVM default network
    if sudo virsh net-destroy default; then
        sudo virsh net-undefine default
    fi

    # Create a fresh KVM default network
    sudo virsh net-define /usr/share/libvirt/networks/default.xml
    if sudo virsh net-start default; then
        echo "Network 'default' started successfully."
    else
        echo "Failed to start network 'default', but continuing..."
    fi

    # Restart the libvirtd service and give it some time to settle
    sudo systemctl restart libvirtd
    sleep 30

    # Just some debugging information (printed via set -x)
    all_networks=$(virsh net-list --all)
    groups=$(groups)
}

enable_rook_orchestrator() {
    echo "Enabling rook orchestrator"
    $KUBECTL rollout status deployment rook-ceph-tools -n "$ROOK_CLUSTER_NS" --timeout=90s
    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph mgr module enable rook
    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch set backend rook
    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch status
}

enable_monitoring() {
    echo "Enabling monitoring"
    $KUBECTL apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml
    $KUBECTL wait --for=condition=ready pod -l app.kubernetes.io/name=prometheus-operator --timeout=90s
    $KUBECTL apply -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring/rbac.yaml
    $KUBECTL apply -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring/service-monitor.yaml
    $KUBECTL apply -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring/exporter-service-monitor.yaml
    $KUBECTL apply -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring/prometheus.yaml
    $KUBECTL apply -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring/prometheus-service.yaml
}

####################################################################
####################################################################

trap 'on_error $? $LINENO' ERR

configure_libvirt
recreate_default_network
setup_minikube_env
build_ceph_image
create_rook_cluster
wait_for_rook_operator
wait_for_ceph_cluster
enable_rook_orchestrator
enable_monitoring

sleep 30 # wait for the metrics cache warmup

####################################################################
####################################################################
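
# Optional smoke test (a minimal sketch, not part of the CI flow above):
# uncomment to verify from the toolbox pod that the cluster is healthy
# and that the rook orchestrator backend answers once the script completes.
# $KUBECTL -n "$ROOK_CLUSTER_NS" exec deploy/rook-ceph-tools -- ceph -s
# $KUBECTL -n "$ROOK_CLUSTER_NS" exec deploy/rook-ceph-tools -- ceph orch status
# $KUBECTL -n "$ROOK_CLUSTER_NS" exec deploy/rook-ceph-tools -- ceph orch ps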