#!/usr/bin/env bash
#############
#############
#######################################
# Parse a storage quantity such as "512Mi", "10Gi" or "2Ti".
# Globals:
#   rawSize       (written) numeric part of the quantity, e.g. "10" for "10Gi"
#   unitSize      (written) unit suffix of the quantity: "Mi", "Gi" or "Ti"
#   rawsizeValue  (written) the quantity converted to Mi; left at 0 when the
#                 numeric part is not a plain non-negative integer
# Arguments:
#   $1 - label of the thing being measured; only used in the error message
#   $2 - the quantity to parse
# Outputs:
#   Writes a diagnostic to stderr when the unit is not supported.
# Exits:
#   Terminates the whole script with status 1 on an unsupported unit.
#######################################
function calculateSize() {
  local unit=$1
  local currentsize=$2
  local multiplier

  rawsizeValue=0

  # Mi/Gi/Ti are binary units: each step is a factor of 1024, not 1000.
  case "${currentsize}" in
    *Mi)
      unitSize="Mi"
      multiplier=1
      ;;
    *Gi)
      unitSize="Gi"
      multiplier=1024
      ;;
    *Ti)
      unitSize="Ti"
      multiplier=$(( 1024 * 1024 ))
      ;;
    *)
      echo "Unknown unit of $unit : ${currentsize}" >&2
      echo "Supported units are 'Mi','Gi','Ti'" >&2
      exit 1
      ;;
  esac

  # Remove only the trailing unit, never an occurrence elsewhere in the string.
  rawSize=${currentsize%"${unitSize}"}

  # Only do arithmetic on plain integers; callers that need to reject other
  # values inspect rawSize themselves. "10#" forces base 10 so that a value
  # with leading zeros (e.g. "08") is not rejected as invalid octal.
  if [[ "${rawSize}" =~ ^[0-9]+$ ]]; then
    rawsizeValue=$(( 10#${rawSize} * multiplier ))
  fi
}
#######################################
# Report whether a requested size has reached a maximum size.
# Both quantities are converted by calculateSize, whose result is read back
# from the global rawsizeValue after each call (calculateSize also overwrites
# the globals rawSize and unitSize as a side effect).
# Arguments:
#   $1 - requested size including unit, e.g. "15Gi"
#   $2 - maximum size including unit, e.g. "1Ti"
# Returns:
#   1 when the requested size is greater than or equal to the maximum,
#   0 otherwise. Note that this is the reverse of the usual shell convention.
#######################################
function compareSizes() {
  local requested=$1
  local limit=$2
  local requestedValue
  local limitValue

  calculateSize newsize "${requested}"
  requestedValue=${rawsizeValue}

  calculateSize maxsize "${limit}"
  limitValue=${rawsizeValue}

  # A failed numeric test (including a non-numeric operand) falls through
  # to the "not reached" result.
  [ "${requestedValue}" -ge "${limitValue}" ] && return 1
  return 0
}
#######################################
# Grow a PVC by a percentage of its current size, capped at a maximum size.
# Globals:
#   rawSize, unitSize (read) - filled in by calculateSize
# Arguments:
#   $1 - growth rate in percent (non-negative integer)
#   $2 - name of the PVC to resize
#   $3 - namespace of the PVC
#   $4 - maximum size including unit, e.g. "1Ti"
# Outputs:
#   Progress messages and the kubectl patch result on stdout,
#   validation errors on stderr.
# Returns:
#   1 when the current size or the growth rate is not an integer.
#######################################
function growVertically() {
  local growRate=$1
  local pvc=$2
  local ns=$3
  local maxSize=$4
  local currentSize
  local currentRaw
  local currentUnit
  local newSize
  local target
  local result

  currentSize=$(kubectl get pvc "${pvc}" -n "${ns}" -o json | jq -r '.spec.resources.requests.storage')
  echo "PVC(OSD) current size is ${currentSize} and will be increased by ${growRate}%."

  calculateSize "${pvc}" "${currentSize}"
  # Keep private copies: compareSizes below runs calculateSize again for the
  # maximum size, which overwrites the globals rawSize and unitSize. Using the
  # globals afterwards would patch the PVC with the unit of the maximum.
  currentRaw=${rawSize}
  currentUnit=${unitSize}

  if ! [[ "${currentRaw}" =~ ^[0-9]+$ ]]; then
    echo "disk size should be an integer" >&2
    return 1
  fi
  if ! [[ "${growRate}" =~ ^[0-9]+$ ]]; then
    echo "growth rate should be an integer" >&2
    return 1
  fi

  # Integer arithmetic (truncating division); "10#" forces base 10 so values
  # with leading zeros are not read as octal.
  newSize=$(( 10#${currentRaw} + (10#${currentRaw} * 10#${growRate}) / 100 ))
  if (( newSize == 10#${currentRaw} )); then
    # The percentage rounded down to nothing; grow by one unit instead.
    newSize=$(( newSize + 1 ))
    echo "New adjusted calculated size for the PVC is ${newSize}${currentUnit}"
  else
    echo "New calculated size for the PVC is ${newSize}${currentUnit}"
  fi

  target="${newSize}${currentUnit}"
  # compareSizes returns non-zero when the new size has reached the maximum.
  if ! compareSizes "${target}" "${maxSize}"; then
    target=${maxSize}
    echo "Disk has reached it's MAX capacity ${maxSize}, add a new disk to it"
  fi

  result=$(kubectl patch pvc "${pvc}" -n "${ns}" --type json --patch "[{ op: replace, path: /spec/resources/requests/storage, value: ${target} }]")
  echo "${result}"
}
#######################################
# Raise the OSD count of the storageClassDeviceSet that a PVC belongs to,
# capped at a maximum count.
# The device set is identified by the PVC label "ceph.rook.io/DeviceSet" and
# looked up in every CephCluster of the namespace; in each cluster the first
# device set with that name is patched.
# Arguments:
#   $1 - number of OSDs to add (non-negative integer)
#   $2 - name of the PVC that triggered the growth
#   $3 - namespace of the PVC and of the CephCluster resources
#   $4 - maximum OSD count for the device set (non-negative integer)
# Outputs:
#   Progress messages and the kubectl patch result on stdout,
#   lookup and validation errors on stderr.
# Returns:
#   1 when the arguments are not integers, the device set label is missing,
#   the clusters cannot be listed, or no matching device set exists.
#######################################
function growHorizontally() {
  local increaseOSDCount=$1
  local pvc=$2
  local ns=$3
  local maxOSDCount=$4
  local deviceSetName
  local clusters
  local clusterTotal
  local clusterIndex
  local deviceSetTotal
  local deviceSetIndex
  local deviceSetPath
  local deviceSet
  local currentOSDCount
  local finalCount
  local clusterName
  local result
  local patched=0

  if ! [[ "${increaseOSDCount}" =~ ^[0-9]+$ && "${maxOSDCount}" =~ ^[0-9]+$ ]]; then
    echo "OSD count and maximum OSD count should be integers" >&2
    return 1
  fi

  deviceSetName=$(kubectl get pvc "${pvc}" -n "${ns}" -o json | jq -r '.metadata.labels."ceph.rook.io/DeviceSet"')
  if [[ -z "${deviceSetName}" || "${deviceSetName}" == "null" ]]; then
    echo "PVC ${pvc} in namespace ${ns} has no ceph.rook.io/DeviceSet label" >&2
    return 1
  fi

  # Fetch the cluster list once and query the captured JSON from here on.
  clusters=$(kubectl get CephCluster -n "${ns}" -o json)
  clusterTotal=$(jq -r '.items | length' <<< "${clusters}")
  # An empty or unparsable answer yields no number; bail out instead of
  # looping on it.
  if ! [[ "${clusterTotal}" =~ ^[0-9]+$ ]]; then
    echo "Unable to list CephCluster resources in namespace ${ns}" >&2
    return 1
  fi

  for (( clusterIndex = 0; clusterIndex < clusterTotal; clusterIndex++ )); do
    deviceSetTotal=$(jq -r ".items[${clusterIndex}].spec.storage.storageClassDeviceSets | length" <<< "${clusters}")
    [[ "${deviceSetTotal}" =~ ^[0-9]+$ ]] || continue

    # The device set index restarts at 0 for every cluster.
    for (( deviceSetIndex = 0; deviceSetIndex < deviceSetTotal; deviceSetIndex++ )); do
      deviceSetPath=".items[${clusterIndex}].spec.storage.storageClassDeviceSets[${deviceSetIndex}]"
      deviceSet=$(jq -r "${deviceSetPath}.name" <<< "${clusters}")
      [[ "${deviceSet}" == "${deviceSetName}" ]] || continue

      currentOSDCount=$(jq -r "${deviceSetPath}.count" <<< "${clusters}")
      if ! [[ "${currentOSDCount}" =~ ^[0-9]+$ ]]; then
        echo "DeviceSet ${deviceSet} has no usable count: ${currentOSDCount}" >&2
        break
      fi

      # "10#" forces base 10 so values with leading zeros are not read as octal.
      finalCount=$(( 10#${currentOSDCount} + 10#${increaseOSDCount} ))
      echo "OSD count: ${currentOSDCount}. OSD count will be increased by ${increaseOSDCount}."
      if [ "${finalCount}" -ge "${maxOSDCount}" ]; then
        finalCount=${maxOSDCount}
        echo "DeviceSet ${deviceSet} capacity is full, cannot add more OSD to it"
      fi
      echo "Total count of OSDs for deviceset ${deviceSetName} is set to ${finalCount}."

      clusterName=$(jq -r ".items[${clusterIndex}].metadata.name" <<< "${clusters}")
      result=$(kubectl patch CephCluster "${clusterName}" -n "${ns}" --type json --patch "[{ op: replace, path: /spec/storage/storageClassDeviceSets/${deviceSetIndex}/count, value: ${finalCount} }]")
      echo "${result}"
      patched=1
      # Only the first device set with this name is patched per cluster.
      break
    done
  done

  if (( patched == 0 )); then
    echo "DeviceSet ${deviceSetName} not found in any CephCluster in namespace ${ns}" >&2
    return 1
  fi
}
#######################################
# Poll Prometheus for Ceph OSD fullness alerts forever and grow the affected
# storage each time one is found.
# The alerts are fetched with curl from inside the rook-ceph toolbox pod,
# addressing Prometheus through the host IP of pod
# prometheus-rook-prometheus-0 on port 30900. After each pass over the
# alert list the function sleeps for 600 seconds.
# Arguments:
#   $1 - "count" to add OSDs (growHorizontally); anything else resizes the
#        PVC (growVertically)
#   $2 - amount to grow by: OSD count or growth percentage
#   $3 - upper limit: maximum OSD count or maximum size with unit
# Outputs:
#   Progress messages on stdout, skipped-alert diagnostics on stderr.
# Exits:
#   Terminates the script with status 1 when the alert list cannot be read
#   or when an alerting OSD is not backed by a PVC. Does not return otherwise.
#######################################
function growOSD() {
  local mode=$1
  local growBy=$2
  local limit=$3
  local prometheusHostIP
  local route
  local toolbox
  local alerts
  local total_alerts
  local itr
  local entry
  local thename
  local ns
  local osdID
  local pvc

  prometheusHostIP=$(kubectl -n rook-ceph -o jsonpath="{.status.hostIP}" get pod prometheus-rook-prometheus-0)
  route=${prometheusHostIP}:30900
  # Take only the first matching pod so the name stays a single word even
  # when several toolbox pods are listed.
  toolbox=$(kubectl get pods -n rook-ceph | grep -i rook-ceph-tools | awk '{ print $1; exit }')

  while true; do
    echo "Looping at $(date +"%Y-%m-%d %H:%M:%S")"

    # No -i/-t: this runs unattended, and a TTY would add carriage returns
    # to the captured output or fail when stdin is not a terminal.
    alerts=$(kubectl exec "${toolbox}" -n rook-ceph -- bash -c "curl -s http://${route}/api/v1/alerts")
    total_alerts=$(jq '.data.alerts | length' <<< "${alerts}")
    # jq prints nothing when the response is empty or not JSON.
    if ! [[ "${total_alerts}" =~ ^[0-9]+$ ]]; then
      echo "Alert manager not configured,re-run the script"
      exit 1
    fi

    for (( itr = 0; itr < total_alerts; itr++ )); do
      entry=$(jq ".data.alerts[${itr}]" <<< "${alerts}")
      thename=$(jq -r '.labels.alertname' <<< "${entry}")
      if [[ "${thename}" != "CephOSDNearFull" && "${thename}" != "CephOSDCriticallyFull" ]]; then
        continue
      fi

      echo "${entry}"
      ns=$(jq -r '.labels.namespace' <<< "${entry}")
      osdID=$(jq -r '.labels.ceph_daemon' <<< "${entry}")
      # The deployment name uses "-" where the daemon label uses ".".
      osdID=${osdID/./-}
      pvc=$(kubectl get deployment -n "${ns}" "rook-ceph-${osdID}" -o json | jq -r '.metadata.labels."ceph.rook.io/pvc"')
      if [[ "${pvc}" == "null" ]]; then
        echo "PVC not found, script can only run on PVC-based cluster"
        exit 1
      fi
      if [[ -z "${pvc}" ]]; then
        # The lookup itself failed; skip this alert rather than calling the
        # grow functions with an empty PVC name.
        echo "Unable to look up deployment rook-ceph-${osdID} in namespace ${ns}, skipping alert" >&2
        continue
      fi

      echo "Processing NearFull or Full alert for PVC ${pvc} in namespace ${ns}"
      if [[ "${mode}" == "count" ]]; then
        growHorizontally "${growBy}" "${pvc}" "${ns}" "${limit}"
      else
        growVertically "${growBy}" "${pvc}" "${ns}" "${limit}"
      fi
    done

    sleep 600
  done
}
#######################################
# Poll until the first pod matching a label selector reports phase "Running".
# The pod phase is queried again on every iteration.
# Arguments:
#   $1 - label selector, e.g. "prometheus=rook-prometheus"
#   $2 - namespace to look in; empty to use kubectl's current namespace
#   $3 - timeout in seconds
#   $4 - message printed before each retry
# Returns:
#   0 once the pod is Running, 1 when the timeout expires first.
#######################################
function _waitForPodRunning() {
  local selector=$1
  local namespace=$2
  local timeoutSeconds=$3
  local waitingMessage=$4
  local deadline=$(( SECONDS + timeoutSeconds ))
  local phase
  local -a query=(get pod -l "${selector}" -o json)

  if [[ -n "${namespace}" ]]; then
    query+=(-n "${namespace}")
  fi

  while true; do
    phase=$(kubectl "${query[@]}" | jq -r '.items[0].status.phase')
    if [[ "${phase}" == "Running" ]]; then
      return 0
    fi
    if (( SECONDS >= deadline )); then
      return 1
    fi
    echo "${waitingMessage}"
    sleep 1
  done
}

#######################################
# Deploy the monitoring stack the script depends on: the Prometheus operator,
# the Rook service monitor, the Rook alert rules, the Prometheus instance and
# its service.
# Waiting for the pods is best effort: a timeout is reported on stderr and
# the function carries on.
# Outputs:
#   Progress messages and kubectl output on stdout, timeouts on stderr.
# Exits:
#   Terminates the script with status 1 when no pod labelled
#   prometheus=rook-prometheus exists in namespace rook-ceph afterwards.
#######################################
function creatingPrerequisites() {
  local examples="https://raw.githubusercontent.com/rook/rook/master/deploy/examples/monitoring"

  echo "creating Prerequisites deployments - Prometheus Operator and Prometheus Instances"
  # creating Prometheus operator
  kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml
  # waiting for Prometheus operator to get ready
  _waitForPodRunning "app.kubernetes.io/name=prometheus-operator" "" 30 \
    'waiting for prometheus-operator to get created' \
    || echo "timed out waiting for prometheus-operator to be Running" >&2

  # creating a service monitor that will watch the Rook cluster and collect metrics regularly
  kubectl create -f "${examples}/service-monitor.yaml"
  # create the PrometheusRule for Rook alerts.
  kubectl create -f "${examples}/prometheus-ceph-v14-rules.yaml"
  # create prometheus-rook-prometheus-0 pod
  kubectl create -f "${examples}/prometheus.yaml"
  # create prometheus-service
  kubectl create -f "${examples}/prometheus-service.yaml"

  # waiting for prometheus-rook-prometheus-0 pod to get ready
  _waitForPodRunning "prometheus=rook-prometheus" "rook-ceph" 60 \
    'waiting for prometheus-rook-prometheus-0 pod to get created' \
    || echo "timed out waiting for prometheus-rook-prometheus-0 to be Running" >&2

  if [ "$(kubectl get pod -l prometheus=rook-prometheus -nrook-ceph)" == "" ]; then
    echo "prometheus-rook-prometheus-0 pod not created, re-run the script"
    exit 1
  fi
  echo "Prerequisites deployments created"
}
#######################################
# Print the usage text for the script.
# Outputs:
#   Writes the usage text, followed by one blank line, to stderr.
#######################################
function invalidCall() {
  {
    printf ' %s [command]\n' "$0"
    printf '%s\n' \
      "Available Commands for normal cluster:" \
      "./auto-grow-storage.sh count --max maxCount --count rate Scale horizontally by adding more OSDs to the cluster" \
      "./auto-grow-storage.sh size --max maxSize --growth-rate percent Scale vertically by increasing the size of existing OSDs" \
      ""
  } >&2
}
#######################################
# Report a command-line error, print the usage text and stop the script.
# Arguments:
#   $1 - error message
# Exits:
#   Always terminates the script with status 1.
#######################################
function _failUsage() {
  echo "$1" >&2
  invalidCall
  exit 1
}

#######################################
# Entry point: validate the command line, deploy the prerequisites and start
# the alert loop.
#   count --max maxCount --count rate
#   size  --max maxSize  --growth-rate percent
# The two flags may be given in either order; any other flag is rejected.
# Arguments:
#   "$@" - the script's command-line arguments
# Returns:
#   1 when the command is missing or unknown (after printing the usage).
# Exits:
#   Terminates the script with status 1 on any invalid flag or value.
#######################################
function main() {
  local command=${1:-}
  local amountFlag
  local max=""
  local amount=""
  local sizePattern='^[0-9]+(Mi|Gi|Ti)$'
  local integerPattern='^[0-9]+$'

  case "${command}" in
    count)
      amountFlag="--count"
      ;;
    size)
      amountFlag="--growth-rate"
      ;;
    *)
      invalidCall
      return 1
      ;;
  esac

  if [[ $# -ne 5 ]]; then
    _failUsage "incorrect command to run the script"
  fi

  # Four words remain after the command: two "flag value" pairs.
  shift
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max)
        max=$2
        ;;
      "${amountFlag}")
        amount=$2
        ;;
      *)
        _failUsage "incorrect command to run the script"
        ;;
    esac
    shift 2
  done

  if [[ "${command}" == "count" ]]; then
    [[ "${max}" =~ ${integerPattern} ]] || _failUsage "maxCount should be an integer"
    [[ "${amount}" =~ ${integerPattern} ]] || _failUsage "rate should be an integer"
    creatingPrerequisites
    echo "Adding on nearfull and full alert and number of OSD to add is ${amount}"
    growOSD count "${amount}" "${max}"
  else
    # Check the unit up front so a bad maximum is rejected before anything
    # is deployed, instead of surfacing later while handling an alert.
    [[ "${max}" =~ ${sizePattern} ]] \
      || _failUsage "maxSize should be an integer followed by a unit 'Mi', 'Gi' or 'Ti' (for example 500Gi)"
    [[ "${amount}" =~ ${integerPattern} ]] || _failUsage "growth-rate should be an integer"
    creatingPrerequisites
    echo "Resizing on nearfull and full alert and Expansion percentage set to ${amount}%"
    growOSD size "${amount}" "${max}"
  fi
}

# When this is the last statement of the script, the status of main becomes
# the script's exit status.
main "$@"