cluster.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. /*
  2. Copyright 2017 The Rook Authors. All rights reserved.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package clients
  14. import (
  15. "fmt"
  16. "github.com/rook/rook/pkg/daemon/ceph/client"
  17. )
  18. // IsClusterHealthy determines if the Rook cluster is currently healthy or not.
  19. func IsClusterHealthy(testClient *TestClient, namespace string) (bool, error) {
  20. status, err := testClient.Status(namespace)
  21. if err != nil {
  22. return false, err
  23. }
  24. logger.Infof("cluster status: %+v", status)
  25. // verify all mons are in quorum
  26. if len(status.Quorum) == 0 {
  27. return false, fmt.Errorf("too few monitors: %+v", status)
  28. }
  29. for _, mon := range status.MonMap.Mons {
  30. if !monInQuorum(mon, status.Quorum) {
  31. return false, fmt.Errorf("mon %s not in quorum: %v", mon.Name, status.Quorum)
  32. }
  33. }
  34. // verify there are OSDs and they are all up/in
  35. totalOSDs := status.OsdMap.NumOsd
  36. if totalOSDs == 0 {
  37. return false, fmt.Errorf("no OSDs: %+v", status)
  38. }
  39. if status.OsdMap.NumInOsd != totalOSDs || status.OsdMap.NumUpOsd != totalOSDs {
  40. return false, fmt.Errorf("not all OSDs are up/in: %+v", status)
  41. }
  42. // verify MGRs are available
  43. if !status.MgrMap.Available {
  44. return false, fmt.Errorf("MGRs are not available: %+v", status)
  45. }
  46. // verify that all PGs are in the active+clean state (0 PGs is OK because that means no pools
  47. // have been created yet)
  48. if status.PgMap.NumPgs > 0 {
  49. activeCleanCount := 0
  50. for _, pg := range status.PgMap.PgsByState {
  51. if pg.StateName == "active+clean" {
  52. activeCleanCount = pg.Count
  53. break
  54. }
  55. }
  56. if activeCleanCount != status.PgMap.NumPgs {
  57. return false, fmt.Errorf("not all PGs are active+clean: %+v", status.PgMap)
  58. }
  59. }
  60. // cluster passed all the basic health checks, seems healthy
  61. return true, nil
  62. }
  63. func monInQuorum(mon client.MonMapEntry, quorum []int) bool {
  64. for _, entry := range quorum {
  65. if entry == mon.Rank {
  66. return true
  67. }
  68. }
  69. return false
  70. }