upgrade.go

/*
Copyright 2019 The Rook Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package client

import (
    "encoding/json"
    "strings"
    "time"

    "github.com/pkg/errors"
    cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
    "github.com/rook/rook/pkg/clusterd"
    cephver "github.com/rook/rook/pkg/operator/ceph/version"
    "github.com/rook/rook/pkg/util"
    "k8s.io/apimachinery/pkg/util/sets"
)

const (
    defaultMaxRetries    = 10
    defaultRetryDelay    = 60 * time.Second
    defaultOSDRetryDelay = 10 * time.Second
)

var (
    // we don't perform any checks on these daemons
    // they don't have any "ok-to-stop" command implemented
    daemonNoCheck    = []string{"mgr", "rgw", "rbd-mirror", "nfs", "fs-mirror"}
    errNoHostInCRUSH = errors.New("no host in crush map yet?")
)

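// getCephMonVersionString runs 'ceph version' against the cluster and returns the raw output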
func getCephMonVersionString(context *clusterd.Context, clusterInfo *ClusterInfo) (string, error) {
    args := []string{"version"}
    buf, err := NewCephCommand(context, clusterInfo, args).Run()
    if err != nil {
        return "", errors.Wrapf(err, "failed to run 'ceph version'. %s", string(buf))
    }
    output := string(buf)
    logger.Debug(output)
    return output, nil
}

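// getAllCephDaemonVersionsString runs 'ceph versions' against the cluster and returns the raw output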
func getAllCephDaemonVersionsString(context *clusterd.Context, clusterInfo *ClusterInfo) (string, error) {
    args := []string{"versions"}
    buf, err := NewCephCommand(context, clusterInfo, args).Run()
    if err != nil {
        return "", errors.Wrapf(err, "failed to run 'ceph versions'. %s", string(buf))
    }
    output := string(buf)
    logger.Debug(output)
    return output, nil
}

// GetCephMonVersion reports the Ceph version of all the monitors, or at least a majority with quorum
func GetCephMonVersion(context *clusterd.Context, clusterInfo *ClusterInfo) (*cephver.CephVersion, error) {
    output, err := getCephMonVersionString(context, clusterInfo)
    if err != nil {
        return nil, err
    }
    logger.Debug(output)

    v, err := cephver.ExtractCephVersion(output)
    if err != nil {
        return nil, errors.Wrap(err, "failed to extract ceph version")
    }
    return v, nil
}

// GetAllCephDaemonVersions reports the Ceph version of each daemon in the cluster
func GetAllCephDaemonVersions(context *clusterd.Context, clusterInfo *ClusterInfo) (*cephv1.CephDaemonsVersions, error) {
    output, err := getAllCephDaemonVersionsString(context, clusterInfo)
    if err != nil {
        return nil, err
    }
    logger.Debug(output)

    var cephVersionsResult cephv1.CephDaemonsVersions
    err = json.Unmarshal([]byte(output), &cephVersionsResult)
    if err != nil {
        return nil, errors.Wrap(err, "failed to retrieve ceph versions results")
    }
    return &cephVersionsResult, nil
}

// EnableReleaseOSDFunctionality disallows pre-Nautilus OSDs and enables all new Nautilus-only functionality
func EnableReleaseOSDFunctionality(context *clusterd.Context, clusterInfo *ClusterInfo, release string) error {
    args := []string{"osd", "require-osd-release", release}
    buf, err := NewCephCommand(context, clusterInfo, args).Run()
    if err != nil {
        return errors.Wrapf(err, "failed to disallow pre-%s osds and enable all new %s-only functionality", release, release)
    }
    output := string(buf)
    logger.Debug(output)
    logger.Infof("successfully disallowed pre-%s osds and enabled all new %s-only functionality", release, release)
    return nil
}

// OkToStop determines if it's ok to stop an upgrade
func OkToStop(context *clusterd.Context, clusterInfo *ClusterInfo, deployment, daemonType, daemonName string) error {
    okToStopRetries, okToStopDelay := getRetryConfig(clusterInfo, daemonType)
    versions, err := GetAllCephDaemonVersions(context, clusterInfo)
    if err != nil {
        return errors.Wrap(err, "failed to get ceph daemons versions")
    }

    switch daemonType {
    // Trying to handle the case where a **single** mon is deployed and an upgrade is called
    case "mon":
        // if len(versions.Mon) > 1, this means we have different Ceph versions for some monitor(s).
        // This is fine, we can run the upgrade checks
        if len(versions.Mon) == 1 {
            // now trying to parse and find how many mons are present
            // if we have fewer than 3 mons we skip the check and do best-effort
            // we use fewer than 3 because during the initial bootstrap the mon sequence is updated too,
            // so running the check on 2/3 mons fails
            // versions.Mon looks like this: map[ceph version 17.0.0-12-g6c8fb92 (6c8fb920cb1d862f36ee852ed849a15f9a50bd68) quincy (dev):1]
            // now looping over a single element since we can't address the key directly (we don't know its name)
            for _, monCount := range versions.Mon {
                if monCount < 3 {
                    logger.Infof("the cluster has fewer than 3 monitors, not performing upgrade check, running in best-effort")
                    return nil
                }
            }
        }
    // Trying to handle the case where a **single** osd is deployed and an upgrade is called
    case "osd":
        if osdDoNothing(context, clusterInfo) {
            return nil
        }
    }
    // we don't implement any checks for mon, rgw and rbdmirror since:
    //  - mon: this is done in the monitor code, which ensures all the mons are always in quorum before continuing
    //  - rgw: the pod spec has a liveness probe, so the pod is only considered healthy once it has successfully started
    //  - rbdmirror: you can chain as many as you want (like mds) but there is no ok-to-stop logic yet
    err = util.Retry(okToStopRetries, okToStopDelay, func() error {
        return okToStopDaemon(context, clusterInfo, deployment, daemonType, daemonName)
    })
    if err != nil {
        return errors.Wrapf(err, "failed to check if %s was ok to stop", deployment)
    }

    return nil
}

// OkToContinue determines if it's ok to continue an upgrade
func OkToContinue(context *clusterd.Context, clusterInfo *ClusterInfo, deployment, daemonType, daemonName string) error {
    // the mon case is handled directly in the deployment where the mon checks for quorum
    switch daemonType {
    case "mds":
        err := okToContinueMDSDaemon(context, clusterInfo, deployment, daemonType, daemonName)
        if err != nil {
            return errors.Wrapf(err, "failed to check if %s was ok to continue", deployment)
        }
    }

    return nil
}

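// okToStopDaemon runs the "<daemonType> ok-to-stop <daemonName>" check for daemon types that implement it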
func okToStopDaemon(context *clusterd.Context, clusterInfo *ClusterInfo, deployment, daemonType, daemonName string) error {
    if !sets.NewString(daemonNoCheck...).Has(daemonType) {
        args := []string{daemonType, "ok-to-stop", daemonName}
        buf, err := NewCephCommand(context, clusterInfo, args).Run()
        if err != nil {
            return errors.Wrapf(err, "deployment %s cannot be stopped", deployment)
        }
        output := string(buf)
        logger.Debugf("deployment %s is ok to be updated. %s", deployment, output)
    }

    // At this point, we can't tell whether the daemon type is unknown or simply has no "ok-to-stop"
    // command, but either way it's not a problem, so it's fine to return nil here
    logger.Debugf("deployment %s is ok to be updated.", deployment)
    return nil
}

// okToContinueMDSDaemon determines whether it's fine to go to the next mds during an upgrade
// mostly a placeholder function for the future, since with standby mds this shouldn't be needed
func okToContinueMDSDaemon(context *clusterd.Context, clusterInfo *ClusterInfo, deployment, daemonType, daemonName string) error {
    // wait for the MDS to be active again or in standby-replay
    retries, delay := getRetryConfig(clusterInfo, "mds")
    err := util.Retry(retries, delay, func() error {
        return MdsActiveOrStandbyReplay(context, clusterInfo, findFSName(deployment))
    })
    if err != nil {
        return err
    }

    return nil
}

// LeastUptodateDaemonVersion returns the ceph version of the least up-to-date daemon of the given type
// So if we invoke this function with "mon", it will look for the least recent version
// Assume the following:
//
//	"mon": {
//	    "ceph version 18.2.5 (cbff874f9007f1869bfd3821b7e33b2a6ffd4988) reef (stable)": 2,
//	    "ceph version 17.2.6 (3a54b2b6d167d4a2a19e003a705696d4fe619afc) quincy (stable)": 1
//	}
//
// In this case we will pick: "ceph version 17.2.6 (3a54b2b6d167d4a2a19e003a705696d4fe619afc) quincy (stable)": 1,
// and eventually return 17.2.6
func LeastUptodateDaemonVersion(context *clusterd.Context, clusterInfo *ClusterInfo, daemonType string) (cephver.CephVersion, error) {
    var r map[string]int
    var vv cephver.CephVersion

    // Always invoke ceph version before an upgrade so we are sure to be up-to-date
    versions, err := GetAllCephDaemonVersions(context, clusterInfo)
    if err != nil {
        return vv, errors.Wrap(err, "failed to get ceph daemons versions")
    }

    r, err = daemonMapEntry(versions, daemonType)
    if err != nil {
        return vv, errors.Wrap(err, "failed to find daemon map entry")
    }
    for v := range r {
        version, err := cephver.ExtractCephVersion(v)
        if err != nil {
            return vv, errors.Wrap(err, "failed to extract ceph version")
        }
        vv = *version
        // break right after the first iteration
        // the first one is always the least up-to-date
        break
    }

    return vv, nil
}

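// findFSName derives the filesystem name from an mds deployment name by trimming the "rook-ceph-mds-" prefix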
func findFSName(deployment string) string {
    return strings.TrimPrefix(deployment, "rook-ceph-mds-")
}

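// daemonMapEntry returns the version map for the given daemon type from the 'ceph versions' result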
func daemonMapEntry(versions *cephv1.CephDaemonsVersions, daemonType string) (map[string]int, error) {
    switch daemonType {
    case "mon":
        return versions.Mon, nil
    case "mgr":
        return versions.Mgr, nil
    case "mds":
        return versions.Mds, nil
    case "osd":
        return versions.Osd, nil
    case "rgw":
        return versions.Rgw, nil
    case "mirror":
        return versions.RbdMirror, nil
    }

    return nil, errors.Errorf("invalid daemonType %s", daemonType)
}

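// allOSDsSameHost returns true when the CRUSH map contains a single host and all in-map OSDs run on it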
func allOSDsSameHost(context *clusterd.Context, clusterInfo *ClusterInfo) (bool, error) {
    tree, err := HostTree(context, clusterInfo)
    if err != nil {
        return false, errors.Wrap(err, "failed to get the osd tree")
    }

    osds, err := OsdListNum(context, clusterInfo)
    if err != nil {
        return false, errors.Wrap(err, "failed to get the osd list")
    }

    hostOsdTree, err := buildHostListFromTree(tree)
    if err != nil {
        return false, errors.Wrap(err, "failed to build osd tree")
    }

    hostOsdNodes := len(hostOsdTree.Nodes)
    if hostOsdNodes == 0 {
        return false, errNoHostInCRUSH
    }

    // If the number of OSD nodes is 1, chances are this is a simple setup with all OSDs on it
    if hostOsdNodes == 1 {
        // number of OSDs on that host
        hostOsdNum := len(hostOsdTree.Nodes[0].Children)
        // we take the total number of OSDs and remove the OSDs that are out of the CRUSH map
        osdUp := len(osds) - len(tree.Stray)
        // If the number of children of that host (basically OSDs) is equal to the total number of OSDs
        // we can assume that all OSDs are running on the same machine
        if hostOsdNum == osdUp {
            return true, nil
        }
    }

    return false, nil
}

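// buildHostListFromTree returns a tree containing only the nodes of type 'host'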
func buildHostListFromTree(tree OsdTree) (OsdTree, error) {
    var osdList OsdTree

    if tree.Nodes == nil {
        return osdList, errors.New("osd tree not populated, missing 'nodes' field")
    }

    for _, t := range tree.Nodes {
        if t.Type == "host" {
            osdList.Nodes = append(osdList.Nodes, t)
        }
    }

    return osdList, nil
}

// OSDUpdateShouldCheckOkToStop returns true if Rook should check ok-to-stop for OSDs when doing
// OSD daemon updates. It will return false if it should not perform ok-to-stop checks, for example,
// when there are fewer than 3 OSDs
func OSDUpdateShouldCheckOkToStop(context *clusterd.Context, clusterInfo *ClusterInfo) bool {
    userIntervention := "the user will likely need to set continueUpgradeAfterChecksEvenIfNotHealthy to allow OSD updates to proceed"

    osds, err := OsdListNum(context, clusterInfo)
    if err != nil {
        // If calling osd list fails, we assume there are more than 3 OSDs and we check if ok-to-stop
        // If there are fewer than 3 OSDs, the ok-to-stop call will fail
        // this can still be controlled by setting continueUpgradeAfterChecksEvenIfNotHealthy
        // At worst this will happen for a single OSD only, which means 2 OSDs will restart in a small interval
        logger.Warningf("failed to determine the total number of osds. will check if OSDs are ok-to-stop. if there are fewer than 3 OSDs %s. %v", userIntervention, err)
        return true
    }
    if len(osds) < 3 {
        logger.Warningf("the cluster has fewer than 3 osds. not performing upgrade check. running in best-effort")
        return false
    }

    // aio means all-in-one
    aio, err := allOSDsSameHost(context, clusterInfo)
    if err != nil {
        if errors.Is(err, errNoHostInCRUSH) {
            logger.Warning("the CRUSH map has no 'host' entries so not performing ok-to-stop checks")
            return false
        }
        logger.Warningf("failed to determine if all osds are running on the same host. will check if OSDs are ok-to-stop. if all OSDs are running on one host %s. %v", userIntervention, err)
        return true
    }
    if aio {
        logger.Warningf("all OSDs are running on the same host. not performing upgrade check. running in best-effort")
        return false
    }

    return true
}

// osdDoNothing determines whether we should perform upgrade pre-checks and post-checks for the OSD daemon
// it checks various cluster info like the number of OSDs and their placement
// it returns 'true' if we need to do nothing and 'false' if we should pre-check/post-check
func osdDoNothing(context *clusterd.Context, clusterInfo *ClusterInfo) bool {
    osds, err := OsdListNum(context, clusterInfo)
    if err != nil {
        logger.Warningf("failed to determine the total number of osds. will check if the osd is ok-to-stop anyways. %v", err)
        // If calling osd list fails, we assume there are more than 3 OSDs and we check if ok-to-stop
        // If there are fewer than 3 OSDs, the ok-to-stop call will fail
        // this can still be controlled by setting continueUpgradeAfterChecksEvenIfNotHealthy
        // At worst this will happen for a single OSD only, which means 2 OSDs will restart in a small interval
        return false
    }
    if len(osds) < 3 {
        logger.Warningf("the cluster has fewer than 3 osds, not performing upgrade check, running in best-effort")
        return true
    }

    // aio means all-in-one
    aio, err := allOSDsSameHost(context, clusterInfo)
    if err != nil {
        // We return true so that we can continue without a retry and subsequently not test if the
        // osd can be stopped. This handles the scenario where the OSDs have been created but not yet
        // started due to a wrong CR configuration. For instance, when OSDs are encrypted and Vault
        // is used to store encryption keys, if the KV version is incorrect during the cluster
        // initialization the OSDs will fail to start and stay in CLBO until the CR is updated again
        // with the correct KV version so that they can start. For this scenario we don't need to go
        // through the path that checks whether the OSD can be stopped or not, since it would always
        // fail and make us wait for nothing.
        if errors.Is(err, errNoHostInCRUSH) {
            logger.Warning("the CRUSH map has no 'host' entries so not performing ok-to-stop checks")
            return true
        }
        logger.Warningf("failed to determine if all osds are running on the same host, performing upgrade check anyways. %v", err)
        return false
    }

    if aio {
        logger.Warningf("all OSDs are running on the same host, not performing upgrade check, running in best-effort")
        return true
    }

    return false
}

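// getRetryConfig returns the number of retries and the delay between retries to use for the given daemon type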
func getRetryConfig(clusterInfo *ClusterInfo, daemonType string) (int, time.Duration) {
    switch daemonType {
    case "osd":
        return int(clusterInfo.OsdUpgradeTimeout / defaultOSDRetryDelay), defaultOSDRetryDelay
    case "mds":
        return defaultMaxRetries, 15 * time.Second
    }

    return defaultMaxRetries, defaultRetryDelay
}