/*
Copyright 2016 The Rook Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package client

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"

	"github.com/pkg/errors"
	"github.com/rook/rook/pkg/clusterd"
)
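
// OSDUsage is the result of the 'osd df' command: per-OSD usage plus a cluster-wide summary.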
type OSDUsage struct {
	OSDNodes []OSDNodeUsage `json:"nodes"`
	Summary  struct {
		TotalKB      json.Number `json:"total_kb"`
		TotalUsedKB  json.Number `json:"total_kb_used"`
		TotalAvailKB json.Number `json:"total_kb_avail"`
		AverageUtil  json.Number `json:"average_utilization"`
	} `json:"summary"`
}
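
// OSDNodeUsage describes a single OSD: its CRUSH weight, reweight value, capacity, utilization, and placement group count.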
type OSDNodeUsage struct {
	ID          int         `json:"id"`
	Name        string      `json:"name"`
	CrushWeight json.Number `json:"crush_weight"`
	Depth       json.Number `json:"depth"`
	Reweight    json.Number `json:"reweight"`
	KB          json.Number `json:"kb"`
	UsedKB      json.Number `json:"kb_used"`
	AvailKB     json.Number `json:"kb_avail"`
	Utilization json.Number `json:"utilization"`
	Variance    json.Number `json:"var"`
	Pgs         json.Number `json:"pgs"`
}
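
// OSDPerfStats is the result of the 'osd perf' command: commit and apply latencies per OSD.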
type OSDPerfStats struct {
	PerfInfo []struct {
		ID    json.Number `json:"id"`
		Stats struct {
			CommitLatency json.Number `json:"commit_latency_ms"`
			ApplyLatency  json.Number `json:"apply_latency_ms"`
		} `json:"perf_stats"`
	} `json:"osd_perf_infos"`
}
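
// OSDDump is a subset of the 'osd dump' output: the up/in state of each OSD plus the cluster-wide and per-CRUSH-node flags.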
type OSDDump struct {
	OSDs []struct {
		OSD json.Number `json:"osd"`
		Up  json.Number `json:"up"`
		In  json.Number `json:"in"`
	} `json:"osds"`
	Flags          string              `json:"flags"`
	CrushNodeFlags map[string][]string `json:"crush_node_flags"`
}

// IsFlagSet checks if an OSD flag is set
func (dump *OSDDump) IsFlagSet(checkFlag string) bool {
	flags := strings.Split(dump.Flags, ",")
	for _, flag := range flags {
		if flag == checkFlag {
			return true
		}
	}
	return false
}

// IsFlagSetOnCrushUnit checks if an OSD flag is set on the specified CRUSH unit
func (dump *OSDDump) IsFlagSetOnCrushUnit(checkFlag, crushUnit string) bool {
	for unit, list := range dump.CrushNodeFlags {
		if crushUnit == unit {
			for _, flag := range list {
				if flag == checkFlag {
					return true
				}
			}
		}
	}
	return false
}

// UpdateFlagOnCrushUnit checks if the flag is in the desired state and sets/unsets it if it isn't,
// which avoids redundant calls. It returns true if the value was changed.
func (dump *OSDDump) UpdateFlagOnCrushUnit(context *clusterd.Context, clusterInfo *ClusterInfo, set bool, crushUnit, flag string) (bool, error) {
	flagSet := dump.IsFlagSetOnCrushUnit(flag, crushUnit)
	if flagSet && !set {
		err := UnsetFlagOnCrushUnit(context, clusterInfo, crushUnit, flag)
		if err != nil {
			return true, err
		}
		return true, nil
	}
	if !flagSet && set {
		err := SetFlagOnCrushUnit(context, clusterInfo, crushUnit, flag)
		if err != nil {
			return true, err
		}
		return true, nil
	}
	return false, nil
}

// SetFlagOnCrushUnit sets the specified flag on the crush unit
func SetFlagOnCrushUnit(context *clusterd.Context, clusterInfo *ClusterInfo, crushUnit, flag string) error {
	args := []string{"osd", "set-group", flag, crushUnit}
	cmd := NewCephCommand(context, clusterInfo, args)
	_, err := cmd.Run()
	if err != nil {
		return errors.Wrapf(err, "failed to set flag %s on %s", flag, crushUnit)
	}
	return nil
}

// UnsetFlagOnCrushUnit unsets the specified flag on the crush unit
func UnsetFlagOnCrushUnit(context *clusterd.Context, clusterInfo *ClusterInfo, crushUnit, flag string) error {
	args := []string{"osd", "unset-group", flag, crushUnit}
	cmd := NewCephCommand(context, clusterInfo, args)
	_, err := cmd.Run()
	if err != nil {
		return errors.Wrapf(err, "failed to unset flag %s on %s", flag, crushUnit)
	}
	return nil
}
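
// SafeToDestroyStatus is the response of the 'osd safe-to-destroy' command, listing the OSD IDs that are safe to destroy.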
type SafeToDestroyStatus struct {
	SafeToDestroy []int `json:"safe_to_destroy"`
}

// OsdTree represents the CRUSH hierarchy
type OsdTree struct {
	Nodes []struct {
		ID          int    `json:"id"`
		Name        string `json:"name"`
		Type        string `json:"type"`
		TypeID      int    `json:"type_id"`
		Children    []int  `json:"children,omitempty"`
		PoolWeights struct {
		} `json:"pool_weights,omitempty"`
		CrushWeight     float64 `json:"crush_weight,omitempty"`
		Depth           int     `json:"depth,omitempty"`
		Exists          int     `json:"exists,omitempty"`
		Status          string  `json:"status,omitempty"`
		Reweight        float64 `json:"reweight,omitempty"`
		PrimaryAffinity float64 `json:"primary_affinity,omitempty"`
	} `json:"nodes"`
	Stray []struct {
		ID              int     `json:"id"`
		Name            string  `json:"name"`
		Type            string  `json:"type"`
		TypeID          int     `json:"type_id"`
		CrushWeight     float64 `json:"crush_weight"`
		Depth           int     `json:"depth"`
		Exists          int     `json:"exists"`
		Status          string  `json:"status"`
		Reweight        float64 `json:"reweight"`
		PrimaryAffinity float64 `json:"primary_affinity"`
	} `json:"stray"`
}

// OsdList is a list of OSDs identified by their IDs
type OsdList []int

// StatusByID returns the up and in statuses of the OSD with the given ID
func (dump *OSDDump) StatusByID(id int64) (int64, int64, error) {
	for _, d := range dump.OSDs {
		i, err := d.OSD.Int64()
		if err != nil {
			return 0, 0, err
		}
		if id == i {
			in, err := d.In.Int64()
			if err != nil {
				return 0, 0, err
			}
			up, err := d.Up.Int64()
			if err != nil {
				return 0, 0, err
			}
			return up, in, nil
		}
	}
	return 0, 0, errors.Errorf("osd.%d not found in OSDDump", id)
}
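
// GetOSDUsage returns the OSD usage reported by the 'osd df' command.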
func GetOSDUsage(context *clusterd.Context, clusterInfo *ClusterInfo) (*OSDUsage, error) {
	args := []string{"osd", "df"}
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return nil, errors.Wrap(err, "failed to get osd df")
	}
	var osdUsage OSDUsage
	if err := json.Unmarshal(buf, &osdUsage); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal osd df response")
	}
	return &osdUsage, nil
}
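
// GetOSDPerfStats returns the OSD performance statistics reported by the 'osd perf' command.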
func GetOSDPerfStats(context *clusterd.Context, clusterInfo *ClusterInfo) (*OSDPerfStats, error) {
	args := []string{"osd", "perf"}
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return nil, errors.Wrap(err, "failed to get osd perf")
	}
	var osdPerfStats OSDPerfStats
	if err := json.Unmarshal(buf, &osdPerfStats); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal osd perf response")
	}
	return &osdPerfStats, nil
}
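
// GetOSDDump returns the result of the 'osd dump' command: the up/in state of each OSD and the OSD flags that are set.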
func GetOSDDump(context *clusterd.Context, clusterInfo *ClusterInfo) (*OSDDump, error) {
	args := []string{"osd", "dump"}
	cmd := NewCephCommand(context, clusterInfo, args)
	buf, err := cmd.Run()
	if err != nil {
		return nil, errors.Wrap(err, "failed to get osd dump")
	}
	var osdDump OSDDump
	if err := json.Unmarshal(buf, &osdDump); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal osd dump response")
	}
	return &osdDump, nil
}
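
// OSDOut marks the given OSD out of the cluster with the 'osd out' command and returns the raw command output.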
func OSDOut(context *clusterd.Context, clusterInfo *ClusterInfo, osdID int) (string, error) {
	args := []string{"osd", "out", strconv.Itoa(osdID)}
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	return string(buf), err
}
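
// OsdSafeToDestroy returns true if the 'osd safe-to-destroy' command reports that the given OSD is safe to destroy.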
func OsdSafeToDestroy(context *clusterd.Context, clusterInfo *ClusterInfo, osdID int) (bool, error) {
	args := []string{"osd", "safe-to-destroy", strconv.Itoa(osdID)}
	cmd := NewCephCommand(context, clusterInfo, args)
	buf, err := cmd.Run()
	if err != nil {
		return false, errors.Wrap(err, "failed to get safe-to-destroy status")
	}
	var output SafeToDestroyStatus
	if err := json.Unmarshal(buf, &output); err != nil {
		return false, errors.Wrapf(err, "failed to unmarshal safe-to-destroy response. %s", string(buf))
	}
	if len(output.SafeToDestroy) != 0 && output.SafeToDestroy[0] == osdID {
		return true, nil
	}
	return false, nil
}

// HostTree returns the osd tree
func HostTree(context *clusterd.Context, clusterInfo *ClusterInfo) (OsdTree, error) {
	var output OsdTree
	args := []string{"osd", "tree"}
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return output, errors.Wrap(err, "failed to get osd tree")
	}
	err = json.Unmarshal(buf, &output)
	if err != nil {
		return output, errors.Wrap(err, "failed to unmarshal 'osd tree' response")
	}
	return output, nil
}

// OsdListNum returns the list of OSD IDs
func OsdListNum(context *clusterd.Context, clusterInfo *ClusterInfo) (OsdList, error) {
	var output OsdList
	args := []string{"osd", "ls"}
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return output, errors.Wrap(err, "failed to get osd list")
	}
	err = json.Unmarshal(buf, &output)
	if err != nil {
		return output, errors.Wrap(err, "failed to unmarshal 'osd ls' response")
	}
	return output, nil
}

// OSDDeviceClass reports the device class for an OSD
type OSDDeviceClass struct {
	ID          int    `json:"osd"`
	DeviceClass string `json:"device_class"`
}

// OSDDeviceClasses returns the device classes of the given OSD IDs
func OSDDeviceClasses(context *clusterd.Context, clusterInfo *ClusterInfo, osdIds []string) ([]OSDDeviceClass, error) {
	var deviceClasses []OSDDeviceClass
	args := []string{"osd", "crush", "get-device-class"}
	args = append(args, osdIds...)
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return deviceClasses, errors.Wrap(err, "failed to get device-class info")
	}
	err = json.Unmarshal(buf, &deviceClasses)
	if err != nil {
		return deviceClasses, errors.Wrap(err, "failed to unmarshal 'osd crush get-device-class' response")
	}
	return deviceClasses, nil
}

// OSDOkToStopStats reports detailed information about which OSDs are okay to stop
type OSDOkToStopStats struct {
	OkToStop          bool     `json:"ok_to_stop"`
	OSDs              []int    `json:"osds"`
	NumOkPGs          int      `json:"num_ok_pgs"`
	NumNotOkPGs       int      `json:"num_not_ok_pgs"`
	BadBecomeInactive []string `json:"bad_become_inactive"`
	OkBecomeDegraded  []string `json:"ok_become_degraded"`
}

// OSDOkToStop returns a list of OSDs that can be stopped that includes the OSD ID given.
// This is relevant, for example, when checking which OSDs can be updated.
// The number of OSDs returned is limited by the value set in maxReturned.
// maxReturned=0 is the same as maxReturned=1.
func OSDOkToStop(context *clusterd.Context, clusterInfo *ClusterInfo, osdID, maxReturned int) ([]int, error) {
	args := []string{"osd", "ok-to-stop", strconv.Itoa(osdID)}
	// NOTE: if the number of OSD IDs given in the CLI arg query is Q and --max=N is given, if
	// N < Q, Ceph treats the query as though max=Q instead, always returning at least Q OSDs.
	args = append(args, fmt.Sprintf("--max=%d", maxReturned))
	buf, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		// is not ok to stop (or command error)
		return []int{}, errors.Wrapf(err, "OSD %d is not ok to stop", osdID)
	}
	var stats OSDOkToStopStats
	err = json.Unmarshal(buf, &stats)
	if err != nil {
		// Since the command succeeded we still know that at least the given OSD ID is ok to
		// stop, so we do not *have* to return an error. However, it is good to do it anyway so
		// that we can catch breaking changes to JSON output in CI testing. As a middle ground
		// here, return error but also return the given OSD ID in the output in case the calling
		// function wants to recover from this case.
		return []int{osdID}, errors.Wrapf(err, "failed to unmarshal 'osd ok-to-stop %d' response", osdID)
	}
	return stats.OSDs, nil
}

// SetPrimaryAffinity assigns primary-affinity (within range [0.0, 1.0]) to a specific OSD.
func SetPrimaryAffinity(context *clusterd.Context, clusterInfo *ClusterInfo, osdID int, affinity string) error {
	logger.Infof("setting osd.%d with primary-affinity %q", osdID, affinity)
	args := []string{"osd", "primary-affinity", fmt.Sprintf("osd.%d", osdID), affinity}
	_, err := NewCephCommand(context, clusterInfo, args).Run()
	if err != nil {
		return errors.Wrapf(err, "failed to set osd.%d with primary-affinity %q", osdID, affinity)
	}
	logger.Infof("successfully applied osd.%d primary-affinity %q", osdID, affinity)
	return nil
}