Browse Source

[ADD]新增告警触发后的短信发送。基础版本,需要增加时间限制,国密SM3加密,页面配置

pujielan 1 month ago
parent
commit
a553ff4162
5 changed files with 208 additions and 19 deletions
  1. 10 0
      config/extend.go
  2. 7 0
      config/settings.yml
  3. 14 3
      handler/events.go
  4. 38 16
      handler/handler.go
  5. 139 0
      handler/sms.go

+ 10 - 0
config/extend.go

@@ -16,6 +16,7 @@ type Extend struct {
 	Otel              Otel
 	Prometheus        Prometheus
 	ClickhouseMetrics bool
+	SmsConfig         SmsConfig
 }
 
 type AMap struct {
@@ -64,3 +65,12 @@ type OTelMetrics struct {
 type Prometheus struct {
 	Address string
 }
+
+// SmsConfig is the raw SMS gateway section of the extended configuration
+// (the Extend.SmsConfig field). Real credentials and phone numbers belong in
+// the deployment configuration, not in source comments.
+type SmsConfig struct {
+	// Appkey is the application key issued for the SMS gateway.
+	Appkey    string
+	// Appsecret is the application secret paired with Appkey.
+	Appsecret string
+	// Url is the SMS gateway endpoint.
+	Url       string `json:"url"`
+	// AppsGroup lists the recipients per application as a single string in the
+	// form "app:phone,phone;app:phone,phone".
+	AppsGroup string
+	// SmsTpl is the SMS text template.
+	SmsTpl    string
+	// SqlRecord bool
+}

+ 7 - 0
config/settings.yml

@@ -101,4 +101,11 @@ settings:
         # endpoint: http://otel-collector.cestong.com.cn/v1/metrics
         endpoint: otel-collector.cestong.com.cn:80
         interval: 60 # 收集间隔,60s
+    smsconfig:
+      appKey: "95598109"
+      appSecret: "VXX8H0MzT7"
+      url: "http://msa.zs.test.js.sgcc.com.cn/jssms/api/sendsmsbatch" 
+      appsGroup: "opentelemetry-demo:18611342234,18722341127;demo:18511342234,18622341127"
+      smsTpl: "【江苏省电力公司\n所属系统:{{app_alias}}\n事件名称:{{events_name}}\n事件创建时间:{{date_time}}\n事件描述:{{events}}"
+      # sqlRecord: False
 

+ 14 - 3
handler/events.go

@@ -21,20 +21,31 @@ type EventHandler struct {
 	JR             *omodels.JudgeResult    `json:"judge_result"`
 	Emo            *omodels.Events
 	promMap        *sync.Map
+	SmsInfo        *SmsConfig
 	CheckStartTime time.Time        `json:"check_starttime"`
 	Errs           map[string]error `json:"_"`
 }
 
+// SmsConfig is the parsed SMS configuration carried by an EventHandler
+// (its SmsInfo field) and read when an SMS is sent.
+type SmsConfig struct {
+	// Appkey is sent as the x-system-key request header.
+	Appkey    string
+	// Appsecret is part of the input of the x-system-token digest.
+	Appsecret string
+	// Url is the address the SMS request is POSTed to.
+	Url       string              `json:"url"`
+	// AppsGroup maps an application alias to the phone numbers to notify,
+	// e.g. {"app_1": ["<phone>", "<phone>"], "app_2": ["<phone>"]}.
+	AppsGroup map[string][]string
+	// SmsTpl is the message template; the sender substitutes {{app_name}},
+	// {{events_name}}, {{date_time}} and {{events}}.
+	SmsTpl    string
+	// SQLRecord is not read by the code visible in this change.
+	// NOTE(review): presumably toggles persisting sent messages - confirm.
+	SQLRecord bool
+}
+
 /*
 	row: 单个数值,出现不代表异常,需要与point比较,符合则立即写入异常事件,单不一定告警,需要结合频率、周期进行判断
 	--{取消该条检测} rows: list值,出现代表有异常,立即写入异常事件监控,但不一定告警,需要结合频率,周期进行判断
 */
 
-func InitEventHandler(pMap *sync.Map) *EventHandler {
+func InitEventHandler(pMap *sync.Map, smsInfo *SmsConfig) *EventHandler {
 	e := new(EventHandler)
 	e.RP = new(omodels.OtRulesPolicy)
 	e.AC = new(omodels.AlertCondition)
 	e.JR = new(omodels.JudgeResult)
+	e.SmsInfo = smsInfo
 	e.JR.AlertStatus = INACTIVE
 	//eg:@@UNSET_apdex::health:5m_{"condition":"<","point":0.7,"point_type":"float","tigger_hz":3,"interval":5}
 	e.Errs = make(map[string]error)
@@ -283,7 +294,7 @@ func (e *EventHandler) CreateEventRecord(chdb *gorm.DB) *EventHandler {
 				//业务状态码异常数(近30分钟), x分钟内发生x次
 				exceptionName = fmt.Sprintf("%s, %d分钟内发生: %v次",
 					e.RP.RuleName, e.AC.Interval, e.JR.CompareV)
-			} else if e.JR.AlertStatus == FIRING {
+			} else if e.JR.AlertStatus == FIRING || e.JR.AlertStatus == SENDSUCCESS {
 				exceptionName = fmt.Sprintf("%s, %d分钟内发生%s%v次, 当前值为: %v",
 					e.RP.RuleName, e.AC.Interval, e.AC.Condition, e.JR.CompareV, e.JR.CompareV)
 			}
@@ -327,7 +338,7 @@ func (e *EventHandler) CreateEventRecord(chdb *gorm.DB) *EventHandler {
 }
 
 func (e *EventHandler) CreateAlert(db *gorm.DB) *EventHandler {
-	if e.JR.AlertStatus == FIRING {
+	if e.JR.AlertStatus == FIRING || e.JR.AlertStatus == SENDSUCCESS {
 		//TODO:  发送至告警队列
 		fe := amodels.OtFireEvents{
 			AppId:         e.Emo.AppID,

+ 38 - 16
handler/handler.go

@@ -34,6 +34,14 @@ const (
 	INACTIVE = 1 << iota
 	PENDING
 	FIRING
+	SENDSUCCESS
+
+	DEFAULTSMSTPL = `
+【江苏省电力公司】
+所属系统:{{app_name}}
+事件名称:{{events_name}}
+事件创建时间:{{date_time}}
+事件描述:{{events}}`
 )
 
 // type EventsAction interface {
@@ -124,6 +132,34 @@ func GetPrometheusInstance() string {
 	return extConfig.ExtConfig.Prometheus.Address
 }
 
+// GetSmsConfigInstance builds the handler-side SmsConfig from the extended
+// application configuration.
+//
+// The AppsGroup setting is a single string of the form
+// "app:phone,phone;app:phone" and is parsed into a map from application id to
+// phone numbers. Surrounding whitespace is trimmed, and malformed groups,
+// blank application ids, blank phone numbers and groups left without any
+// phone number are skipped, so callers never receive an empty recipient.
+// When the same application id appears more than once, the last group wins.
+//
+// The SMS template is currently always DEFAULTSMSTPL; the configured SmsTpl
+// value is not used yet.
+func GetSmsConfigInstance() *SmsConfig {
+	raw := extConfig.ExtConfig.SmsConfig
+
+	sc := new(SmsConfig)
+	sc.Appkey = raw.Appkey
+	sc.Appsecret = raw.Appsecret
+	// sc.SmsTpl = raw.SmsTpl
+	// TODO: set default sms template; should be replaced by an external template.
+	sc.SmsTpl = DEFAULTSMSTPL
+	sc.Url = raw.Url
+	// sc.SQLRecord = raw.SQLRecord
+
+	recordMap := make(map[string][]string)
+	for _, part := range strings.Split(raw.AppsGroup, ";") {
+		subParts := strings.SplitN(part, ":", 2)
+		if len(subParts) != 2 {
+			// Empty segment (e.g. trailing ';') or a group without ':'.
+			continue
+		}
+		id := strings.TrimSpace(subParts[0])
+		if id == "" {
+			continue
+		}
+		var phones []string
+		for _, phone := range strings.Split(subParts[1], ",") {
+			if phone = strings.TrimSpace(phone); phone != "" {
+				phones = append(phones, phone)
+			}
+		}
+		if len(phones) == 0 {
+			// An application without recipients must not look "configured".
+			continue
+		}
+		recordMap[id] = phones
+	}
+	sc.AppsGroup = recordMap
+	logger.Info("sms phone map: ", sc.AppsGroup)
+	return sc
+}
+
 func GetApps() ([]amodels.OtApps, error) {
 	appList := make([]amodels.OtApps, 0)
 	if err := GetDBInstance().Find(&appList).Error; err != nil {
@@ -242,7 +278,7 @@ func (a *AlertManager) PolicyConsumer() {
 func (a *AlertManager) consumerHandler(i interface{}) {
 	var err error
 	c := 0
-	eHandler := InitEventHandler(a.PromMap)
+	eHandler := InitEventHandler(a.PromMap, GetSmsConfigInstance())
 	rp, ok := i.(omodels.OtRulesPolicy)
 	if ok {
 		eHandler.RP = &rp
@@ -269,21 +305,6 @@ LOOP:
 		}
 		goto DONE
 	}
-	// if len(eHandler.
-	// 	JudgeRow().          //判断指标还是事件
-	// 	SetUID().            //
-	// 	PointCompare().      //判断指标与阈值比较
-	// 	JudgeTriggerHz().    //判断触发频率
-	// 	JudgeInterval(a.CH). //判断触发周期
-	// 	CreateEventRecord(a.CH). //记录事件
-	// 	CreateAlert(a.DB).       //判断本次检测是否发出告警
-	// 	Errs) > 0 {
-	// 	c += 1
-	// 	if c < a.MaxRetry {
-	// 		goto LOOP
-	// 	}
-	// 	goto DONE
-	// }
 	if len(eHandler.
 		JudgeRow().                //判断指标还是事件
 		SetUID().                  //
@@ -291,6 +312,7 @@ LOOP:
 		SetPromKV().               //暴露指标 TODO:
 		PointCompare().            //判断指标与阈值比较
 		JudgeInterval(a.CH).       //判断触发周期
+		SendMsg().                 //发送短信
 		CreateEventRecord(a.CH).   //记录事件
 		CreateAlert(a.DB).         //判断本次检测是否发出告警
 		Errs) > 0 {

+ 139 - 0
handler/sms.go

@@ -0,0 +1,139 @@
+package handler
+
+import (
+	"crypto/rand"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math/big"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/go-admin-team/go-admin-core/logger"
+)
+
+// SingleMsg is one element of the msgList array in the SMS request body:
+// a single message addressed to a single phone number.
+type SingleMsg struct {
+	// ID is set by EventHandler.SendMsg to the recipient's index in the phone list.
+	ID         string `json:"id"`
+	// MsgContent is the rendered SMS text.
+	MsgContent string `json:"msgcontent"`
+	// MsgType is always sent as an empty string by the code in this file.
+	MsgType    string `json:"msgtype"`
+	// PhoneNum is the recipient's phone number.
+	PhoneNum   string `json:"phonenum"`
+}
+
+// SendMsg is the JSON request body POSTed to the SMS gateway URL.
+type SendMsg struct {
+	// SubmitTime is the send time, formatted by the sender as "2006-01-02 15:04:05".
+	SubmitTime string      `json:"submittime"`
+	// MqReceipt is always sent as false by the code in this file.
+	MqReceipt  bool        `json:"mqreceipt"`
+	// ExtCode is always sent as an empty string by the code in this file.
+	ExtCode    string      `json:"extcode"`
+	// MsgList holds one SingleMsg per recipient.
+	MsgList    []SingleMsg `json:"msgList"`
+}
+
+// sm3 returns the SM3 digest (GB/T 32905-2016) of data as 64 lowercase
+// hexadecimal characters.
+//
+// It replaces the former placeholder that always returned "fake-sm3-hash".
+// The Go standard library has no SM3, so the algorithm is implemented here
+// with no extra dependencies.
+// NOTE(review): lowercase hex is assumed to be the encoding the SMS gateway
+// expects for x-system-token - confirm against the gateway specification.
+func sm3(data string) string {
+	state := [8]uint32{
+		0x7380166f, 0x4914b2b9, 0x172442d7, 0xda8a0600,
+		0xa96f30bc, 0x163138aa, 0xe38dee4d, 0xb0fb0e4e,
+	}
+
+	// Padding: a single 0x80 byte, zeros up to 56 (mod 64) bytes, then the
+	// message length in bits as a 64-bit big-endian integer.
+	msg := make([]byte, 0, len(data)+72)
+	msg = append(msg, data...)
+	msg = append(msg, 0x80)
+	for len(msg)%64 != 56 {
+		msg = append(msg, 0)
+	}
+	bitLen := uint64(len(data)) * 8
+	for shift := uint(64); shift > 0; shift -= 8 {
+		msg = append(msg, byte(bitLen>>(shift-8)))
+	}
+
+	for off := 0; off < len(msg); off += 64 {
+		sm3Compress(&state, msg[off:off+64])
+	}
+
+	return fmt.Sprintf("%08x%08x%08x%08x%08x%08x%08x%08x",
+		state[0], state[1], state[2], state[3],
+		state[4], state[5], state[6], state[7])
+}
+
+// sm3Compress applies the SM3 compression function to state for one 64-byte block.
+func sm3Compress(state *[8]uint32, block []byte) {
+	// Message expansion: 16 big-endian words extended to 68.
+	var w [68]uint32
+	for i := 0; i < 16; i++ {
+		w[i] = uint32(block[4*i])<<24 | uint32(block[4*i+1])<<16 |
+			uint32(block[4*i+2])<<8 | uint32(block[4*i+3])
+	}
+	for i := 16; i < 68; i++ {
+		x := w[i-16] ^ w[i-9] ^ sm3Rotl(w[i-3], 15)
+		// P1(x) = x ^ (x <<< 15) ^ (x <<< 23)
+		w[i] = x ^ sm3Rotl(x, 15) ^ sm3Rotl(x, 23) ^ sm3Rotl(w[i-13], 7) ^ w[i-6]
+	}
+
+	a, b, c, d := state[0], state[1], state[2], state[3]
+	e, f, g, h := state[4], state[5], state[6], state[7]
+	for i := 0; i < 64; i++ {
+		var t, ff, gg uint32
+		if i < 16 {
+			t = 0x79cc4519
+			ff = a ^ b ^ c
+			gg = e ^ f ^ g
+		} else {
+			t = 0x7a879d8a
+			ff = (a & b) | (a & c) | (b & c)
+			gg = (e & f) | (^e & g)
+		}
+		a12 := sm3Rotl(a, 12)
+		ss1 := sm3Rotl(a12+e+sm3Rotl(t, uint(i%32)), 7)
+		ss2 := ss1 ^ a12
+		tt1 := ff + d + ss2 + (w[i] ^ w[i+4]) // W'[i] = W[i] ^ W[i+4]
+		tt2 := gg + h + ss1 + w[i]
+		d = c
+		c = sm3Rotl(b, 9)
+		b = a
+		a = tt1
+		h = g
+		g = sm3Rotl(f, 19)
+		f = e
+		// P0(x) = x ^ (x <<< 9) ^ (x <<< 17)
+		e = tt2 ^ sm3Rotl(tt2, 9) ^ sm3Rotl(tt2, 17)
+	}
+
+	state[0] ^= a
+	state[1] ^= b
+	state[2] ^= c
+	state[3] ^= d
+	state[4] ^= e
+	state[5] ^= f
+	state[6] ^= g
+	state[7] ^= h
+}
+
+// sm3Rotl rotates x left by n bits, 0 <= n < 32.
+func sm3Rotl(x uint32, n uint) uint32 {
+	return x<<n | x>>(32-n)
+}
+
+// SendMsg sends an SMS notification for the current rule when the judged
+// alert status is FIRING and the rule's application alias has recipients
+// configured in SmsInfo.AppsGroup. It is a step of the EventHandler call
+// chain and therefore always returns e.
+//
+// On success the alert status becomes SENDSUCCESS. Any failure (nonce
+// generation, JSON encoding, building or performing the request, reading
+// the response, or a non-2xx HTTP status) is recorded in e.Errs and the
+// status is left unchanged.
+// NOTE(review): a 2xx reply whose body reports a gateway-level failure is
+// still treated as success, because the response schema is not known here.
+func (e *EventHandler) SendMsg() *EventHandler {
+	// Upper bound for the whole gateway round trip, so a stalled gateway
+	// cannot block the handler chain forever.
+	const smsHTTPTimeout = 10 * time.Second
+
+	logger.Debugf("In sms Send func, %s", e.RP.AppAlias)
+	if e.SmsInfo == nil {
+		return e
+	}
+	// Only applications listed in the SMS configuration are notified.
+	configured, ok := e.SmsInfo.AppsGroup[e.RP.AppAlias]
+	if !ok {
+		return e
+	}
+	logger.Debug("has key......", e.JR.AlertStatus)
+	// Only alerts that are actually firing trigger a message.
+	if e.JR.AlertStatus != FIRING {
+		return e
+	}
+
+	// Drop blank entries so that no message is addressed to an empty number.
+	phoneList := make([]string, 0, len(configured))
+	for _, phone := range configured {
+		if phone = strings.TrimSpace(phone); phone != "" {
+			phoneList = append(phoneList, phone)
+		}
+	}
+	if len(phoneList) == 0 {
+		return e
+	}
+
+	logger.Info("start to send sms...")
+
+	appkey := e.SmsInfo.Appkey
+	appsecret := e.SmsInfo.Appsecret
+
+	nonce, err := rand.Int(rand.Reader, big.NewInt(1<<62))
+	if err != nil {
+		e.Errs["rander error"] = err
+		return e
+	}
+	// Millisecond Unix timestamp.
+	timestamp := fmt.Sprintf("%d", time.Now().UnixNano()/1e6)
+
+	// Render into a local variable: the template in e.SmsInfo must stay
+	// untouched, otherwise a retry would find its placeholders already gone.
+	events := fmt.Sprintf("%s, %d分钟内发生%s%v次, 当前值为: %v",
+		e.RP.RuleName, e.AC.Interval, e.AC.Condition, e.JR.CompareV, e.JR.CompareV)
+	content := strings.NewReplacer(
+		"{{app_name}}", e.RP.AppName,
+		"{{events_name}}", e.RP.RuleName,
+		"{{date_time}}", e.RP.CreateTime.Format("2006-01-02 15:04:05"),
+		"{{events}}", events,
+	).Replace(e.SmsInfo.SmsTpl)
+	logger.Info("sms build: ", content)
+
+	msgList := make([]SingleMsg, 0, len(phoneList))
+	for id, phone := range phoneList {
+		msgList = append(msgList, SingleMsg{
+			ID:         fmt.Sprintf("%d", id),
+			MsgContent: content,
+			MsgType:    "",
+			PhoneNum:   phone,
+		})
+	}
+	sendMsg := SendMsg{
+		SubmitTime: time.Now().Format("2006-01-02 15:04:05"),
+		MqReceipt:  false,
+		ExtCode:    "",
+		MsgList:    msgList,
+	}
+
+	jsonData, err := json.Marshal(sendMsg)
+	if err != nil {
+		e.Errs["sms json marshal error"] = err
+		return e
+	}
+
+	req, err := http.NewRequest("POST", e.SmsInfo.Url, strings.NewReader(string(jsonData)))
+	if err != nil {
+		e.Errs["sms post build error"] = err
+		return e
+	}
+	// net/http does not set a Content-Type for request bodies on its own.
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("x-system-key", appkey)
+	req.Header.Set("x-system-nonce", nonce.String())
+	req.Header.Set("x-system-timestamp", timestamp)
+	req.Header.Set("x-system-token", sm3(appkey+timestamp+appsecret+nonce.String()))
+
+	client := &http.Client{Timeout: smsHTTPTimeout}
+	resp, err := client.Do(req)
+	if err != nil {
+		e.Errs["sms post do error"] = err
+		return e
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		e.Errs["sms body read error"] = err
+		return e
+	}
+	logger.Infof("sms response body: %v", string(body))
+
+	// A rejected request must not be reported as a successful send.
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		e.Errs["sms post status error"] = fmt.Errorf("sms gateway returned %s", resp.Status)
+		return e
+	}
+
+	e.JR.AlertStatus = SENDSUCCESS
+	return e
+}
+
+// smsTpl =`
+// 【江苏省电力公司】
+// 所属系统:{{app_name}}
+// 事件名称:{{events_name}}
+// 事件创建时间:{{date_time}}
+// 事件描述:{{events}}`