作者:noblehn
项目:bosu
func (s *Schedule) Action(user, message string, t models.ActionType, ak models.AlertKey) error {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
st, err := s.DataAccess.State().GetLatestIncident(ak)
if err != nil {
return err
}
if st == nil {
return fmt.Errorf("no such alert key: %v", ak)
}
isUnknown := st.LastAbnormalStatus == models.StUnknown
timestamp := utcNow()
switch t {
case models.ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
}
st.NeedAck = false
if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
return err
}
case models.ActionClose:
if st.IsActive() {
return fmt.Errorf("cannot close active alert")
}
fallthrough
case models.ActionForceClose:
st.Open = false
st.End = ×tamp
case models.ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
}
fallthrough
case models.ActionPurge:
return s.DataAccess.State().Forget(ak)
default:
return fmt.Errorf("unknown action type: %v", t)
}
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
st.Actions = append(st.Actions, models.Action{
Message: message,
Time: timestamp,
Type: t,
User: user,
})
_, err = s.DataAccess.State().UpdateIncidentState(st)
return err
}
作者:nicolle
项目:bosu
func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (ak models.AlertKey, e error) {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
defer func() {
if e == nil {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
e = err
}
}
}()
isUnknown := st.LastAbnormalStatus == models.StUnknown
timestamp := utcNow()
switch t {
case models.ActionAcknowledge:
if !st.NeedAck {
return "", fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return "", fmt.Errorf("cannot acknowledge closed alert")
}
st.NeedAck = false
case models.ActionClose:
if st.IsActive() {
return "", fmt.Errorf("cannot close active alert")
}
fallthrough
case models.ActionForceClose:
st.Open = false
st.End = ×tamp
case models.ActionForget:
if !isUnknown {
return "", fmt.Errorf("can only forget unknowns")
}
fallthrough
case models.ActionPurge:
return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
case models.ActionNote:
// pass
default:
return "", fmt.Errorf("unknown action type: %v", t)
}
st.Actions = append(st.Actions, models.Action{
Message: message,
Time: timestamp,
Type: t,
User: user,
})
_, err := s.DataAccess.State().UpdateIncidentState(st)
return st.AlertKey, err
}
作者:noblehn
项目:bosu
// ParseAndAdd consumes one line of sqlplus output. Blank lines are ignored,
// a feedback line ("no rows selected", "... row selected.", "... rows
// selected.") advances to the next query's parser, and any other line is
// handed to the parser of the current query. Parser errors are logged and not
// returned; the function always returns nil.
func (p *sqlplusParser) ParseAndAdd(line string) error {
	// Query results are separated by blank lines.
	if line == "" {
		return nil
	}
	// A feedback line marks the end of the current query's output.
	if line == "no rows selected" || strings.HasSuffix(line, " rows selected.") ||
		strings.HasSuffix(line, " row selected.") {
		p.parsedQuery++
		return nil
	}
	// All queries have been consumed. The comparison is >= rather than ==
	// because extra feedback lines can push parsedQuery beyond the number of
	// parsers, and indexing with it would then panic.
	if p.parsedQuery >= len(sqlplusParsers) {
		return nil
	}
	if err := sqlplusParsers[p.parsedQuery].parse(line, p.md, p.prefix, p.common); err != nil {
		slog.Errorln("oracle sqlplus parser error:", err)
	}
	return nil
}
作者:snowsnai
项目:bosu
func (s *Schedule) Action(user, message string, t ActionType, ak expr.AlertKey) error {
s.Lock("Action")
defer s.Unlock()
st := s.status[ak]
if st == nil {
return fmt.Errorf("no such alert key: %v", ak)
}
ack := func() {
delete(s.Notifications, ak)
st.NeedAck = false
}
isUnknown := st.AbnormalStatus() == StUnknown
isError := st.AbnormalStatus() == StError
timestamp := time.Now().UTC()
switch t {
case ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
}
ack()
case ActionClose:
if st.NeedAck {
ack()
}
if st.IsActive() && !isError {
return fmt.Errorf("cannot close active alert")
}
st.Open = false
last := st.Last()
if last.IncidentId != 0 {
s.incidentLock.Lock()
if incident, ok := s.Incidents[last.IncidentId]; ok {
incident.End = ×tamp
}
s.incidentLock.Unlock()
}
case ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
}
if st.NeedAck {
ack()
}
st.Open = false
st.Forgotten = true
delete(s.status, ak)
default:
return fmt.Errorf("unknown action type: %v", t)
}
st.Action(user, message, t, timestamp)
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
return nil
}
作者:mathp
项目:bosu
// sendBatch delivers one batch of data points. In Print mode every point is
// logged as JSON and the batch is counted as sent. Otherwise the batch is
// posted to the current TSDB URL and timing/count metrics are recorded. When
// the post fails or answers with a status other than 200/204, the next URL is
// selected, every point is pushed back onto tchan, and the function sleeps
// for five seconds before returning.
func sendBatch(batch []*opentsdb.DataPoint) {
	if Print {
		for _, dp := range batch {
			encoded, err := dp.MarshalJSON()
			if err != nil {
				slog.Error(err)
			}
			slog.Info(string(encoded))
		}
		recordSent(len(batch))
		return
	}

	start := time.Now()
	resp, err := SendDataPoints(batch, tsdbURLs[currentTsdbURL])
	if err == nil {
		defer resp.Body.Close()
	}
	elapsedMs := time.Since(start).Nanoseconds() / 1e6
	Sample("collect.post.duration", Tags, float64(elapsedMs))
	Add("collect.post.total_duration", Tags, elapsedMs)
	Add("collect.post.count", Tags, 1)

	switch {
	case err != nil:
		// Some problem with connecting to the server; retry later.
		Add("collect.post.error", Tags, 1)
		slog.Error(err)
		// Switch endpoint if possible
		currentTsdbURL = (currentTsdbURL + 1) % len(tsdbURLs)
	case resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK:
		Add("collect.post.bad_status", Tags, 1)
		slog.Errorln(resp.Status)
		body, readErr := ioutil.ReadAll(resp.Body)
		if readErr != nil {
			slog.Error(readErr)
		}
		if len(body) > 0 {
			slog.Error(string(body))
		}
		// Switch endpoint if possible
		currentTsdbURL = (currentTsdbURL + 1) % len(tsdbURLs)
	default:
		recordSent(len(batch))
		return
	}

	// Hand the points back to the queue so they are retried later.
	for _, msg := range batch {
		tchan <- msg
	}
	restored := len(batch)
	backoff := 5 * time.Second
	Add("collect.post.restore", Tags, int64(restored))
	slog.Infof("restored %d, sleeping %s", restored, backoff)
	time.Sleep(backoff)
}
作者:BobbW
项目:bosu
// DoPost posts subject to the notification's Post URL using the configured
// content type. When a Body template is set, the template is executed with
// the subject as its data and the rendered output is posted instead. Failures
// and responses with a status of 300 or above are logged; nothing is returned.
func (n *Notification) DoPost(subject []byte) {
	payload := subject
	if n.Body != nil {
		var rendered bytes.Buffer
		if err := n.Body.Execute(&rendered, string(subject)); err != nil {
			slog.Errorln(err)
			return
		}
		payload = rendered.Bytes()
	}

	resp, err := http.Post(n.Post.String(), n.ContentType, bytes.NewBuffer(payload))
	// Close whatever body came back, even alongside an error.
	if resp != nil && resp.Body != nil {
		defer resp.Body.Close()
	}
	switch {
	case err != nil:
		slog.Error(err)
	case resp.StatusCode >= 300:
		slog.Errorln("bad response on notification post:", resp.Status)
	}
}
作者:eswd
项目:bosu
// c_snmp_ips walks the ifIPAdEntAddr SNMP subtree of host and publishes, per
// interface id, the sorted JSON list of its addresses as "addresses"
// metadata. It returns no data points; the only error returned is a failure
// of the SNMP walk itself.
func c_snmp_ips(community, host string) (opentsdb.MultiDataPoint, error) {
	ifIPAdEntAddrRaw, err := snmp_subtree(host, community, ifIPAdEntAddr)
	if err != nil {
		return nil, err
	}
	ipAdEnts := make(map[string]*ipAdEntAddr)
	for id, value := range ifIPAdEntAddrRaw {
		// Split entry type id from ip address
		sp := strings.SplitN(id, ".", 2)
		if len(sp) != 2 {
			slog.Errorln("unexpected length of snmp resonse")
			// Skip the malformed entry; indexing sp[1] would panic.
			continue
		}
		typeId, address := sp[0], sp[1]
		entry, ok := ipAdEnts[address]
		if !ok {
			entry = &ipAdEntAddr{}
			ipAdEnts[address] = entry
		}
		// Values of an unexpected Go type are silently ignored.
		switch typeId {
		case "1":
			if v, ok := value.([]byte); ok {
				entry.IP = v
			}
		case "2":
			if v, ok := value.(int64); ok {
				entry.InterfaceId = v
			}
		case "3":
			if v, ok := value.([]byte); ok {
				entry.Mask = v
			}
		}
	}
	ipsByInt := make(map[int64][]net.IPNet)
	for _, ipNet := range ipAdEnts {
		ipsByInt[ipNet.InterfaceId] = append(ipsByInt[ipNet.InterfaceId], ipNet.IPNet)
	}
	for intId, ipNets := range ipsByInt {
		ips := make([]string, 0, len(ipNets))
		for _, ipNet := range ipNets {
			ips = append(ips, ipNet.String())
		}
		// Sort so the published value does not depend on map iteration order.
		sort.Strings(ips)
		j, err := json.Marshal(ips)
		if err != nil {
			slog.Errorf("error marshaling ips for host %v: %v", host, err)
			// Do not publish an empty value for this interface.
			continue
		}
		metadata.AddMeta("", opentsdb.TagSet{"host": host, "iface": fmt.Sprintf("%v", intId)}, "addresses", string(j), false)
	}
	return nil, nil
}
作者:noblehn
项目:bosu
// DoPost posts payload to the notification's Post URL using the configured
// content type. When a Body template is set, the template is executed with
// the payload as its data and the rendered output is posted instead. ak is
// only used in the success log line. Failures and responses with a status of
// 300 or above are logged; nothing is returned.
func (n *Notification) DoPost(payload []byte, ak string) {
	if n.Body != nil {
		var rendered bytes.Buffer
		if err := n.Body.Execute(&rendered, string(payload)); err != nil {
			slog.Errorln(err)
			return
		}
		payload = rendered.Bytes()
	}

	resp, err := http.Post(n.Post.String(), n.ContentType, bytes.NewBuffer(payload))
	// Close whatever body came back, even alongside an error.
	if resp != nil && resp.Body != nil {
		defer resp.Body.Close()
	}
	switch {
	case err != nil:
		slog.Error(err)
	case resp.StatusCode >= 300:
		slog.Errorln("bad response on notification post:", resp.Status)
	default:
		slog.Infof("post notification successful for alert %s. Response code %d.", ak, resp.StatusCode)
	}
}
作者:rprabha
项目:bosu
// sendMetadata posts ms as JSON to metahost. Marshalling errors, transport
// errors and any response other than 204 No Content are logged; nothing is
// returned to the caller.
func sendMetadata(ms []Metasend) {
	b, err := json.Marshal(&ms)
	if err != nil {
		slog.Error(err)
		return
	}
	resp, err := http.Post(metahost, "application/json", bytes.NewBuffer(b))
	if err != nil {
		slog.Error(err)
		return
	}
	// The body must be closed or the underlying connection is leaked.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusNoContent {
		slog.Errorln("bad metadata return:", resp.Status)
	}
}
作者:rajde
项目:bosu
// ExecuteBody renders the body template of alert a for state st and returns
// the rendered bytes together with the attachments collected on the template
// data. It returns all nils when the alert has no template or no body
// template. The rendered output is passed through inliner.Inline; if that
// fails the error is logged and the un-inlined output is returned.
func (s *Schedule) ExecuteBody(rh *RunHistory, a *conf.Alert, st *State, isEmail bool) ([]byte, []*conf.Attachment, error) {
	tmpl := a.Template
	if tmpl == nil || tmpl.Body == nil {
		return nil, nil, nil
	}
	data := s.Data(rh, st, a, isEmail)
	var rendered bytes.Buffer
	if err := tmpl.Body.Execute(&rendered, data); err != nil {
		return nil, nil, err
	}
	out := rendered.Bytes()
	inlined, err := inliner.Inline(rendered.String())
	if err != nil {
		slog.Errorln(err)
	} else {
		out = []byte(inlined)
	}
	return out, data.Attachments, nil
}
作者:mathp
项目:bosu
// sendMetadata posts ms as JSON to the currently selected metadata host.
// A transport error is logged and rotates to the next host in metahosts.
// A response other than 200 or 204 is logged. Nothing is returned.
func sendMetadata(ms []Metasend) {
	payload, err := json.Marshal(&ms)
	if err != nil {
		slog.Error(err)
		return
	}
	target := metahosts[currentmetahost]
	resp, err := http.Post(target, "application/json", bytes.NewBuffer(payload))
	if err != nil {
		slog.Error(err)
		// Try the next host on the following call.
		currentmetahost = (currentmetahost + 1) % len(metahosts)
		return
	}
	defer resp.Body.Close()
	switch resp.StatusCode {
	case http.StatusNoContent, http.StatusOK:
		// Accepted.
	default:
		slog.Errorln("bad metadata return:", resp.Status)
	}
}
作者:nicolle
项目:bosu
// utnotify sends one notification through n that covers all the given
// unknown groups. The subject states the total number of alert keys across
// the groups; the body is rendered from the unknownMultiGroup template with
// the groups and the configured unknown threshold. A template error is logged
// and the notification is still sent with whatever was rendered.
func (s *Schedule) utnotify(groups map[string]models.AlertKeys, n *conf.Notification) {
	now := utcNow()
	total := 0
	for _, keys := range groups {
		// Every group is stored under the same timestamp key, so each
		// iteration overwrites the previous entry.
		// NOTE(review): the original author copied this from unotify without
		// knowing its purpose; confirm it is still needed.
		s.Group[now] = keys
		total += len(keys)
	}

	data := struct {
		Groups    map[string]models.AlertKeys
		Threshold int
	}{
		Groups:    groups,
		Threshold: s.SystemConf.GetUnknownThreshold(),
	}
	var body bytes.Buffer
	if err := unknownMultiGroup.Execute(&body, data); err != nil {
		slog.Errorln(err)
	}

	subject := fmt.Sprintf("%v unknown alert instances suppressed", total)
	n.Notify(subject, body.String(), []byte(subject), body.Bytes(), s.SystemConf, "unknown_treshold")
}
作者:snowsnai
项目:bosu
// watch registers every path under root whose base name matches pattern with
// an fsnotify watcher and starts a goroutine that calls f whenever a write
// event arrives. After f runs, further events are ignored for two seconds.
// Failing to create the watcher, an invalid pattern, or failing to add a path
// is fatal; paths that cannot be visited are logged and skipped. The
// goroutine and the watcher live until the process exits.
func watch(root, pattern string, f func()) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		slog.Fatal(err)
	}
	filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk reports unreadable paths here, and info is nil when the
			// path could not be stat'ed, so it must not be dereferenced.
			slog.Errorln("error:", err)
			return nil
		}
		if matched, err := filepath.Match(pattern, info.Name()); err != nil {
			slog.Fatal(err)
		} else if !matched {
			return nil
		}
		if err := watcher.Add(path); err != nil {
			slog.Fatal(err)
		}
		return nil
	})
	slog.Infoln("watching", pattern, "in", root)
	// wait is only touched by the goroutine below once it has started.
	wait := time.Now()
	go func() {
		for {
			select {
			case event := <-watcher.Events:
				// Still inside the quiet period that follows a call to f.
				if wait.After(time.Now()) {
					continue
				}
				if event.Op&fsnotify.Write == fsnotify.Write {
					f()
					wait = time.Now().Add(time.Second * 2)
				}
			case err := <-watcher.Errors:
				slog.Errorln("error:", err)
			}
		}
	}()
}
作者:kroni
项目:bosu
// pingHost pings host once and reports three metrics tagged with dst_host:
// ping.resolved (1 when the name resolved, 0 otherwise), ping.rtt in
// milliseconds for a received reply, and ping.timeout (0 when a reply was
// received, 1 otherwise). When resolution fails only ping.resolved is sent.
func pingHost(host string) {
	pinger := fastping.NewPinger()
	tags := opentsdb.TagSet{"dst_host": host}

	resolved := 0
	// Deferred so the final value of resolved is reported on every exit path.
	defer func() {
		collect.Put("ping.resolved", tags, resolved)
	}()

	addr, err := net.ResolveIPAddr("ip4:icmp", host)
	if err != nil {
		return
	}
	resolved = 1

	pinger.AddIPAddr(addr)
	pinger.MaxRTT = 5 * time.Second

	timedOut := 1
	pinger.OnRecv = func(_ *net.IPAddr, rtt time.Duration) {
		collect.Put("ping.rtt", tags, float64(rtt)/float64(time.Millisecond))
		timedOut = 0
	}
	if runErr := pinger.Run(); runErr != nil {
		slog.Errorln(runErr)
	}
	collect.Put("ping.timeout", tags, timedOut)
}
作者:eswd
项目:bosu
//.........这里部分代码省略.........
// assign incident id to new event if applicable
prev := state.Last()
worst := StNormal
event.Time = r.Start
if prev.IncidentId != 0 {
// If last event has incident id and is not closed, we continue it.
incident, err := s.DataAccess.Incidents().GetIncident(prev.IncidentId)
if err != nil {
slog.Error(err)
} else if incident.End == nil {
event.IncidentId = prev.IncidentId
worst = state.WorstThisIncident()
}
}
if event.IncidentId == 0 && event.Status != StNormal {
incident, err := s.createIncident(ak, event.Time)
if err != nil {
slog.Error("Error creating incident", err)
} else {
event.IncidentId = incident.Id
}
}
state.Append(event)
a := s.Conf.Alerts[ak.Name()]
// render templates and open alert key if abnormal
if event.Status > StNormal {
s.executeTemplates(state, event, a, r)
state.Open = true
if a.Log {
worst = StNormal
state.Open = false
}
}
// On state increase, clear old notifications and notify current.
// If the old alert was not acknowledged, do nothing.
// Do nothing if state did not change.
notify := func(ns *conf.Notifications) {
if a.Log {
lastLogTime := state.LastLogTime
now := time.Now()
if now.Before(lastLogTime.Add(a.MaxLogFrequency)) {
return
}
state.LastLogTime = now
}
nots := ns.Get(s.Conf, state.Group)
for _, n := range nots {
s.Notify(state, n)
checkNotify = true
}
}
notifyCurrent := func() {
// Auto close ignoreUnknowns.
if a.IgnoreUnknown && event.Status == StUnknown {
state.Open = false
state.Forgotten = true
state.NeedAck = false
state.Action("bosun", "Auto close because alert has ignoreUnknown.", ActionClose, event.Time)
slog.Infof("auto close %s because alert has ignoreUnknown", ak)
return
} else if silenced[ak].Forget && event.Status == StUnknown {
state.Open = false
state.Forgotten = true
state.NeedAck = false
state.Action("bosun", "Auto close because alert is silenced and marked auto forget.", ActionClose, event.Time)
slog.Infof("auto close %s because alert is silenced and marked auto forget", ak)
return
}
state.NeedAck = true
switch event.Status {
case StCritical, StUnknown:
notify(a.CritNotification)
case StWarning:
notify(a.WarnNotification)
}
}
clearOld := func() {
state.NeedAck = false
delete(s.Notifications, ak)
}
// lock while we change notifications.
s.Lock("RunHistory")
if event.Status > worst {
clearOld()
notifyCurrent()
} else if _, ok := silenced[ak]; ok && event.Status == StNormal {
go func(ak models.AlertKey) {
slog.Infof("auto close %s because was silenced", ak)
err := s.Action("bosun", "Auto close because was silenced.", ActionClose, ak)
if err != nil {
slog.Errorln(err)
}
}(ak)
}
s.Unlock()
return checkNotify
}
作者:trigrass
项目:bosu
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
defer func() {
bosunStartupTime = time.Now()
}()
slog.Infoln("RestoreState")
start := time.Now()
s.Lock("RestoreState")
defer s.Unlock()
s.Search.Lock()
defer s.Search.Unlock()
s.Notifications = nil
decode := func(name string, dst interface{}) error {
var data []byte
err := s.db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte(dbBucket))
if b == nil {
return fmt.Errorf("unknown bucket: %v", dbBucket)
}
data = b.Get([]byte(name))
return nil
})
if err != nil {
return err
}
gr, err := gzip.NewReader(bytes.NewReader(data))
if err != nil {
return err
}
defer gr.Close()
return gob.NewDecoder(gr).Decode(dst)
}
if err := decode(dbMetadata, &s.Metadata); err != nil {
slog.Errorln(dbMetadata, err)
}
if err := decode(dbMetricMetadata, &s.metricMetadata); err != nil {
slog.Errorln(dbMetricMetadata, err)
}
for k, v := range s.Metadata {
if k.Name == "desc" || k.Name == "rate" || k.Name == "unit" {
s.PutMetadata(k, v.Value)
delete(s.Metadata, k)
}
}
if err := decode(dbMetric, &s.Search.Metric); err != nil {
slog.Errorln(dbMetric, err)
}
if err := decode(dbTagk, &s.Search.Tagk); err != nil {
slog.Errorln(dbTagk, err)
}
if err := decode(dbTagv, &s.Search.Tagv); err != nil {
slog.Errorln(dbTagv, err)
}
if err := decode(dbMetricTags, &s.Search.MetricTags); err != nil {
slog.Errorln(dbMetricTags, err)
}
notifications := make(map[expr.AlertKey]map[string]time.Time)
if err := decode(dbNotifications, ¬ifications); err != nil {
slog.Errorln(dbNotifications, err)
}
if err := decode(dbSilence, &s.Silence); err != nil {
slog.Errorln(dbSilence, err)
}
if err := decode(dbIncidents, &s.Incidents); err != nil {
slog.Errorln(dbIncidents, err)
}
if err := decode(dbErrors, &s.AlertStatuses); err != nil {
slog.Errorln(dbErrors, err)
}
// Calculate next incident id.
for _, i := range s.Incidents {
if i.Id > s.maxIncidentId {
s.maxIncidentId = i.Id
}
}
status := make(States)
if err := decode(dbStatus, &status); err != nil {
slog.Errorln(dbStatus, err)
}
clear := func(r *Result) {
if r == nil {
return
}
r.Computations = nil
}
for ak, st := range status {
a, present := s.Conf.Alerts[ak.Name()]
if !present {
slog.Errorln("sched: alert no longer present, ignoring:", ak)
continue
} else if s.Conf.Squelched(a, st.Group) {
slog.Infoln("sched: alert now squelched:", ak)
continue
} else {
t := a.Unknown
if t == 0 {
t = s.Conf.CheckFrequency
}
if t == 0 && st.Last().Status == StUnknown {
//.........这里部分代码省略.........
作者:rajde
项目:bosu
// loadNotification builds a Notification from config section s and registers
// it in c.Notifications under the section name. Recognised keys are email,
// post, get, print, contentType, next, timeout, body and runOnActions; any
// other key is reported through c.errorf. Problems (duplicate name, email
// without SMTP settings, unparsable values, unknown "next" target, timeout
// without next) are reported through c.error / c.errorf.
func (c *Conf) loadNotification(s *parse.SectionNode) {
	name := s.Name.Text
	if _, exists := c.Notifications[name]; exists {
		c.errorf("duplicate notification name: %s", name)
	}
	n := Notification{
		Vars:         make(map[string]string),
		ContentType:  "application/x-www-form-urlencoded",
		Name:         name,
		RunOnActions: true,
		Text:         s.RawText,
	}
	// Helpers made available to the body template.
	tmplFuncs := ttemplate.FuncMap{
		"V": func(v string) string {
			return c.Expand(v, n.Vars, false)
		},
		"json": func(v interface{}) string {
			encoded, err := json.Marshal(v)
			if err != nil {
				slog.Errorln(err)
			}
			return string(encoded)
		},
	}
	// Registered before the keys are read, so it is already present in the
	// map when "next" is resolved below.
	c.Notifications[name] = &n
	for _, pair := range c.getPairs(s, n.Vars, sNormal) {
		c.at(pair.node)
		val := pair.val
		switch pair.key {
		case "email":
			if c.SMTPHost == "" || c.EmailFrom == "" {
				c.errorf("email notifications require both smtpHost and emailFrom to be set")
			}
			n.email = val
			addrs, err := mail.ParseAddressList(n.email)
			if err != nil {
				c.error(err)
			}
			n.Email = addrs
		case "post":
			n.post = val
			postURL, err := url.Parse(n.post)
			if err != nil {
				c.error(err)
			}
			n.Post = postURL
		case "get":
			n.get = val
			getURL, err := url.Parse(n.get)
			if err != nil {
				c.error(err)
			}
			n.Get = getURL
		case "print":
			n.Print = true
		case "contentType":
			n.ContentType = val
		case "next":
			n.next = val
			target, found := c.Notifications[n.next]
			if !found {
				c.errorf("unknown notification %s", n.next)
			}
			n.Next = target
		case "timeout":
			d, err := opentsdb.ParseDuration(val)
			if err != nil {
				c.error(err)
			}
			n.Timeout = time.Duration(d)
		case "body":
			n.body = val
			tmpl := ttemplate.New(name).Funcs(tmplFuncs)
			if _, err := tmpl.Parse(n.body); err != nil {
				c.error(err)
			}
			n.Body = tmpl
		case "runOnActions":
			n.RunOnActions = val == "true"
		default:
			c.errorf("unknown key %s", pair.key)
		}
	}
	c.at(s)
	if n.Timeout > 0 && n.Next == nil {
		c.errorf("timeout specified without next")
	}
}
作者:snowsnai
项目:bosu
// CheckExpr evaluates expression e for alert a and folds the results into the
// events of rh. For every result that is neither squelched nor listed in
// ignore, the result is stored as the event's Warn or Crit result (depending
// on checkStatus) and the event status is raised when the new status is
// higher: a NaN value maps to StError, zero maps to StNormal, anything else
// to checkStatus. The returned alerts are the keys whose status is not
// normal. A nil e returns immediately. Any returned error is also counted in
// the "check.errs" metric and logged.
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus Status, ignore expr.AlertKeys) (alerts expr.AlertKeys, err error) {
	if e == nil {
		return
	}
	defer func() {
		if err != nil {
			collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
			slog.Errorln(err)
		}
	}()
	results, err := s.executeExpr(T, rh, a, e)
	if err != nil {
		return nil, err
	}
	for _, r := range results.Results {
		if s.Conf.Squelched(a, r.Group) {
			continue
		}
		ak := expr.NewAlertKey(a.Name, r.Group)
		skip := false
		for _, ignored := range ignore {
			if ak == ignored {
				skip = true
				break
			}
		}
		if skip {
			continue
		}

		var n float64
		switch v := r.Value.(type) {
		case expr.Number:
			n = float64(v)
		case expr.Scalar:
			n = float64(v)
		default:
			err = fmt.Errorf("expected number or scalar")
			return alerts, err
		}

		event := rh.Events[ak]
		if event == nil {
			event = new(Event)
			rh.Events[ak] = event
		}
		result := &Result{
			Result: r,
			Expr:   e.String(),
		}
		switch checkStatus {
		case StWarning:
			event.Warn = result
		case StCritical:
			event.Crit = result
		}

		status := checkStatus
		switch {
		case math.IsNaN(n):
			status = StError
		case n == 0:
			status = StNormal
		}
		if status != StNormal {
			alerts = append(alerts, ak)
		}
		// Only ever raise the event's status, never lower it.
		if status > event.Status {
			event.Status = status
		}
	}
	return alerts, err
}
作者:snowsnai
项目:bosu
// CollectStates sends various state information to bosun with collect. For
// every configured alert it reports, over the open states only, the number of
// states per current severity, per last abnormal severity, per
// acknowledgement status and per active status. Counters are initialised to
// zero so every combination is reported. Errors from collect.Put are logged.
func (s *Schedule) CollectStates() {
	// [AlertName][Severity]Count
	severityCounts := make(map[string]map[string]int64)
	abnormalCounts := make(map[string]map[string]int64)
	ackStatusCounts := make(map[string]map[bool]int64)
	activeStatusCounts := make(map[string]map[bool]int64)
	// Initialize the counts.
	for _, alert := range s.Conf.Alerts {
		severityCounts[alert.Name] = make(map[string]int64)
		abnormalCounts[alert.Name] = make(map[string]int64)
		// Walk the statuses starting at 1 until String reports "none".
		var i Status
		for i = 1; i.String() != "none"; i++ {
			severityCounts[alert.Name][i.String()] = 0
			abnormalCounts[alert.Name][i.String()] = 0
		}
		ackStatusCounts[alert.Name] = map[bool]int64{false: 0, true: 0}
		activeStatusCounts[alert.Name] = map[bool]int64{false: 0, true: 0}
	}
	for _, state := range s.status {
		if !state.Open {
			continue
		}
		// A state whose alert is not configured has no counter maps;
		// incrementing through the missing inner map would panic.
		if _, known := severityCounts[state.Alert]; !known {
			continue
		}
		severity := state.Status().String()
		lastAbnormal := state.AbnormalStatus().String()
		severityCounts[state.Alert][severity]++
		abnormalCounts[state.Alert][lastAbnormal]++
		ackStatusCounts[state.Alert][state.NeedAck]++
		activeStatusCounts[state.Alert][state.IsActive()]++
	}
	logErr := func(err error) {
		if err != nil {
			slog.Errorln(err)
		}
	}
	for alertName := range severityCounts {
		ts := opentsdb.TagSet{"alert": alertName}
		// The tagset of the alert is not included because there is no way to
		// store the string of a group in OpenTSDB in a parsable way. This is
		// because any delimiter we chose could also be part of a tag key or tag
		// value.
		for severity := range severityCounts[alertName] {
			logErr(collect.Put("alerts.current_severity",
				ts.Copy().Merge(opentsdb.TagSet{"severity": severity}),
				severityCounts[alertName][severity]))
			logErr(collect.Put("alerts.last_abnormal_severity",
				ts.Copy().Merge(opentsdb.TagSet{"severity": severity}),
				abnormalCounts[alertName][severity]))
		}
		// NeedAck == true means the state is still unacknowledged.
		logErr(collect.Put("alerts.acknowledgement_status",
			ts.Copy().Merge(opentsdb.TagSet{"status": "unacknowledged"}),
			ackStatusCounts[alertName][true]))
		logErr(collect.Put("alerts.acknowledgement_status",
			ts.Copy().Merge(opentsdb.TagSet{"status": "acknowledged"}),
			ackStatusCounts[alertName][false]))
		logErr(collect.Put("alerts.active_status",
			ts.Copy().Merge(opentsdb.TagSet{"status": "active"}),
			activeStatusCounts[alertName][true]))
		logErr(collect.Put("alerts.active_status",
			ts.Copy().Merge(opentsdb.TagSet{"status": "inactive"}),
			activeStatusCounts[alertName][false]))
	}
}
作者:nicolle
项目:bosu
// CheckExpr evaluates expression e for alert a and folds the results into the
// events of rh. The expression runs in its own goroutine; if the runner
// context is done first, CheckExpr returns with cancelled set to true and no
// error. For every result that is neither squelched nor listed in ignore, the
// result is stored as the event's Warn or Crit result (depending on
// checkStatus) and the event status is raised when the new status is higher:
// a zero value maps to StNormal, every other value (including NaN) to
// checkStatus. The returned alerts are the keys whose status is not normal.
// A nil e returns immediately. Any returned error is also counted in the
// "check.errs" metric and logged.
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error, cancelled bool) {
	if e == nil {
		return
	}
	defer func() {
		if err != nil {
			collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
			slog.Errorln(err)
		}
	}()

	type outcome struct {
		results *expr.Results
		err     error
	}
	// See s.CheckAlert for an explanation of execution and cancellation with this channel
	// The buffer of one lets the goroutine finish its send after a cancellation.
	done := make(chan outcome, 1)
	go func() {
		r, execErr := s.executeExpr(T, rh, a, e)
		done <- outcome{results: r, err: execErr}
	}()

	var results *expr.Results
	select {
	case got := <-done:
		results, err = got.results, got.err
	case <-s.runnerContext.Done():
		return nil, nil, true
	}
	if err != nil {
		return alerts, err, cancelled
	}

	for _, r := range results.Results {
		if s.RuleConf.Squelched(a, r.Group) {
			continue
		}
		ak := models.NewAlertKey(a.Name, r.Group)
		skip := false
		for _, ignored := range ignore {
			if ak == ignored {
				skip = true
				break
			}
		}
		if skip {
			continue
		}

		var n float64
		n, err = valueToFloat(r.Value)
		if err != nil {
			return alerts, err, cancelled
		}

		event := rh.Events[ak]
		if event == nil {
			event = new(models.Event)
			rh.Events[ak] = event
		}
		result := &models.Result{
			Computations: r.Computations,
			Value:        models.Float(n),
			Expr:         e.String(),
		}
		switch checkStatus {
		case models.StWarning:
			event.Warn = result
		case models.StCritical:
			event.Crit = result
		}

		// NaN keeps checkStatus; only an exact zero counts as normal.
		status := checkStatus
		if !math.IsNaN(n) && n == 0 {
			status = models.StNormal
		}
		if status != models.StNormal {
			alerts = append(alerts, ak)
		}
		// Only ever raise the event's status, never lower it.
		if status > event.Status {
			event.Status = status
		}
	}
	return alerts, err, cancelled
}