作者:bridgewel
项目:bosu
func (s *Schedule) Action(user, message string, t ActionType, ak expr.AlertKey) error {
s.Lock("Action")
defer s.Unlock()
st := s.status[ak]
if st == nil {
return fmt.Errorf("no such alert key: %v", ak)
}
ack := func() {
delete(s.Notifications, ak)
st.NeedAck = false
}
isUnknown := st.AbnormalStatus() == StUnknown
isError := st.AbnormalStatus() == StError
timestamp := time.Now().UTC()
switch t {
case ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
}
ack()
case ActionClose:
if st.NeedAck {
ack()
}
if st.IsActive() && !isError {
return fmt.Errorf("cannot close active alert")
}
st.Open = false
last := st.Last()
if last.IncidentId != 0 {
s.incidentLock.Lock()
if incident, ok := s.Incidents[last.IncidentId]; ok {
incident.End = ×tamp
}
s.incidentLock.Unlock()
}
case ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
}
if st.NeedAck {
ack()
}
st.Open = false
st.Forgotten = true
delete(s.status, ak)
default:
return fmt.Errorf("unknown action type: %v", t)
}
st.Action(user, message, t, timestamp)
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
log.Println(err)
}
return nil
}
作者:snowsnai
项目:bosu
// NewStatus returns a new State for the alert key ak, with its Alert name,
// Group and Tags taken from the key. All other fields keep their zero value.
func NewStatus(ak expr.AlertKey) *State {
	group := ak.Group()
	st := new(State)
	st.Alert = ak.Name()
	st.Tags = group.Tags()
	st.Group = group
	return st
}
作者:snowsnai
项目:bosu
// RunHistory for a single alert key. Returns true if notifications were altered.
func (s *Schedule) runHistory(r *RunHistory, ak expr.AlertKey, event *Event, silenced map[expr.AlertKey]Silence) bool {
checkNotify := false
// get existing state object for alert key. add to schedule status if doesn't already exist
state := s.GetStatus(ak)
if state == nil {
state = NewStatus(ak)
s.SetStatus(ak, state)
}
defer s.SetStatus(ak, state)
// make sure we always touch the state.
state.Touched = r.Start
// set state.Result according to event result
if event.Error != nil {
state.Result = event.Error
} else if event.Crit != nil {
state.Result = event.Crit
} else if event.Warn != nil {
state.Result = event.Warn
}
// if event is unevaluated, we are done.
state.Unevaluated = event.Unevaluated
if event.Unevaluated {
return checkNotify
}
// assign incident id to new event if applicable
prev := state.Last()
event.Time = r.Start
if prev.IncidentId != 0 {
// If last event has incident id and is not closed, we continue it.
s.incidentLock.Lock()
if incident, ok := s.Incidents[prev.IncidentId]; ok && incident.End == nil {
event.IncidentId = prev.IncidentId
}
s.incidentLock.Unlock()
}
if event.IncidentId == 0 && event.Status != StNormal {
// Otherwise, create new incident on first non-normal event.
event.IncidentId = s.createIncident(ak, event.Time).Id
}
// add new event to state
last := state.AbnormalStatus()
state.Append(event)
a := s.Conf.Alerts[ak.Name()]
wasOpen := state.Open
// render templates and open alert key if abnormal
if event.Status > StNormal {
s.executeTemplates(state, event, a, r)
state.Open = true
if a.Log {
state.Open = false
}
}
// On state increase, clear old notifications and notify current.
// On state decrease, and if the old alert was already acknowledged, notify current.
// If the old alert was not acknowledged, do nothing.
// Do nothing if state did not change.
notify := func(ns *conf.Notifications) {
if a.Log {
lastLogTime := state.LastLogTime
now := time.Now()
if now.Before(lastLogTime.Add(a.MaxLogFrequency)) {
return
}
state.LastLogTime = now
}
nots := ns.Get(s.Conf, state.Group)
for _, n := range nots {
s.Notify(state, n)
checkNotify = true
}
}
notifyCurrent := func() {
// Auto close ignoreUnknowns.
if a.IgnoreUnknown && event.Status == StUnknown {
state.Open = false
state.Forgotten = true
state.NeedAck = false
state.Action("bosun", "Auto close because alert has ignoreUnknown.", ActionClose, event.Time)
slog.Infof("auto close %s because alert has ignoreUnknown", ak)
return
} else if silenced[ak].Forget && event.Status == StUnknown {
state.Open = false
state.Forgotten = true
state.NeedAck = false
state.Action("bosun", "Auto close because alert is silenced and marked auto forget.", ActionClose, event.Time)
slog.Infof("auto close %s because alert is silenced and marked auto forget", ak)
return
}
state.NeedAck = true
switch event.Status {
case StCritical, StUnknown:
notify(a.CritNotification)
case StWarning:
notify(a.WarnNotification)
}
}
clearOld := func() {
state.NeedAck = false
delete(s.Notifications, ak)
//.........这里部分代码省略.........