作者:noblehn
项目:bosu
func (s *Schedule) Action(user, message string, t models.ActionType, ak models.AlertKey) error {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
st, err := s.DataAccess.State().GetLatestIncident(ak)
if err != nil {
return err
}
if st == nil {
return fmt.Errorf("no such alert key: %v", ak)
}
isUnknown := st.LastAbnormalStatus == models.StUnknown
timestamp := utcNow()
switch t {
case models.ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
}
st.NeedAck = false
if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
return err
}
case models.ActionClose:
if st.IsActive() {
return fmt.Errorf("cannot close active alert")
}
fallthrough
case models.ActionForceClose:
st.Open = false
st.End = ×tamp
case models.ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
}
fallthrough
case models.ActionPurge:
return s.DataAccess.State().Forget(ak)
default:
return fmt.Errorf("unknown action type: %v", t)
}
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
st.Actions = append(st.Actions, models.Action{
Message: message,
Time: timestamp,
Type: t,
User: user,
})
_, err = s.DataAccess.State().UpdateIncidentState(st)
return err
}
作者:mathp
项目:bosu
// Unlock releases the schedule mutex and then reports lock metrics tagged with
// the caller recorded in s.mutexHolder: the stored wait time, the time elapsed
// since s.mutexAquired in milliseconds, and a lock count of one.
func (s *Schedule) Unlock() {
	// Copy the bookkeeping fields before releasing the mutex so the values
	// reported below are the ones read at this point.
	caller, acquiredAt, waited := s.mutexHolder, s.mutexAquired, s.mutexWaitTime
	s.mutex.Unlock()

	waitTags := opentsdb.TagSet{"caller": caller, "op": "wait"}
	collect.Add("schedule.lock_time", waitTags, waited)

	holdTags := opentsdb.TagSet{"caller": caller, "op": "hold"}
	heldMillis := int64(time.Since(acquiredAt) / time.Millisecond)
	collect.Add("schedule.lock_time", holdTags, heldMillis)

	countTags := opentsdb.TagSet{"caller": caller}
	collect.Add("schedule.lock_count", countTags, 1)
}
作者:nicolle
项目:bosu
func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (ak models.AlertKey, e error) {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
defer func() {
if e == nil {
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
slog.Errorln(err)
}
if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
e = err
}
}
}()
isUnknown := st.LastAbnormalStatus == models.StUnknown
timestamp := utcNow()
switch t {
case models.ActionAcknowledge:
if !st.NeedAck {
return "", fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return "", fmt.Errorf("cannot acknowledge closed alert")
}
st.NeedAck = false
case models.ActionClose:
if st.IsActive() {
return "", fmt.Errorf("cannot close active alert")
}
fallthrough
case models.ActionForceClose:
st.Open = false
st.End = ×tamp
case models.ActionForget:
if !isUnknown {
return "", fmt.Errorf("can only forget unknowns")
}
fallthrough
case models.ActionPurge:
return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
case models.ActionNote:
// pass
default:
return "", fmt.Errorf("unknown action type: %v", t)
}
st.Actions = append(st.Actions, models.Action{
Message: message,
Time: timestamp,
Type: t,
User: user,
})
_, err := s.DataAccess.State().UpdateIncidentState(st)
return st.AlertKey, err
}
作者:jm
项目:bosu
// ServeHTTP forwards r through rp.ReverseProxy with the request body wrapped in
// a passthru and the response writer wrapped in a relayWriter. Afterwards it
// passes the passthru's buffered bytes to indexTSDB and records the relay.bytes
// and relay.response counters, tagged with the sanitised URL path and the part
// of r.RemoteAddr before the first colon.
func (rp *relayProxy) ServeHTTP(responseWriter http.ResponseWriter, r *http.Request) {
	captured := &passthru{ReadCloser: r.Body}
	r.Body = captured
	rw := &relayWriter{ResponseWriter: responseWriter}

	rp.ReverseProxy.ServeHTTP(rw, r)
	indexTSDB(r, captured.buf.Bytes())

	remoteHost := strings.Split(r.RemoteAddr, ":")[0]
	tags := opentsdb.TagSet{
		"path":   opentsdb.MustReplace(r.URL.Path, "_"),
		"remote": opentsdb.MustReplace(remoteHost, "_"),
	}
	collect.Add("relay.bytes", tags, int64(captured.buf.Len()))

	// The status tag is added only for the response counter; the same tag set
	// is reused for it.
	tags["status"] = strconv.Itoa(rw.code)
	collect.Add("relay.response", tags, 1)
}
作者:bridgewel
项目:bosu
func (s *Schedule) Action(user, message string, t ActionType, ak expr.AlertKey) error {
s.Lock("Action")
defer s.Unlock()
st := s.status[ak]
if st == nil {
return fmt.Errorf("no such alert key: %v", ak)
}
ack := func() {
delete(s.Notifications, ak)
st.NeedAck = false
}
isUnknown := st.AbnormalStatus() == StUnknown
isError := st.AbnormalStatus() == StError
timestamp := time.Now().UTC()
switch t {
case ActionAcknowledge:
if !st.NeedAck {
return fmt.Errorf("alert already acknowledged")
}
if !st.Open {
return fmt.Errorf("cannot acknowledge closed alert")
}
ack()
case ActionClose:
if st.NeedAck {
ack()
}
if st.IsActive() && !isError {
return fmt.Errorf("cannot close active alert")
}
st.Open = false
last := st.Last()
if last.IncidentId != 0 {
s.incidentLock.Lock()
if incident, ok := s.Incidents[last.IncidentId]; ok {
incident.End = ×tamp
}
s.incidentLock.Unlock()
}
case ActionForget:
if !isUnknown {
return fmt.Errorf("can only forget unknowns")
}
if st.NeedAck {
ack()
}
st.Open = false
st.Forgotten = true
delete(s.status, ak)
default:
return fmt.Errorf("unknown action type: %v", t)
}
st.Action(user, message, t, timestamp)
// Would like to also track the alert group, but I believe this is impossible because any character
// that could be used as a delimiter could also be a valid tag key or tag value character
if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
log.Println(err)
}
return nil
}
作者:uro
项目:bosu
// Index updates the last-seen record for every datapoint in mdp and then
// offers the datapoint to s.indexQueue.
//
// Records are kept per metric and per tag-set string. A record is only updated
// when the datapoint's timestamp is newer than the stored one; the value and
// its rate of change per timestamp unit are refreshed when the value converts
// to a float, and the stored timestamp advances either way. The queue send is
// non-blocking: when the queue cannot accept the datapoint, the
// "search.dropped" counter is incremented instead.
func (s *Search) Index(mdp opentsdb.MultiDataPoint) {
	for _, dp := range mdp {
		s.Lock()
		tagKey := dp.Tags.String()
		byTags := s.last[dp.Metric]
		if byTags == nil {
			byTags = make(map[string]*database.LastInfo)
			s.last[dp.Metric] = byTags
		}
		info := byTags[tagKey]
		if info == nil {
			info = new(database.LastInfo)
			byTags[tagKey] = info
		}
		if dp.Timestamp > info.Timestamp {
			fv, err := getFloat(dp.Value)
			if err != nil {
				slog.Error(err)
			} else {
				elapsed := float64(dp.Timestamp - info.Timestamp)
				info.DiffFromPrev = (fv - info.LastVal) / elapsed
				info.LastVal = fv
			}
			info.Timestamp = dp.Timestamp
		}
		s.Unlock()

		// Never block the caller on a full queue.
		select {
		case s.indexQueue <- dp:
		default:
			collect.Add("search.dropped", opentsdb.TagSet{}, 1)
		}
	}
}
作者:evgeny-potapo
项目:bosu
// Index updates the last-seen record for every datapoint in mdp and then
// offers the datapoint to s.indexQueue.
//
// Records live in s.Last under the metric name concatenated with the tag-set
// string. A record is only updated when the datapoint's timestamp is newer
// than the stored one; the value and its rate of change per timestamp unit are
// refreshed when the value converts to a float, and the stored timestamp
// advances either way. The queue send is non-blocking: when the queue cannot
// accept the datapoint, the "search.dropped" counter is incremented instead.
func (s *Search) Index(mdp opentsdb.MultiDataPoint) {
	for _, dp := range mdp {
		s.Lock()
		lookup := dp.Metric + dp.Tags.String()
		info := s.Last[lookup]
		if info == nil {
			info = new(lastInfo)
			s.Last[lookup] = info
		}
		if dp.Timestamp > info.timestamp {
			fv, err := getFloat(dp.Value)
			if err != nil {
				slog.Error(err)
			} else {
				elapsed := float64(dp.Timestamp - info.timestamp)
				info.diffFromPrev = (fv - info.lastVal) / elapsed
				info.lastVal = fv
			}
			info.timestamp = dp.Timestamp
		}
		s.Unlock()

		// Never block the caller on a full queue.
		select {
		case s.indexQueue <- dp:
		default:
			collect.Add("search.dropped", opentsdb.TagSet{}, 1)
		}
	}
}
作者:noblehn
项目:bosu
// relayPut forwards a put request through rp.TSDBProxy and, when the proxy
// answers with a 2xx status, fans the buffered request body out in the
// background:
//
//   - always to bosunIndexURL;
//   - to rp.denormalize when parse is set, denormalization rules exist and the
//     request was not itself relayed;
//   - to every URL in relayPutUrls when the request was not itself relayed.
//
// A request counts as relayed when it carries a non-empty relayHeader; requests
// sent to the secondary relays are marked with that header. A failure for one
// secondary relay is logged and the remaining relays are still attempted.
func (rp *relayProxy) relayPut(responseWriter http.ResponseWriter, r *http.Request, parse bool) {
	isRelayed := r.Header.Get(relayHeader) != ""
	reader := &passthru{ReadCloser: r.Body}
	r.Body = reader
	w := &relayWriter{ResponseWriter: responseWriter}
	rp.TSDBProxy.ServeHTTP(w, r)
	if w.code/100 != 2 {
		verbose("got status %d", w.code)
		return
	}
	verbose("relayed to tsdb")
	collect.Add("puts.relayed", opentsdb.TagSet{}, 1)
	// Send to bosun in a separate go routine so we can end the source's request.
	go func() {
		body := bytes.NewBuffer(reader.buf.Bytes())
		req, err := http.NewRequest(r.Method, bosunIndexURL, body)
		if err != nil {
			verbose("%v", err)
			return
		}
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			verbose("bosun relay error: %v", err)
			return
		}
		resp.Body.Close()
		verbose("bosun relay success")
	}()
	// Parse and denormalize datapoints
	if !isRelayed && parse && denormalizationRules != nil {
		go rp.denormalize(bytes.NewReader(reader.buf.Bytes()))
	}
	if !isRelayed && len(relayPutUrls) > 0 {
		go func() {
			for _, relayURL := range relayPutUrls {
				body := bytes.NewBuffer(reader.buf.Bytes())
				req, err := http.NewRequest(r.Method, relayURL, body)
				if err != nil {
					// Skip only this target; the other relays are independent.
					verbose("%v", err)
					continue
				}
				req.Header.Set("Content-Type", "application/json")
				req.Header.Set("Content-Encoding", "gzip")
				req.Header.Add(relayHeader, myHost)
				resp, err := http.DefaultClient.Do(req)
				if err != nil {
					// One unreachable relay must not starve the ones after it.
					verbose("secondary relay error: %v", err)
					continue
				}
				resp.Body.Close()
				verbose("secondary relay success")
			}
		}()
	}
}
作者:mathp
项目:bosu
// DoEmail sends an HTML email for alert ak to every address in n.Email, using
// the sender and SMTP settings from c, with subject, body and any attachments
// supplied by the caller. The outcome is logged and counted under
// "email.sent" or "email.sent_failed"; no error is returned.
func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*Attachment) {
	msg := email.NewEmail()
	msg.From = c.EmailFrom
	msg.Subject = string(subject)
	msg.HTML = body
	for _, rcpt := range n.Email {
		msg.To = append(msg.To, rcpt.Address)
	}
	for _, att := range attachments {
		msg.Attach(bytes.NewBuffer(att.Data), att.Filename, att.ContentType)
	}

	sendErr := Send(msg, c.SMTPHost, c.SMTPUsername, c.SMTPPassword)
	if sendErr != nil {
		collect.Add("email.sent_failed", nil, 1)
		log.Printf("failed to send alert %v to %v %v\n", ak, msg.To, sendErr)
		return
	}
	collect.Add("email.sent", nil, 1)
	log.Printf("relayed alert %v to %v sucessfully\n", ak, msg.To)
}
作者:BobbW
项目:bosu
// DoEmail sends an HTML email for alert ak to every address in n.Email, using
// the sender and SMTP settings from c, with subject, body and any attachments
// supplied by the caller. The message carries an X-Bosun-Server header set to
// util.Hostname. The outcome is logged and counted under "email.sent" or
// "email.sent_failed"; no error is returned.
func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*Attachment) {
	msg := email.NewEmail()
	msg.From = c.EmailFrom
	msg.Subject = string(subject)
	msg.HTML = body
	for _, rcpt := range n.Email {
		msg.To = append(msg.To, rcpt.Address)
	}
	for _, att := range attachments {
		msg.Attach(bytes.NewBuffer(att.Data), att.Filename, att.ContentType)
	}
	msg.Headers.Add("X-Bosun-Server", util.Hostname)

	sendErr := Send(msg, c.SMTPHost, c.SMTPUsername, c.SMTPPassword)
	if sendErr != nil {
		collect.Add("email.sent_failed", nil, 1)
		slog.Errorf("failed to send alert %v to %v %v\n", ak, msg.To, sendErr)
		return
	}
	collect.Add("email.sent", nil, 1)
	slog.Infof("relayed alert %v to %v sucessfully. Subject: %d bytes. Body: %d bytes.", ak, msg.To, len(subject), len(body))
}
作者:jm
项目:bosu
// indexTSDB decodes body as OpenTSDB datapoints and hands them to
// schedule.Search.Index.
//
// The body is gunzipped when it opens as a gzip stream and is used unchanged
// otherwise. It is decoded first as a list of datapoints and, if that fails,
// as a single datapoint. When at least one datapoint results, two counters
// tagged with the sanitised part of r.RemoteAddr before the first colon are
// incremented before indexing. Bodies that yield no datapoints are ignored.
func indexTSDB(r *http.Request, body []byte) {
	if zr, err := gzip.NewReader(bytes.NewReader(body)); err == nil {
		// Read errors are deliberately ignored; whatever ReadAll returned
		// becomes the body to decode.
		body, _ = ioutil.ReadAll(zr)
		zr.Close()
	}

	var (
		single opentsdb.DataPoint
		points opentsdb.MultiDataPoint
	)
	if err := json.Unmarshal(body, &points); err != nil {
		// Not a list; fall back to a single datapoint.
		if err = json.Unmarshal(body, &single); err == nil {
			points = opentsdb.MultiDataPoint{&single}
		}
	}
	if len(points) == 0 {
		return
	}

	remoteHost := strings.Split(r.RemoteAddr, ":")[0]
	tags := opentsdb.TagSet{"remote": opentsdb.MustReplace(remoteHost, "_")}
	collect.Add("search.puts_relayed", tags, 1)
	collect.Add("search.datapoints_relayed", tags, int64(len(points)))
	schedule.Search.Index(points)
}
作者:noblehn
项目:bosu
// relayMetadata forwards a metadata request through rp.BosunProxy and, when
// the proxy answers 204, re-sends the buffered request body in the background
// to every URL in relayPutUrls, with the first "/put" in each URL rewritten to
// "/metadata/put".
//
// Requests that already carry a non-empty relayHeader are not fanned out
// again; requests sent to the secondary relays are marked with that header. A
// failure for one secondary relay is logged and the remaining relays are still
// attempted.
func (rp *relayProxy) relayMetadata(responseWriter http.ResponseWriter, r *http.Request) {
	reader := &passthru{ReadCloser: r.Body}
	r.Body = reader
	w := &relayWriter{ResponseWriter: responseWriter}
	rp.BosunProxy.ServeHTTP(w, r)
	if w.code != 204 {
		verbose("got status %d", w.code)
		return
	}
	verbose("relayed metadata to bosun")
	collect.Add("metadata.relayed", opentsdb.TagSet{}, 1)
	if r.Header.Get(relayHeader) != "" {
		return
	}
	if len(relayPutUrls) != 0 {
		go func() {
			for _, relayURL := range relayPutUrls {
				relayURL = strings.Replace(relayURL, "/put", "/metadata/put", 1)
				body := bytes.NewBuffer(reader.buf.Bytes())
				req, err := http.NewRequest(r.Method, relayURL, body)
				if err != nil {
					// Skip only this target; the other relays are independent.
					verbose("%v", err)
					continue
				}
				req.Header.Set("Content-Type", "application/json")
				req.Header.Add(relayHeader, myHost)
				resp, err := http.DefaultClient.Do(req)
				if err != nil {
					// One unreachable relay must not starve the ones after it.
					verbose("secondary relay error: %v", err)
					continue
				}
				resp.Body.Close()
				verbose("secondary relay success")
			}
		}()
	}
}
作者:Skyscanne
项目:bosu
// CheckExpr evaluates expression e for alert a and records the outcome for
// every result group in rh.Events.
//
// Groups that are squelched or whose alert key appears in ignore are skipped.
// For each remaining group the result is stored on the event as Warn or Crit
// according to checkStatus. A value of exactly zero means normal; any other
// value, NaN included, means checkStatus. The event status is only raised,
// never lowered.
//
// It returns the alert keys whose status is not normal. A nil e yields no
// alerts and no error. Any error being returned is also counted under
// "check.errs" and logged.
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error) {
	if e == nil {
		return
	}
	defer func() {
		if err != nil {
			collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
			slog.Errorln(err)
		}
	}()
	results, err := s.executeExpr(T, rh, a, e)
	if err != nil {
		return nil, err
	}
	isIgnored := func(key models.AlertKey) bool {
		for _, skip := range ignore {
			if key == skip {
				return true
			}
		}
		return false
	}
	for _, r := range results.Results {
		if s.Conf.Squelched(a, r.Group) {
			continue
		}
		ak := models.NewAlertKey(a.Name, r.Group)
		if isIgnored(ak) {
			continue
		}
		var n float64
		if n, err = valueToFloat(r.Value); err != nil {
			return alerts, err
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(models.Event)
			rh.Events[ak] = event
		}
		result := &models.Result{
			Computations: r.Computations,
			Value:        models.Float(n),
			Expr:         e.String(),
		}
		if checkStatus == models.StWarning {
			event.Warn = result
		} else if checkStatus == models.StCritical {
			event.Crit = result
		}
		// NaN is treated like any non-zero value: the status being checked applies.
		status := checkStatus
		if !math.IsNaN(n) && n == 0 {
			status = models.StNormal
		}
		if status != models.StNormal {
			alerts = append(alerts, ak)
		}
		if status > event.Status {
			event.Status = status
		}
	}
	return alerts, err
}
作者:snowsnai
项目:bosu
// CheckExpr evaluates expression e for alert a and records the outcome for
// every result group in rh.Events.
//
// Groups that are squelched or whose alert key appears in ignore are skipped.
// Result values must be an expr.Number or expr.Scalar; anything else aborts
// the check with an error. For each remaining group the result is stored on
// the event as Warn or Crit according to checkStatus. NaN means error status,
// exactly zero means normal, and any other value means checkStatus. The event
// status is only raised, never lowered.
//
// It returns the alert keys whose status is not normal. A nil e yields no
// alerts and no error. Any error being returned is also counted under
// "check.errs" and logged.
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus Status, ignore expr.AlertKeys) (alerts expr.AlertKeys, err error) {
	if e == nil {
		return
	}
	defer func() {
		if err != nil {
			collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
			slog.Errorln(err)
		}
	}()
	results, err := s.executeExpr(T, rh, a, e)
	if err != nil {
		return nil, err
	}
	isIgnored := func(key expr.AlertKey) bool {
		for _, skip := range ignore {
			if key == skip {
				return true
			}
		}
		return false
	}
	for _, r := range results.Results {
		if s.Conf.Squelched(a, r.Group) {
			continue
		}
		ak := expr.NewAlertKey(a.Name, r.Group)
		if isIgnored(ak) {
			continue
		}
		var n float64
		switch v := r.Value.(type) {
		case expr.Number:
			n = float64(v)
		case expr.Scalar:
			n = float64(v)
		default:
			err = fmt.Errorf("expected number or scalar")
			return alerts, err
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(Event)
			rh.Events[ak] = event
		}
		result := &Result{
			Result: r,
			Expr:   e.String(),
		}
		if checkStatus == StWarning {
			event.Warn = result
		} else if checkStatus == StCritical {
			event.Crit = result
		}
		var status Status
		switch {
		case math.IsNaN(n):
			status = StError
		case n == 0:
			status = StNormal
		default:
			status = checkStatus
		}
		if status != StNormal {
			alerts = append(alerts, ak)
		}
		if status > event.Status {
			event.Status = status
		}
	}
	return alerts, err
}
作者:nicolle
项目:bosu
// CheckExpr evaluates expression e for alert a and records the outcome for
// every result group in rh.Events.
//
// The expression runs in its own goroutine. If s.runnerContext is done before
// a result arrives, CheckExpr returns immediately with cancelled set to true
// and no alerts or error.
//
// Groups that are squelched or whose alert key appears in ignore are skipped.
// For each remaining group the result is stored on the event as Warn or Crit
// according to checkStatus. A value of exactly zero means normal; any other
// value, NaN included, means checkStatus. The event status is only raised,
// never lowered.
//
// It returns the alert keys whose status is not normal. A nil e yields no
// alerts and no error. Any error being returned is also counted under
// "check.errs" and logged.
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error, cancelled bool) {
	if e == nil {
		return
	}
	defer func() {
		if err != nil {
			collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
			slog.Errorln(err)
		}
	}()
	type outcome struct {
		results *expr.Results
		err     error
	}
	// See s.CheckAlert for an explanation of execution and cancellation with this channel
	// The buffer of one lets the goroutine complete its send even when nobody
	// receives after cancellation.
	done := make(chan outcome, 1)
	go func() {
		execResults, execErr := s.executeExpr(T, rh, a, e)
		done <- outcome{execResults, execErr}
	}()
	var results *expr.Results
	select {
	case out := <-done:
		results, err = out.results, out.err
	case <-s.runnerContext.Done():
		return nil, nil, true
	}
	if err != nil {
		return nil, err, false
	}
	isIgnored := func(key models.AlertKey) bool {
		for _, skip := range ignore {
			if key == skip {
				return true
			}
		}
		return false
	}
	for _, r := range results.Results {
		if s.RuleConf.Squelched(a, r.Group) {
			continue
		}
		ak := models.NewAlertKey(a.Name, r.Group)
		if isIgnored(ak) {
			continue
		}
		var n float64
		if n, err = valueToFloat(r.Value); err != nil {
			return alerts, err, false
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(models.Event)
			rh.Events[ak] = event
		}
		result := &models.Result{
			Computations: r.Computations,
			Value:        models.Float(n),
			Expr:         e.String(),
		}
		if checkStatus == models.StWarning {
			event.Warn = result
		} else if checkStatus == models.StCritical {
			event.Crit = result
		}
		// NaN is treated like any non-zero value: the status being checked applies.
		status := checkStatus
		if !math.IsNaN(n) && n == 0 {
			status = models.StNormal
		}
		if status != models.StNormal {
			alerts = append(alerts, ak)
		}
		if status > event.Status {
			event.Status = status
		}
	}
	return alerts, err, false
}