作者:eswd
项目:bosu
// sendNotifications walks s.pendingNotifications and dispatches each pending
// state for each notification.
//
// For every (notification, state) pair:
//   - an unknown state that is silenced is logged and skipped entirely;
//   - an unknown state that is not silenced is appended to s.pendingUnknowns;
//   - a silenced state with any other status is only logged;
//   - everything else is handed to s.notify.
//
// Unless the pair was skipped, a notification with a non-nil Next link has that
// follow-up registered through s.AddNotification with the current UTC time.
// When s.Conf.Quiet is set nothing is dispatched and only a log line is written.
func (s *Schedule) sendNotifications(silenced map[models.AlertKey]models.Silence) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for notification, pending := range s.pendingNotifications {
		for _, state := range pending {
			key := state.AlertKey()
			_, muted := silenced[key]
			unknown := state.Last().Status == StUnknown
			switch {
			case unknown && muted:
				slog.Infoln("silencing unknown", key)
				continue
			case unknown:
				s.pendingUnknowns[notification] = append(s.pendingUnknowns[notification], state)
			case muted:
				slog.Infoln("silencing", key)
			default:
				s.notify(state, notification)
			}
			if notification.Next != nil {
				s.AddNotification(key, notification.Next, time.Now().UTC())
			}
		}
	}
}
作者:noblehn
项目:bosu
// sendNotifications walks s.pendingNotifications and dispatches each pending
// incident state. The silenced tester is called once per alert key; a non-nil
// result means the alert is silenced.
//
// Silenced unknowns are logged and skipped. Unsilenced unknowns are collected in
// s.pendingUnknowns. Other silenced states are only logged, and the rest are
// passed to s.notify. For every pair that was not skipped, a non-nil n.Next is
// queued via s.QueueNotification. In quiet mode (s.Conf.Quiet) the function only
// logs how many notifications were held back.
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for notification, pending := range s.pendingNotifications {
		for _, state := range pending {
			key := state.AlertKey
			muted := silenced(key) != nil
			unknown := state.CurrentStatus == models.StUnknown
			if unknown && muted {
				slog.Infoln("silencing unknown", key)
				continue
			}
			if unknown {
				s.pendingUnknowns[notification] = append(s.pendingUnknowns[notification], state)
			} else if muted {
				slog.Infoln("silencing", key)
			} else {
				s.notify(state, notification)
			}
			if notification.Next == nil {
				continue
			}
			s.QueueNotification(key, notification.Next, utcNow())
		}
	}
}
作者:youngl9
项目:bosu
// save persists the schedule's state to the bolt database held in s.db.
//
// While holding the schedule lock, each named piece of state is gob-encoded
// through a gzip writer into its own in-memory buffer. An encode failure aborts
// the whole save; gzip flush/close failures are only logged. The lock is
// released before the database is touched, and all buffers are then written in
// a single update transaction under dbBucket. On success the size of the state
// file on disk is reported as a metric. A nil s.db makes the call a no-op.
func (s *Schedule) save() {
	if s.db == nil {
		return
	}
	s.Lock("Save")
	sources := map[string]interface{}{
		dbMetric:        s.Search.Read.Metric,
		dbTagk:          s.Search.Read.Tagk,
		dbTagv:          s.Search.Read.Tagv,
		dbMetricTags:    s.Search.Read.MetricTags,
		dbNotifications: s.Notifications,
		dbSilence:       s.Silence,
		dbStatus:        s.status,
		dbMetadata:      s.Metadata,
		dbIncidents:     s.Incidents,
	}
	encoded := make(map[string][]byte)
	for key, value := range sources {
		buf := new(bytes.Buffer)
		zw := gzip.NewWriter(buf)
		// counter tallies the bytes handed to the gzip writer (pre-compression).
		counter := &counterWriter{w: zw}
		if err := gob.NewEncoder(counter).Encode(value); err != nil {
			slog.Errorf("error saving %s: %v", key, err)
			s.Unlock()
			return
		}
		if err := zw.Flush(); err != nil {
			slog.Errorf("gzip flush error saving %s: %v", key, err)
		}
		if err := zw.Close(); err != nil {
			slog.Errorf("gzip close error saving %s: %v", key, err)
		}
		encoded[key] = buf.Bytes()
		slog.Infof("wrote %s: %v", key, conf.ByteSize(counter.written))
		collect.Put("statefile.size", opentsdb.TagSet{"object": key}, counter.written)
	}
	s.Unlock()

	// writeAll stores every encoded buffer under its name in one transaction.
	writeAll := func(tx *bolt.Tx) error {
		bucket, err := tx.CreateBucketIfNotExists([]byte(dbBucket))
		if err != nil {
			return err
		}
		for key, payload := range encoded {
			if err := bucket.Put([]byte(key), payload); err != nil {
				return err
			}
		}
		return nil
	}
	if err := s.db.Update(writeAll); err != nil {
		slog.Errorf("save db update error: %v", err)
		return
	}
	if info, err := os.Stat(s.Conf.StateFile); err == nil {
		collect.Put("statefile.size", opentsdb.TagSet{"object": "total"}, info.Size())
	}
	slog.Infoln("save to db complete")
}
作者:nicolle
项目:bosu
// CheckNotifications handles every notification that is currently due and
// returns the time at which the next notification needs attention.
//
// Due notifications for silenced alerts are skipped. A notification whose alert
// is currently listed as unevaluated is requeued one minute after its original
// time. Otherwise the latest incident for the alert is loaded and passed to
// s.Notify. Afterwards the pending notifications are sent and all stored
// notifications older than the time captured at the start of the call are
// cleared. If any data-access step fails, the error is logged and the returned
// time is one minute from now.
func (s *Schedule) CheckNotifications() time.Time {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	cutoff := utcNow()
	store := s.DataAccess.Notifications()
	due, err := store.GetDueNotifications()
	if err != nil {
		slog.Error("Error getting notifications", err)
		return utcNow().Add(time.Minute)
	}
	for ak, pending := range due {
		if silenced(ak) != nil {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, when := range pending {
			n := s.RuleConf.GetNotification(name)
			if n == nil {
				continue
			}
			// An alert that is unevaluated because of a dependency is simply
			// requeued until the dependency resolves itself.
			_, unevalKeys := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name())
			isUnevaluated := false
			for _, candidate := range unevalKeys {
				if candidate == ak {
					isUnevaluated = true
					break
				}
			}
			if isUnevaluated {
				s.QueueNotification(ak, n, when.Add(time.Minute))
				continue
			}
			incident, err := s.DataAccess.State().GetLatestIncident(ak)
			if err != nil {
				slog.Error(err)
				continue
			}
			if incident != nil {
				s.Notify(incident, n)
			}
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	if err = s.DataAccess.Notifications().ClearNotificationsBefore(cutoff); err != nil {
		slog.Error("Error clearing notifications", err)
		return utcNow().Add(time.Minute)
	}
	next, err := s.DataAccess.Notifications().GetNextNotificationTime()
	if err != nil {
		slog.Error("Error getting next notification time", err)
		return utcNow().Add(time.Minute)
	}
	return next
}
作者:pd
项目:bosu
// recordSent adds num to the package-level sent counter while holding slock.
// When Debug is enabled the amount is logged before the counter is updated.
func recordSent(num int) {
	if Debug {
		slog.Infoln("sent", num)
	}
	slock.Lock()
	defer slock.Unlock()
	sent += int64(num)
}
作者:mathp
项目:bosu
// Run executes the collector program forever, feeding its datapoints to dpchan.
// It never returns.
//
// With a non-zero Interval the program is run once per Interval and the result
// of runProgram is not inspected. With a zero Interval the program is rerun
// after each exit, no sooner than DefaultFreq after it was started; any error
// from runProgram is logged, as is each restart.
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint) {
	if c.Interval != 0 {
		for {
			tick := time.After(c.Interval)
			c.runProgram(dpchan)
			<-tick
		}
	}
	for {
		tick := time.After(DefaultFreq)
		err := c.runProgram(dpchan)
		if err != nil {
			slog.Infoln(err)
		}
		<-tick
		slog.Infoln("restarting", c.Path)
	}
}
作者:jareks
项目:bosu
// RestoreState migrates previously stored state out of s.db by calling
// migrateOldDataToRedis, then removes the obsolete "metrictags" key.
//
// Both the schedule lock and the search lock are held for the whole call.
// bosunStartupTime is stamped when the function returns, whether or not the
// migration succeeded. A migration error is returned to the caller unchanged.
func (s *Schedule) RestoreState() error {
	defer func() { bosunStartupTime = utcNow() }()
	slog.Infoln("RestoreState")
	began := utcNow()

	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()

	err := migrateOldDataToRedis(s.db, s.DataAccess, s)
	if err != nil {
		return err
	}
	// Drop the metrictags entry if it exists.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(began))
	return nil
}
作者:noblehn
项目:bosu
// InitPrograms registers external collector programs found under cpath.
//
// Each entry of cpath whose name parses as a non-negative integer is treated as
// a directory of programs to run every that-many seconds (0 yields a zero
// Interval). Every executable file inside it is appended to the package-level
// collectors slice as a ProgramCollector. Entries named "etc" and "lib" are
// skipped silently; any other non-numeric name is logged as invalid. I/O errors
// are logged and the affected directory is skipped.
//
// Directory handles are closed as soon as their listing has been read so that
// no file descriptors are leaked.
func InitPrograms(cpath string) {
	cdir, err := os.Open(cpath)
	if err != nil {
		slog.Infoln(err)
		return
	}
	defer cdir.Close()
	idirs, err := cdir.Readdir(0)
	if err != nil {
		slog.Infoln(err)
		return
	}
	for _, idir := range idirs {
		idirname := idir.Name()
		i, err := strconv.Atoi(idirname)
		if err != nil || i < 0 {
			if idirname != "etc" && idirname != "lib" {
				slog.Infoln("invalid collector folder name:", idirname)
			}
			continue
		}
		interval := time.Second * time.Duration(i)
		dirPath := filepath.Join(cdir.Name(), idirname)
		dir, err := os.Open(dirPath)
		if err != nil {
			slog.Infoln(err)
			continue
		}
		files, err := dir.Readdir(0)
		// Close right away instead of deferring: a defer inside this loop would
		// keep every subdirectory open until InitPrograms returns.
		dir.Close()
		if err != nil {
			slog.Infoln(err)
			continue
		}
		for _, file := range files {
			if !isExecutable(file) {
				continue
			}
			collectors = append(collectors, &ProgramCollector{
				Path:     filepath.Join(dirPath, file.Name()),
				Interval: interval,
			})
		}
	}
}
作者:eswd
项目:bosu
// CheckNotifications works through the queued notifications and returns how
// long to wait until the soonest remaining one triggers (at most one hour).
//
// The queue is detached from the schedule first. Entries for silenced alerts
// are dropped. An entry whose timeout has not yet elapsed, or whose alert
// state is marked Unevaluated, is put back with its original timestamp. An
// entry for an alert with no recorded state is dropped. Everything else is
// passed to s.Notify, and the pending notifications are then sent.
func (s *Schedule) CheckNotifications() time.Duration {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	queued := s.Notifications
	s.Notifications = nil
	for ak, byName := range queued {
		if _, muted := silenced[ak]; muted {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, started := range byName {
			n, ok := s.Conf.Notifications[name]
			if !ok {
				continue
			}
			if started.Add(n.Timeout).Sub(time.Now()) > 0 {
				// Not due yet: keep it queued.
				s.AddNotification(ak, n, started)
				continue
			}
			st := s.status[ak]
			switch {
			case st == nil:
				// No state recorded for this alert; nothing to notify about.
			case st.Unevaluated:
				// The alert is currently unevaluated because of a dependency;
				// requeue it until the dependency resolves itself.
				s.AddNotification(ak, n, started)
			default:
				s.Notify(st, n)
			}
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil

	soonest := time.Hour
	now := time.Now()
	for _, byName := range s.Notifications {
		for name, started := range byName {
			n, ok := s.Conf.Notifications[name]
			if !ok {
				continue
			}
			if left := started.Add(n.Timeout).Sub(now); left < soonest {
				soonest = left
			}
		}
	}
	return soonest
}
作者:nicolle
项目:bosu
// unotify renders the "unknown" template for a group of alert keys and sends
// the result through notification n.
//
// The group is recorded in s.Group under the current time. The template comes
// from s.RuleConf.GetUnknownTemplate, falling back to defaultUnknownTemplate
// when that returns nil. Template execution errors are logged and whatever was
// rendered so far is still sent.
func (s *Schedule) unotify(name string, group models.AlertKeys, n *conf.Notification) {
	var subject, body bytes.Buffer
	now := utcNow()
	s.Group[now] = group

	tmpl := s.RuleConf.GetUnknownTemplate()
	if tmpl == nil {
		tmpl = defaultUnknownTemplate
	}
	data := s.unknownData(now, name, group)
	if tmpl.Body != nil {
		err := tmpl.Body.Execute(&body, &data)
		if err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	if tmpl.Subject != nil {
		err := tmpl.Subject.Execute(&subject, &data)
		if err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	n.Notify(subject.String(), body.String(), subject.Bytes(), body.Bytes(), s.SystemConf, name)
}
作者:noblehn
项目:bosu
// Run executes the collector program repeatedly, feeding its datapoints to
// dpchan, until quit is closed or receives a value.
//
// With a zero Interval the program is rerun after each exit, no sooner than
// DefaultFreq after it was started; errors from runProgram and each restart are
// logged. With a non-zero Interval the program is run once per Interval and the
// result of runProgram is not inspected.
//
// quit is checked between runs in both modes. Previously the zero-Interval mode
// waited on the timer alone and could never be stopped.
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint, quit <-chan struct{}) {
	if c.Interval == 0 {
		for {
			next := time.After(DefaultFreq)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			select {
			case <-next:
			case <-quit:
				return
			}
			slog.Infoln("restarting", c.Path)
		}
	}
	for {
		next := time.After(c.Interval)
		c.runProgram(dpchan)
		select {
		case <-next:
		case <-quit:
			return
		}
	}
}
作者:nicolle
项目:bosu
// sendNotifications walks s.pendingNotifications and dispatches each pending
// incident state. In quiet mode (s.quiet) it only logs how many notifications
// were held back.
//
// A state is skipped when its alert is no longer present in s.RuleConf, when it
// is silenced, or when the alert is not a log alert and the incident is closed
// or no longer needs acknowledgement; in that last case the stored
// notifications for the alert key are cleared as well. Unsilenced unknown
// states are collected in s.pendingUnknowns and all remaining states go to
// s.notify. For states that were not skipped, a non-nil n.Next is queued via
// s.QueueNotification.
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for notification, pending := range s.pendingNotifications {
		for _, state := range pending {
			key := state.AlertKey
			alert := s.RuleConf.GetAlert(key.Name())
			if alert == nil {
				continue
			}
			muted := silenced(key) != nil
			switch {
			case state.CurrentStatus == models.StUnknown:
				if muted {
					slog.Infoln("silencing unknown", key)
					continue
				}
				s.pendingUnknowns[notification] = append(s.pendingUnknowns[notification], state)
			case muted:
				slog.Infof("silencing %s", key)
				continue
			case !alert.Log && (!state.Open || !state.NeedAck):
				slog.Errorf("Cannot notify acked or closed alert %s. Clearing.", key)
				err := s.DataAccess.Notifications().ClearNotifications(key)
				if err != nil {
					slog.Error(err)
				}
				continue
			default:
				s.notify(state, notification)
			}
			if notification.Next != nil {
				s.QueueNotification(key, notification.Next, utcNow())
			}
		}
	}
}
作者:nicolle
项目:bosu
// fetchR53 fills in b.Route53Zone when the line item belongs to Amazon Route 53.
//
// The zone ID is the second "/"-separated segment of b.ResourceID. Zones that
// were already looked up are served from the awsBillingR53zones cache;
// otherwise the zone is fetched through awsBilling.r53svc and cached.
//
// Line items for other products are left untouched. A resource ID without a
// "/" separator, or a failed fetch, is logged and leaves b.Route53Zone unset.
// Both cases previously caused a panic (index out of range and nil dereference
// respectively).
func (b *billLineItem) fetchR53(awsBilling *awsBillingConfig) {
	if b.ProductCode != "AmazonRoute53" {
		// No Route 53 information to get for this row.
		return
	}
	parts := strings.Split(b.ResourceID, "/")
	if len(parts) < 2 {
		slog.Infoln("Cannot parse Route53 zone ID from resource ID", b.ResourceID)
		return
	}
	zoneID := parts[1]
	// Use the locally cached copy of the zone when there is one.
	if cached, ok := awsBillingR53zones[zoneID]; ok {
		b.Route53Zone = cached.HostedZone
		return
	}
	// Otherwise fetch it from Route 53.
	zone, err := awsBilling.r53svc.GetHostedZone(&route53.GetHostedZoneInput{
		Id: aws.String(zoneID),
	})
	if err != nil {
		slog.Infoln("Cannot fetch Route53 hosted zone", b.ResourceID, err)
		// zone is not usable after an error; do not cache or dereference it.
		return
	}
	awsBillingR53zones[zoneID] = *zone
	b.Route53Zone = zone.HostedZone
}
作者:jm
项目:bosu
// c_bacula_status reports, for every distinct job name in the Bacula Job
// table, how many jobs with status 'T' finished within the last seven days.
//
// user, pass and dbase are used to build the MySQL data source name. One
// datapoint named "bacula.<job>.last_week" is produced per job name.
//
// An error opening the database, running the job-name query, or iterating its
// rows is returned. A failure to read the count for a single job is logged and
// that job is skipped, so a stale count from a previous job is never reported.
func c_bacula_status(user, pass, dbase string) (opentsdb.MultiDataPoint, error) {
	// NOTE(review): the format string in the scraped source was mangled by email
	// obfuscation ("%[email protected]"). It is restored here to the driver's
	// user:password@/dbname form, which uses the default address — confirm
	// against the original repository.
	dsn := fmt.Sprintf("%s:%s@/%s", user, pass, dbase)
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		slog.Error("Failed to connect to database")
		return nil, err
	}
	defer db.Close()

	var md opentsdb.MultiDataPoint
	var tagSet opentsdb.TagSet
	var rate metadata.RateType
	var unit metadata.Unit
	rate = metadata.Gauge
	unit = metadata.Item
	description := "Successful backup jobs in the last week"

	rows, err := db.Query("SELECT DISTINCT(Name) from Job")
	if err != nil {
		slog.Error("Query Error: " + err.Error())
		return nil, err
	}
	// Release the result set (and its connection) on every return path.
	defer rows.Close()
	for rows.Next() {
		var name string
		if err := rows.Scan(&name); err != nil {
			slog.Error("Query Error: " + err.Error())
			return nil, err
		}
		var value int
		r := db.QueryRow("SELECT count(JobId) as value from Job where RealEndTime>SUBTIME(now(), '7 0:0:0') and JobStatus='T' and Name=?", name)
		if err := r.Scan(&value); err != nil {
			slog.Error("Query Error: " + err.Error())
			continue
		}
		slog.Infoln(name, value)
		Add(&md, "bacula."+name+".last_week", value, tagSet, rate, unit, description)
	}
	// rows.Next returns false on both exhaustion and failure; tell them apart.
	if err := rows.Err(); err != nil {
		slog.Error("Query Error: " + err.Error())
		return nil, err
	}
	return md, nil
}
作者:snowsnai
项目:bosu
// watch registers every file or directory under root whose base name matches
// pattern with an fsnotify watcher, then starts a goroutine that calls f
// whenever one of them is written to.
//
// After f has run, further events are ignored for two seconds. Failing to
// create the watcher, a malformed pattern, or failing to add a path is fatal.
// Paths that cannot be accessed during the walk are logged and skipped.
func watch(root, pattern string, f func()) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		slog.Fatal(err)
	}
	walkErr := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk reports access problems through err, and info may be nil in
			// that case, so it must not be dereferenced.
			slog.Errorln("error:", path, err)
			return nil
		}
		matched, err := filepath.Match(pattern, info.Name())
		if err != nil {
			slog.Fatal(err)
		}
		if !matched {
			return nil
		}
		if err := watcher.Add(path); err != nil {
			slog.Fatal(err)
		}
		return nil
	})
	if walkErr != nil {
		slog.Errorln("error:", walkErr)
	}
	slog.Infoln("watching", pattern, "in", root)
	// wait is the earliest time at which the next event may trigger f. It is
	// only touched by the goroutine below.
	wait := time.Now()
	go func() {
		for {
			select {
			case event := <-watcher.Events:
				if wait.After(time.Now()) {
					continue
				}
				if event.Op&fsnotify.Write == fsnotify.Write {
					f()
					wait = time.Now().Add(time.Second * 2)
				}
			case err := <-watcher.Errors:
				slog.Errorln("error:", err)
			}
		}
	}()
}
作者:snowsnai
项目:bosu
// main is the program entry point. It parses flags and the configuration
// file, initializes metadata and the scheduler, optionally starts a relay and
// a read-only TSDB proxy, launches the web listener and the scheduler in
// goroutines, installs an interrupt handler, and then blocks forever.
func main() {
flag.Parse()
// -version: print version information and exit successfully.
if *flagVersion {
fmt.Println(version.GetVersionInfo("bosun"))
os.Exit(0)
}
// Run every function registered in the mains slice.
for _, m := range mains {
m()
}
runtime.GOMAXPROCS(runtime.NumCPU())
c, err := conf.ParseFile(*flagConf)
if err != nil {
slog.Fatal(err)
}
// With the test flag set, a successful parse is all that is required.
if *flagTest {
os.Exit(0)
}
httpListen := &url.URL{
Scheme: "http",
Host: c.HTTPListen,
}
// A listen address without a host part (":port") is addressed via localhost.
if strings.HasPrefix(httpListen.Host, ":") {
httpListen.Host = "localhost" + httpListen.Host
}
if err := metadata.Init(httpListen, false); err != nil {
slog.Fatal(err)
}
if err := sched.Load(c); err != nil {
slog.Fatal(err)
}
// Optional relay: reverse-proxy /api/ requests on RelayListen to httpListen.
if c.RelayListen != "" {
go func() {
mux := http.NewServeMux()
mux.Handle("/api/", httputil.NewSingleHostReverseProxy(httpListen))
s := &http.Server{
Addr: c.RelayListen,
Handler: mux,
}
slog.Fatal(s.ListenAndServe())
}()
}
if c.TSDBHost != "" {
if err := collect.Init(httpListen, "bosun"); err != nil {
slog.Fatal(err)
}
tsdbHost := &url.URL{
Scheme: "http",
Host: c.TSDBHost,
}
// Read-only mode: put a local server in front of the TSDB host that answers
// /api/put with 204 without forwarding it and proxies every other request,
// then point c.TSDBHost at that server.
if *flagReadonly {
rp := httputil.NewSingleHostReverseProxy(tsdbHost)
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/put" {
w.WriteHeader(204)
return
}
rp.ServeHTTP(w, r)
}))
slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
tsdbHost, _ = url.Parse(ts.URL)
c.TSDBHost = tsdbHost.Host
}
}
// The quiet flag can only turn quiet mode on; it never overrides a config
// value that is already true.
if *flagQuiet {
c.Quiet = true
}
// Web listener; a returned error is fatal.
go func() { slog.Fatal(web.Listen(c.HTTPListen, *flagDev, c.TSDBHost)) }()
// Scheduler, unless checks are disabled by flag.
go func() {
if !*flagNoChecks {
sched.Run()
}
}()
// Interrupt handling: the first interrupt closes the scheduler in the
// background and then exits with status 1; a second interrupt received in the
// meantime exits immediately with status 1.
go func() {
sc := make(chan os.Signal, 1)
signal.Notify(sc, os.Interrupt)
killing := false
for range sc {
if killing {
slog.Infoln("Second interrupt: exiting")
os.Exit(1)
}
killing = true
go func() {
slog.Infoln("Interrupt: closing down...")
sched.Close()
slog.Infoln("done")
os.Exit(1)
}()
}
}()
// Watch mode: register callbacks for changes to Go sources, HTML templates
// and TypeScript files.
if *flagWatch {
watch(".", "*.go", quit)
watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
base := filepath.Join("web", "static", "js")
watch(base, "*.ts", web.RunTsc)
}
// Block forever; the process ends through one of the os.Exit or slog.Fatal
// paths above.
select {}
}
作者:eswd
项目:bosu
//.........这里部分代码省略.........
for _, x := range conf.ExtraHop {
check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent))
}
if err != nil {
slog.Fatal(err)
}
collectors.KeepalivedCommunity = conf.KeepalivedCommunity
// Add all process collectors. This is platform specific.
collectors.WatchProcesses()
collectors.WatchProcessesDotNet()
if *flagFake > 0 {
collectors.InitFake(*flagFake)
}
collect.Debug = *flagDebug
util.Debug = *flagDebug
collect.DisableDefaultCollectors = conf.DisableSelf
c := collectors.Search(conf.Filter)
if len(c) == 0 {
slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
}
for _, col := range c {
col.Init()
}
u, err := parseHost(conf.Host)
if *flagList {
list(c)
return
} else if *flagPrint {
u = &url.URL{Scheme: "http", Host: "localhost:0"}
} else if err != nil {
slog.Fatalf("invalid host %v: %v", conf.Host, err)
}
freq := time.Second * time.Duration(conf.Freq)
if freq <= 0 {
slog.Fatal("freq must be > 0")
}
collectors.DefaultFreq = freq
collect.Freq = freq
if conf.BatchSize < 0 {
slog.Fatal("BatchSize must be > 0")
}
if conf.BatchSize != 0 {
collect.BatchSize = conf.BatchSize
}
collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
if *flagPrint {
collect.Print = true
}
if !*flagDisableMetadata {
if err := metadata.Init(u, *flagDebug); err != nil {
slog.Fatal(err)
}
}
cdp, cquit := collectors.Run(c)
if u != nil {
slog.Infoln("OpenTSDB host:", u)
}
if err := collect.InitChan(u, "scollector", cdp); err != nil {
slog.Fatal(err)
}
if version.VersionDate != "" {
v, err := strconv.ParseInt(version.VersionDate, 10, 64)
if err == nil {
go func() {
metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
"Scollector version number, which indicates when scollector was built.")
for {
if err := collect.Put("version", collect.Tags, v); err != nil {
slog.Error(err)
}
time.Sleep(time.Hour)
}
}()
}
}
if *flagBatchSize > 0 {
collect.BatchSize = *flagBatchSize
}
go func() {
const maxMem = 500 * 1024 * 1024 // 500MB
var m runtime.MemStats
for range time.Tick(time.Minute) {
runtime.ReadMemStats(&m)
if m.Alloc > maxMem {
panic("memory max reached")
}
}
}()
sChan := make(chan os.Signal)
signal.Notify(sChan, os.Interrupt)
<-sChan
close(cquit)
// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
time.AfterFunc(5*time.Second, func() {
os.Exit(0)
})
collect.Flush()
}
作者:trigrass
项目:bosu
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
defer func() {
bosunStartupTime = time.Now()
}()
slog.Infoln("RestoreState")
start := time.Now()
s.Lock("RestoreState")
defer s.Unlock()
s.Search.Lock()
defer s.Search.Unlock()
s.Notifications = nil
decode := func(name string, dst interface{}) error {
var data []byte
err := s.db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte(dbBucket))
if b == nil {
return fmt.Errorf("unknown bucket: %v", dbBucket)
}
data = b.Get([]byte(name))
return nil
})
if err != nil {
return err
}
gr, err := gzip.NewReader(bytes.NewReader(data))
if err != nil {
return err
}
defer gr.Close()
return gob.NewDecoder(gr).Decode(dst)
}
if err := decode(dbMetadata, &s.Metadata); err != nil {
slog.Errorln(dbMetadata, err)
}
if err := decode(dbMetricMetadata, &s.metricMetadata); err != nil {
slog.Errorln(dbMetricMetadata, err)
}
for k, v := range s.Metadata {
if k.Name == "desc" || k.Name == "rate" || k.Name == "unit" {
s.PutMetadata(k, v.Value)
delete(s.Metadata, k)
}
}
if err := decode(dbMetric, &s.Search.Metric); err != nil {
slog.Errorln(dbMetric, err)
}
if err := decode(dbTagk, &s.Search.Tagk); err != nil {
slog.Errorln(dbTagk, err)
}
if err := decode(dbTagv, &s.Search.Tagv); err != nil {
slog.Errorln(dbTagv, err)
}
if err := decode(dbMetricTags, &s.Search.MetricTags); err != nil {
slog.Errorln(dbMetricTags, err)
}
notifications := make(map[expr.AlertKey]map[string]time.Time)
if err := decode(dbNotifications, ¬ifications); err != nil {
slog.Errorln(dbNotifications, err)
}
if err := decode(dbSilence, &s.Silence); err != nil {
slog.Errorln(dbSilence, err)
}
if err := decode(dbIncidents, &s.Incidents); err != nil {
slog.Errorln(dbIncidents, err)
}
if err := decode(dbErrors, &s.AlertStatuses); err != nil {
slog.Errorln(dbErrors, err)
}
// Calculate next incident id.
for _, i := range s.Incidents {
if i.Id > s.maxIncidentId {
s.maxIncidentId = i.Id
}
}
status := make(States)
if err := decode(dbStatus, &status); err != nil {
slog.Errorln(dbStatus, err)
}
clear := func(r *Result) {
if r == nil {
return
}
r.Computations = nil
}
for ak, st := range status {
a, present := s.Conf.Alerts[ak.Name()]
if !present {
slog.Errorln("sched: alert no longer present, ignoring:", ak)
continue
} else if s.Conf.Squelched(a, st.Group) {
slog.Infoln("sched: alert now squelched:", ak)
continue
} else {
t := a.Unknown
if t == 0 {
t = s.Conf.CheckFrequency
}
if t == 0 && st.Last().Status == StUnknown {
//.........这里部分代码省略.........
作者:nicolle
项目:bosu
//.........这里部分代码省略.........
if err != nil {
slog.Fatalf("Error adding tag overrides: %s", err)
}
u, err := parseHost(conf.Host)
if *flagList {
list(c)
return
} else if *flagPrint {
u = &url.URL{Scheme: "http", Host: "localhost:0"}
} else if err != nil {
slog.Fatalf("invalid host %v: %v", conf.Host, err)
}
freq := time.Second * time.Duration(conf.Freq)
if freq <= 0 {
slog.Fatal("freq must be > 0")
}
collectors.DefaultFreq = freq
collect.Freq = freq
if conf.BatchSize < 0 {
slog.Fatal("BatchSize must be > 0")
}
if conf.BatchSize != 0 {
collect.BatchSize = conf.BatchSize
}
collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
if *flagPrint {
collect.Print = true
}
if !*flagDisableMetadata {
if err := metadata.Init(u, *flagDebug); err != nil {
slog.Fatal(err)
}
}
cdp, cquit := collectors.Run(c)
if u != nil {
slog.Infoln("OpenTSDB host:", u)
}
collect.UseNtlm = conf.UseNtlm
if err := collect.InitChan(u, "scollector", cdp); err != nil {
slog.Fatal(err)
}
if collect.DisableDefaultCollectors == false && version.VersionDate != "" {
v, err := strconv.ParseInt(version.VersionDate, 10, 64)
if err == nil {
go func() {
metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
"Scollector version number, which indicates when scollector was built.")
for {
if err := collect.Put("version", collect.Tags, v); err != nil {
slog.Error(err)
}
time.Sleep(time.Hour)
}
}()
}
}
if *flagBatchSize > 0 {
collect.BatchSize = *flagBatchSize
}
if conf.MaxQueueLen != 0 {
if conf.MaxQueueLen < collect.BatchSize {
slog.Fatalf("MaxQueueLen must be >= %d (BatchSize)", collect.BatchSize)
}
collect.MaxQueueLen = conf.MaxQueueLen
}
maxMemMB := uint64(500)
if conf.MaxMem != 0 {
maxMemMB = conf.MaxMem
}
go func() {
var m runtime.MemStats
for range time.Tick(time.Second * 30) {
runtime.ReadMemStats(&m)
allocMB := m.Alloc / 1024 / 1024
if allocMB > maxMemMB {
slog.Fatalf("memory max runtime reached: (current alloc: %v megabytes, max: %v megabytes)", allocMB, maxMemMB)
}
//See proccess_windows.go and process_linux.go for total process memory usage.
//Note that in linux the rss metric includes shared pages, where as in
//Windows the private working set does not include shared memory.
//Total memory used seems to scale linearly with m.Alloc.
//But we want this to catch a memory leak outside the runtime (WMI/CGO).
//So for now just add any runtime allocations to the allowed total limit.
maxMemTotalMB := maxMemMB + allocMB
if collectors.TotalScollectorMemoryMB > maxMemTotalMB {
slog.Fatalf("memory max total reached: (current total: %v megabytes, current runtime alloc: %v megabytes, max: %v megabytes)", collectors.TotalScollectorMemoryMB, allocMB, maxMemTotalMB)
}
}
}()
sChan := make(chan os.Signal)
signal.Notify(sChan, os.Interrupt)
<-sChan
close(cquit)
// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
time.AfterFunc(5*time.Second, func() {
os.Exit(0)
})
collect.Flush()
}
作者:Victoria
项目:bosu
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
defer func() {
bosunStartupTime = time.Now()
}()
slog.Infoln("RestoreState")
start := time.Now()
s.Lock("RestoreState")
defer s.Unlock()
s.Search.Lock()
defer s.Search.Unlock()
s.Notifications = nil
db := s.db
notifications := make(map[models.AlertKey]map[string]time.Time)
if err := decode(db, dbNotifications, ¬ifications); err != nil {
slog.Errorln(dbNotifications, err)
}
//status := make(States)
// if err := decode(db, dbStatus, &status); err != nil {
// slog.Errorln(dbStatus, err)
// }
// clear := func(r *models.Result) {
// if r == nil {
// return
// }
// r.Computations = nil
//}
//TODO: ???
// for ak, st := range status {
// a, present := s.Conf.Alerts[ak.Name()]
// if !present {
// slog.Errorln("sched: alert no longer present, ignoring:", ak)
// continue
// } else if s.Conf.Squelched(a, st.Group) {
// slog.Infoln("sched: alert now squelched:", ak)
// continue
// } else {
// t := a.Unknown
// if t == 0 {
// t = s.Conf.CheckFrequency
// }
// if t == 0 && st.Last().Status == StUnknown {
// st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
// }
// }
// clear(st.Result)
// newHistory := []Event{}
// for _, e := range st.History {
// clear(e.Warn)
// clear(e.Crit)
// // Remove error events which no longer are a thing.
// if e.Status <= StUnknown {
// newHistory = append(newHistory, e)
// }
// }
// st.History = newHistory
// s.status[ak] = st
// if a.Log && st.Open {
// st.Open = false
// slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
// }
// for name, t := range notifications[ak] {
// n, present := s.Conf.Notifications[name]
// if !present {
// slog.Infoln("sched: notification not present during restore:", name)
// continue
// }
// if a.Log {
// slog.Infoln("sched: alert is now log, removing notification:", ak)
// continue
// }
// s.AddNotification(ak, n, t)
// }
//}
if err := migrateOldDataToRedis(db, s.DataAccess); err != nil {
return err
}
// delete metrictags if they exist.
deleteKey(s.db, "metrictags")
slog.Infoln("RestoreState done in", time.Since(start))
return nil
}