trafficcontrol-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r..@apache.org
Subject [trafficcontrol] 01/04: Add Traffic Monitor Disk Support
Date Fri, 14 Dec 2018 04:03:36 GMT
This is an automated email from the ASF dual-hosted git repository.

rob pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git

commit 3619985a3ee3c476ee34607c0b8e21fcd9568a9a
Author: Evan Zelkowitz <19699200+ezelkow1@users.noreply.github.com>
AuthorDate: Wed Nov 21 11:02:35 2018 -0700

    Add Traffic Monitor Disk Support
    
    With these changes, Traffic Monitor will create TM config and CRConfig backup files on disk. If TO is
unreachable and these files exist, it will pull its data from these files. Any time a
new valid config is available, it will be written back to disk. On
    startup, TM will still follow its exponential backoff scheme, but once it hits the max it
will create a non-authenticated TO session so that the disk files can be used. If at any point
TO comes back up, TM will attempt to authenticate and
    continue as normal.
---
 docs/source/development/traffic_monitor.rst |  8 +++
 traffic_monitor/config/config.go            | 22 ++++++++
 traffic_monitor/manager/manager.go          |  2 +-
 traffic_monitor/manager/opsconfig.go        | 26 ++++++++++
 traffic_monitor/towrap/towrap.go            | 79 +++++++++++++++++++++++++----
 5 files changed, 125 insertions(+), 12 deletions(-)

diff --git a/docs/source/development/traffic_monitor.rst b/docs/source/development/traffic_monitor.rst
index 4be0fdb..a835d44 100644
--- a/docs/source/development/traffic_monitor.rst
+++ b/docs/source/development/traffic_monitor.rst
@@ -192,6 +192,14 @@ Shared Data
 -----------
 Processed and aggregated data must be shared between the end of the stat and health processing
pipelines, and HTTP requests. The CSP paradigm of idiomatic Go does not work efficiently with
storing and sharing state. While not idiomatic Go, shared mutexed data structures are faster
and simpler than CSP manager microthreads for each data object. Traffic Monitor has many thread-safe
shared data types and objects. All shared data objects can be seen in ``manager/manager.go:Start()``,
where t [...]
 
+Disk Backup
+------------
+The Traffic Monitor config and CRConfig are both stored as backup files (tmconfig.backup
and crconfig.backup, or whatever you set the values to in the config file). This allows the
monitor to come up and continue serving even if Traffic Ops 
+is down. These files are updated any time a valid config is received from Traffic Ops, so
if Traffic Ops goes down and the monitor is restarted, it can still serve the previous data.
 These files can also be manually edited, and the changes 
+will be reloaded into Traffic Monitor, so that if Traffic Ops is down or unreachable for
an extended period of time, manual updates can be done. If Traffic Ops is unavailable on
initial startup, Traffic Monitor will continue through its 
+exponential backoff until it hits the max retry interval; at that point it will create an
unauthenticated Traffic Ops session and use the data from disk. It will still poll Traffic Ops
for updates, though, and if it successfully gets through 
+then it will log in at that point.
+
 Formatting Conventions
 ======================
 Go code should be formatted with ``gofmt``. See also ``CONTRIBUTING.md``.
diff --git a/traffic_monitor/config/config.go b/traffic_monitor/config/config.go
index 8017950..4f38343 100644
--- a/traffic_monitor/config/config.go
+++ b/traffic_monitor/config/config.go
@@ -40,6 +40,10 @@ const (
 	LogLocationNull = "null"
 	//StaticFileDir is the directory that contains static html and js files.
 	StaticFileDir = "/opt/traffic_monitor/static/"
+	//CrConfigBackupFile is the default file name to store the last crconfig
+	CrConfigBackupFile = "crconfig.backup"
+	//TmConfigBackupFile is the default file name to store the last tmconfig
+	TmConfigBackupFile = "tmconfig.backup"
 )
 
 // Config is the configuration for the application. It includes myriad data, such as polling
intervals and log locations.
@@ -68,6 +72,9 @@ type Config struct {
 	CRConfigHistoryCount         uint64        `json:"crconfig_history_count"`
 	TrafficOpsMinRetryInterval   time.Duration `json:"-"`
 	TrafficOpsMaxRetryInterval   time.Duration `json:"-"`
+	CrConfigBackupFile           string        `json:"crconfig_backup_file"`
+	TmConfigBackupFile           string        `json:"tmconfig_backup_file"`
+	TrafficOpsDiskRetryMax       uint64        `json:"-"`
 }
 
 func (c Config) ErrorLog() log.LogLocation   { return log.LogLocation(c.LogLocationError)
}
@@ -102,6 +109,9 @@ var DefaultConfig = Config{
 	CRConfigHistoryCount:         20000,
 	TrafficOpsMinRetryInterval:   100 * time.Millisecond,
 	TrafficOpsMaxRetryInterval:   60000 * time.Millisecond,
+	CrConfigBackupFile:           CrConfigBackupFile,
+	TmConfigBackupFile:           TmConfigBackupFile,
+	TrafficOpsDiskRetryMax:       2,
 }
 
 // MarshalJSON marshals custom millisecond durations. Aliasing inspired by http://choly.ca/post/go-json-marshalling/
@@ -152,6 +162,9 @@ func (c *Config) UnmarshalJSON(data []byte) error {
 		ServeWriteTimeoutMs            *uint64 `json:"serve_write_timeout_ms"`
 		TrafficOpsMinRetryIntervalMs   *uint64 `json:"traffic_ops_min_retry_interval_ms"`
 		TrafficOpsMaxRetryIntervalMs   *uint64 `json:"traffic_ops_max_retry_interval_ms"`
+		TrafficOpsDiskRetryMax         *uint64 `json:"traffic_ops_disk_retry_max"`
+		CrConfigBackupFile             *string `json:"crconfig_backup_file"`
+		TmConfigBackupFile             *string `json:"tmconfig_backup_file"`
 		*Alias
 	}{
 		Alias: (*Alias)(c),
@@ -200,6 +213,15 @@ func (c *Config) UnmarshalJSON(data []byte) error {
 	if aux.TrafficOpsMaxRetryIntervalMs != nil {
 		c.TrafficOpsMaxRetryInterval = time.Duration(*aux.TrafficOpsMaxRetryIntervalMs) * time.Millisecond
 	}
+	if aux.TrafficOpsDiskRetryMax != nil {
+		c.TrafficOpsDiskRetryMax = *aux.TrafficOpsDiskRetryMax
+	}
+	if aux.CrConfigBackupFile != nil {
+		c.CrConfigBackupFile = *aux.CrConfigBackupFile
+	}
+	if aux.TmConfigBackupFile != nil {
+		c.TmConfigBackupFile = *aux.TmConfigBackupFile
+	}
 	return nil
 }
 
diff --git a/traffic_monitor/manager/manager.go b/traffic_monitor/manager/manager.go
index 6ba9dc9..f6f0c0f 100644
--- a/traffic_monitor/manager/manager.go
+++ b/traffic_monitor/manager/manager.go
@@ -43,7 +43,7 @@ import (
 // Start starts the poller and handler goroutines
 //
 func Start(opsConfigFile string, cfg config.Config, appData config.StaticAppData, trafficMonitorConfigFileName
string) error {
-	toSession := towrap.ITrafficOpsSession(towrap.NewTrafficOpsSessionThreadsafe(nil, cfg.CRConfigHistoryCount))
+	toSession := towrap.ITrafficOpsSession(towrap.NewTrafficOpsSessionThreadsafe(nil, cfg.CRConfigHistoryCount,
cfg))
 
 	localStates := peer.NewCRStatesThreadsafe() // this is the local state as discoverer by
this traffic_monitor
 	fetchCount := threadsafe.NewUint()          // note this is the number of individual caches
fetched from, not the number of times all the caches were polled.
diff --git a/traffic_monitor/manager/opsconfig.go b/traffic_monitor/manager/opsconfig.go
index 7b34bcd..6da71bd 100644
--- a/traffic_monitor/manager/opsconfig.go
+++ b/traffic_monitor/manager/opsconfig.go
@@ -20,9 +20,12 @@ package manager
  */
 
 import (
+	"crypto/tls"
 	"fmt"
 	"io/ioutil"
 	"net"
+	"net/http"
+	"net/http/cookiejar"
 	"time"
 
 	"golang.org/x/sys/unix"
@@ -138,6 +141,7 @@ func StartOpsConfigManager(
 		trafficOpsRequestTimeout := time.Second * time.Duration(10)
 		var realToSession *to.Session
 		var toAddr net.Addr
+		var toLoginCount uint64
 
 		// fixed an issue here where traffic_monitor loops forever, doing nothing useful if traffic_ops
is down,
 		// and would never logging in again.  since traffic_monitor  is just starting up here,
keep retrying until traffic_ops is reachable and a session can be established.
@@ -154,6 +158,28 @@ func StartOpsConfigManager(
 				duration := backoff.BackoffDuration()
 				log.Errorf("retrying in %v\n", duration)
 				time.Sleep(duration)
+
+				if toSession.BackupFileExists() && (toLoginCount >= cfg.TrafficOpsDiskRetryMax)
{
+					jar, err := cookiejar.New(nil)
+					if err != nil {
+						log.Errorf("Err getting cookiejar")
+						continue
+					}
+
+					realToSession = to.NewSession(newOpsConfig.Username, newOpsConfig.Password, newOpsConfig.Url,
staticAppData.UserAgent, &http.Client{
+						Timeout: trafficOpsRequestTimeout,
+						Transport: &http.Transport{
+							TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+						},
+						Jar: jar,
+					}, useCache)
+					toSession.Set(realToSession)
+					// At this point we have a valid 'dummy' session. This will allow us to pull from disk
but will also retry when TO comes up
+					log.Errorf("error instantiating Session with traffic_ops, backup disk files exist, creating
empty traffic_ops session to read")
+					break
+				}
+
+				toLoginCount++
 				continue
 			} else {
 				toSession.Set(realToSession)
diff --git a/traffic_monitor/towrap/towrap.go b/traffic_monitor/towrap/towrap.go
index a017945..fc1cc9e 100644
--- a/traffic_monitor/towrap/towrap.go
+++ b/traffic_monitor/towrap/towrap.go
@@ -22,12 +22,15 @@ package towrap
 import (
 	"errors"
 	"fmt"
+	"io/ioutil"
+	"os"
 	"strconv"
 	"sync"
 	"time"
 
 	"github.com/apache/trafficcontrol/lib/go-log"
 	"github.com/apache/trafficcontrol/lib/go-tc"
+	"github.com/apache/trafficcontrol/traffic_monitor/config"
 	"github.com/apache/trafficcontrol/traffic_ops/client"
 
 	"github.com/json-iterator/go"
@@ -47,6 +50,7 @@ type ITrafficOpsSession interface {
 	DeliveryServices() ([]tc.DeliveryService, error)
 	CacheGroups() ([]tc.CacheGroupNullable, error)
 	CRConfigHistory() []CRConfigStat
+	BackupFileExists() bool
 }
 
 var ErrNilSession = fmt.Errorf("nil session")
@@ -63,6 +67,15 @@ type ByteMapCache struct {
 	m     *sync.RWMutex
 }
 
+func (s TrafficOpsSessionThreadsafe) BackupFileExists() bool {
+	if _, err := os.Stat(s.CrConfigBackupFile); !os.IsNotExist(err) {
+		if _, err = os.Stat(s.TmConfigBackupFile); !os.IsNotExist(err) {
+			return true
+		}
+	}
+	return false
+}
+
 func NewByteMapCache() ByteMapCache {
 	return ByteMapCache{m: &sync.RWMutex{}, cache: &map[string]ByteTime{}}
 }
@@ -148,15 +161,17 @@ type CRConfigStat struct {
 
 // TrafficOpsSessionThreadsafe provides access to the Traffic Ops client safe for multiple
goroutines. This fulfills the ITrafficOpsSession interface.
 type TrafficOpsSessionThreadsafe struct {
-	session      **client.Session // pointer-to-pointer, because we're given a pointer from
the Traffic Ops package, and we don't want to copy it.
-	m            *sync.Mutex
-	lastCRConfig ByteMapCache
-	crConfigHist CRConfigHistoryThreadsafe
+	session            **client.Session // pointer-to-pointer, because we're given a pointer
from the Traffic Ops package, and we don't want to copy it.
+	m                  *sync.Mutex
+	lastCRConfig       ByteMapCache
+	crConfigHist       CRConfigHistoryThreadsafe
+	CrConfigBackupFile string
+	TmConfigBackupFile string
 }
 
 // NewTrafficOpsSessionThreadsafe returns a new threadsafe TrafficOpsSessionThreadsafe wrapping
the given `Session`.
-func NewTrafficOpsSessionThreadsafe(s *client.Session, crConfigHistoryLimit uint64) TrafficOpsSessionThreadsafe
{
-	return TrafficOpsSessionThreadsafe{session: &s, m: &sync.Mutex{}, lastCRConfig:
NewByteMapCache(), crConfigHist: NewCRConfigHistoryThreadsafe(crConfigHistoryLimit)}
+func NewTrafficOpsSessionThreadsafe(s *client.Session, crConfigHistoryLimit uint64, cfg config.Config)
TrafficOpsSessionThreadsafe {
+	return TrafficOpsSessionThreadsafe{session: &s, m: &sync.Mutex{}, lastCRConfig:
NewByteMapCache(), crConfigHist: NewCRConfigHistoryThreadsafe(crConfigHistoryLimit), CrConfigBackupFile:
cfg.CrConfigBackupFile, TmConfigBackupFile: cfg.TmConfigBackupFile}
 }
 
 // Set sets the internal Traffic Ops session. This is safe for multiple goroutines, being
aware they will race.
@@ -229,16 +244,36 @@ func (s *TrafficOpsSessionThreadsafe) CRConfigValid(crc *tc.CRConfig,
cdn string
 
 // CRConfigRaw returns the CRConfig from the Traffic Ops. This is safe for multiple goroutines.
 func (s TrafficOpsSessionThreadsafe) CRConfigRaw(cdn string) ([]byte, error) {
+
 	ss := s.get()
+
+	var b []byte
+	var err error
+	var remoteAddr string
+	var reqInf client.ReqInf
+
 	if ss == nil {
 		return nil, ErrNilSession
+	} else {
+		b, reqInf, err = ss.GetCRConfig(cdn)
+		if err == nil {
+			remoteAddr = reqInf.RemoteAddr.String()
+			ioutil.WriteFile(s.CrConfigBackupFile, b, 0644)
+		} else {
+			if s.BackupFileExists() {
+				b, _ = ioutil.ReadFile(s.CrConfigBackupFile)
+				remoteAddr = "127.0.0.1"
+				log.Errorln("Error getting CRConfig from traffic_ops, backup file exists, reading from
file")
+			} else {
+				return nil, ErrNilSession
+			}
+		}
 	}
-	b, reqInf, err := ss.GetCRConfig(cdn)
 
-	hist := &CRConfigStat{time.Now(), reqInf.RemoteAddr.String(), tc.CRConfigStats{}, err}
+	hist := &CRConfigStat{time.Now(), remoteAddr, tc.CRConfigStats{}, err}
 	defer s.crConfigHist.Add(hist)
 
-	if err != nil {
+	if err != nil && remoteAddr != "127.0.0.1" {
 		return b, err
 	}
 
@@ -277,8 +312,30 @@ func (s TrafficOpsSessionThreadsafe) trafficMonitorConfigMapRaw(cdn string)
(*tc
 	if ss == nil {
 		return nil, ErrNilSession
 	}
-	configMap, _, error := ss.GetTrafficMonitorConfigMap(cdn)
-	return configMap, error
+	var configMap *tc.TrafficMonitorConfigMap
+	var err error
+
+	configMap, _, err = ss.GetTrafficMonitorConfigMap(cdn)
+	if configMap != nil {
+		json := jsoniter.ConfigFastest
+		data, err := json.Marshal(*configMap)
+		if err == nil {
+			ioutil.WriteFile(s.TmConfigBackupFile, data, 0644)
+		}
+	} else {
+		if s.BackupFileExists() {
+			b, _ := ioutil.ReadFile(s.TmConfigBackupFile)
+			log.Errorln("Error getting configMap from traffic_ops, backup file exists, reading from
file")
+			json := jsoniter.ConfigFastest
+			err = json.Unmarshal(b, &configMap)
+			if err != nil {
+				log.Warnf("Error unmarshaling TmConfigBackupFile, ", err)
+			}
+
+		}
+	}
+
+	return configMap, err
 }
 
 // TrafficMonitorConfigMap returns the Traffic Monitor config map from the Traffic Ops. This
is safe for multiple goroutines.


Mime
View raw message