*: let patrol region tick interval configurable (#1084)
nolouch authored and disksing committed May 21, 2018
1 parent 2a6f356 commit 7e98269
Showing 5 changed files with 31 additions and 5 deletions.
4 changes: 4 additions & 0 deletions server/cache.go
@@ -566,6 +566,10 @@ func (c *clusterInfo) GetSplitMergeInterval() time.Duration {
return c.opt.GetSplitMergeInterval()
}

func (c *clusterInfo) GetPatrolRegionInterval() time.Duration {
return c.opt.GetPatrolRegionInterval()
}

func (c *clusterInfo) GetMaxStoreDownTime() time.Duration {
return c.opt.GetMaxStoreDownTime()
}
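
Note: the new clusterInfo getter just forwards to the schedule options, so callers that hold the cluster (like the coordinator below, via c.cluster.GetPatrolRegionInterval()) always see the currently loaded configuration rather than a value captured at startup.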
5 changes: 5 additions & 0 deletions server/config.go
@@ -360,6 +360,8 @@ type ScheduleConfig struct {
MaxMergeRegionSize uint64 `toml:"max-merge-region-size,omitempty" json:"max-merge-region-size"`
// SplitMergeInterval is the minimum interval time to permit merge after split.
SplitMergeInterval typeutil.Duration `toml:"split-merge-interval,omitempty" json:"split-merge-interval"`
// PatrolRegionInterval is the interval for scanning region during patrol.
PatrolRegionInterval typeutil.Duration `toml:"patrol-region-interval,omitempty" json:"patrol-region-interval"`
// MaxStoreDownTime is the max duration after which
// a store will be considered to be down if it hasn't reported heartbeats.
MaxStoreDownTime typeutil.Duration `toml:"max-store-down-time,omitempty" json:"max-store-down-time"`
@@ -399,6 +401,7 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
MaxPendingPeerCount: c.MaxPendingPeerCount,
MaxMergeRegionSize: c.MaxMergeRegionSize,
SplitMergeInterval: c.SplitMergeInterval,
PatrolRegionInterval: c.PatrolRegionInterval,
MaxStoreDownTime: c.MaxStoreDownTime,
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
@@ -418,6 +421,7 @@ const (
defaultMaxPendingPeerCount = 16
defaultMaxMergeRegionSize = 0
defaultSplitMergeInterval = 1 * time.Hour
defaultPatrolRegionInterval = 100 * time.Millisecond
defaultMaxStoreDownTime = 30 * time.Minute
defaultLeaderScheduleLimit = 4
defaultRegionScheduleLimit = 4
@@ -433,6 +437,7 @@ func (c *ScheduleConfig) adjust() error {
adjustUint64(&c.MaxPendingPeerCount, defaultMaxPendingPeerCount)
adjustUint64(&c.MaxMergeRegionSize, defaultMaxMergeRegionSize)
adjustDuration(&c.SplitMergeInterval, defaultSplitMergeInterval)
adjustDuration(&c.PatrolRegionInterval, defaultPatrolRegionInterval)
adjustDuration(&c.MaxStoreDownTime, defaultMaxStoreDownTime)
adjustUint64(&c.LeaderScheduleLimit, defaultLeaderScheduleLimit)
adjustUint64(&c.RegionScheduleLimit, defaultRegionScheduleLimit)
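
With the field, default, and adjust() entry in place, the interval can be overridden from the PD configuration file. A minimal sketch, assuming ScheduleConfig is exposed as the [schedule] section and that typeutil.Duration parses Go-style duration strings:

    [schedule]
    # Hypothetical override; falls back to defaultPatrolRegionInterval (100ms)
    # via adjust() when left unset.
    patrol-region-interval = "500ms"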
14 changes: 9 additions & 5 deletions server/coordinator.go
@@ -40,7 +40,6 @@ const (
regionheartbeatSendChanCap = 1024
hotRegionScheduleName = "balance-hot-region-scheduler"

patrolRegionInterval = time.Millisecond * 100
patrolScanRegionLimit = 128 // It takes about 14 minutes to iterate 1 million regions.
)

@@ -115,15 +114,16 @@ func (c *coordinator) patrolRegions() {
defer logutil.LogPanic()

defer c.wg.Done()
ticker := time.NewTicker(patrolRegionInterval)
defer ticker.Stop()
timer := time.NewTimer(c.cluster.GetPatrolRegionInterval())
defer timer.Stop()

log.Info("coordinator: start patrol regions")

start := time.Now()
var key []byte
for {
select {
case <-ticker.C:
case <-timer.C:
timer.Reset(c.cluster.GetPatrolRegionInterval())
case <-c.ctx.Done():
return
}
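
Swapping the fixed Ticker for a Timer that is re-armed on every tick is what makes the interval configurable at runtime: the value is re-read from the live schedule options on each iteration instead of being baked in when the goroutine starts. A minimal, self-contained sketch of the pattern, where currentInterval is a hypothetical stand-in for c.cluster.GetPatrolRegionInterval():

    package main

    import (
    	"context"
    	"fmt"
    	"time"
    )

    // currentInterval stands in for c.cluster.GetPatrolRegionInterval(),
    // which reads the interval from the live schedule configuration.
    func currentInterval() time.Duration { return 100 * time.Millisecond }

    func patrol(ctx context.Context) {
    	timer := time.NewTimer(currentInterval())
    	defer timer.Stop()
    	for {
    		select {
    		case <-timer.C:
    			// Re-arm with the latest configured value so a runtime
    			// change takes effect on the very next tick.
    			timer.Reset(currentInterval())
    		case <-ctx.Done():
    			return
    		}
    		fmt.Println("scan the next batch of regions")
    	}
    }

    func main() {
    	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
    	defer cancel()
    	patrol(ctx)
    }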
@@ -149,6 +149,10 @@ }
}
// update label level isolation statistics.
c.cluster.updateRegionsLabelLevelStats(regions)
if len(key) == 0 {
patrolCheckRegionsHistogram.Observe(time.Since(start).Seconds())
start = time.Now()
}
}
}
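
The histogram observation piggybacks on the scan cursor: key appears to hold the continuation key for the next batch, so it is empty only when a pass over the whole keyspace has wrapped around. At that point the elapsed time since start is recorded as the duration of one full patrol cycle and the clock is restarted.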

9 changes: 9 additions & 0 deletions server/metrics.go
@@ -146,6 +146,14 @@ var (
Name: "metadata",
Help: "Record critical metadata.",
}, []string{"type"})
patrolCheckRegionsHistogram = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "pd",
Subsystem: "patrol",
Name: "checks_regions",
Help: "Bucketed histogram of time spend(s) of patrol checks region.",
Buckets: prometheus.ExponentialBuckets(1, 2, 15),
})
)

func init() {
@@ -164,4 +172,5 @@ func init() {
prometheus.MustRegister(regionStatusGauge)
prometheus.MustRegister(regionLabelLevelGauge)
prometheus.MustRegister(metadataGauge)
prometheus.MustRegister(patrolCheckRegionsHistogram)
}
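
The bucket layout bounds how long a full patrol cycle is expected to take: prometheus.ExponentialBuckets(1, 2, 15) yields 15 upper bounds that double from 1s up to 2^14 = 16384s (roughly 4.5 hours), comfortably above the ~14 minutes the coordinator comment quotes for iterating a million regions. A quick way to see the bounds:

    package main

    import (
    	"fmt"

    	"github.com/prometheus/client_golang/prometheus"
    )

    func main() {
    	// Prints [1 2 4 8 16 ... 16384]: 15 doubling bucket bounds, in seconds.
    	fmt.Println(prometheus.ExponentialBuckets(1, 2, 15))
    }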
4 changes: 4 additions & 0 deletions server/option.go
@@ -88,6 +88,10 @@ func (o *scheduleOption) GetSplitMergeInterval() time.Duration {
return o.load().SplitMergeInterval.Duration
}

func (o *scheduleOption) GetPatrolRegionInterval() time.Duration {
return o.load().PatrolRegionInterval.Duration
}

func (o *scheduleOption) GetMaxStoreDownTime() time.Duration {
return o.load().MaxStoreDownTime.Duration
}
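
o.load() is what lets the coordinator pick up changes without a restart: each getter reads whatever ScheduleConfig is currently installed in the option holder. A minimal sketch of that pattern, assuming scheduleOption keeps the current config in a sync/atomic.Value (the actual PD implementation may differ, and ScheduleConfig is trimmed to the one field this sketch needs):

    package main

    import (
    	"sync/atomic"
    	"time"
    )

    // ScheduleConfig is trimmed to the one field this sketch needs.
    type ScheduleConfig struct {
    	PatrolRegionInterval time.Duration
    }

    type scheduleOption struct {
    	v atomic.Value // holds *ScheduleConfig
    }

    func (o *scheduleOption) load() *ScheduleConfig {
    	return o.v.Load().(*ScheduleConfig)
    }

    // store installs a new config; readers see it on their next load().
    func (o *scheduleOption) store(cfg *ScheduleConfig) {
    	o.v.Store(cfg)
    }

    func (o *scheduleOption) GetPatrolRegionInterval() time.Duration {
    	return o.load().PatrolRegionInterval
    }

    func main() {
    	o := &scheduleOption{}
    	o.store(&ScheduleConfig{PatrolRegionInterval: 100 * time.Millisecond})
    	_ = o.GetPatrolRegionInterval() // 100ms until a new config is stored
    }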