Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR - BFD implementation for faster failover detection #518

Merged
merged 7 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .github/workflows/docker-multiarch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Manually triggered workflow that builds the loxilb container image for
# linux/amd64 and linux/arm64 using QEMU emulation and Docker Buildx.
# In the loxilb-io/loxilb repository the image is pushed to ghcr.io;
# in any other repository (e.g. a fork) only a build check is performed.
name: Docker-Multi-Arch

# Runs only on manual dispatch; the image tag is supplied as an input
# and defaults to 'latest'.
on:
workflow_dispatch:
inputs:
tagName:
description: 'Tag Name'
required: true
default: 'latest'

jobs:
build:
runs-on: ubuntu-latest
name: build for amd64/arm64
steps:
# Check out the repository together with all of its submodules
- uses: actions/checkout@v2
with:
submodules: recursive

# NOTE(review): pushing to ghcr.io with GITHUB_TOKEN normally needs the
# `packages: write` permission - confirm the repository's default token
# permissions, since no `permissions:` key is declared here.
- name: Login to GitHub Container Registry
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Setup hardware emulator using QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v2

# Setup Docker Buildx for multi-arch images
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

# Outside loxilb-io/loxilb: build both platforms but do not push
- name: Build Check
if: |
github.repository != 'loxilb-io/loxilb'
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/amd64, linux/arm64
push: false
tags: ghcr.io/loxilb-io/loxilb:${{ github.event.inputs.tagName }}

# Inside loxilb-io/loxilb: build both platforms and push the tagged image
- name: Build and push
if: |
github.repository == 'loxilb-io/loxilb'
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/amd64, linux/arm64
push: true
tags: ghcr.io/loxilb-io/loxilb:${{ github.event.inputs.tagName }}
126 changes: 30 additions & 96 deletions loxinet/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,13 @@
package loxinet

import (
"errors"
"fmt"
cmn "github.com/loxilb-io/loxilb/common"
opts "github.com/loxilb-io/loxilb/options"
bfd "github.com/loxilb-io/loxilb/proto"
tk "github.com/loxilb-io/loxilib"

"bufio"
"errors"
"fmt"
"net"
"os"
"os/exec"
"time"
)

Expand Down Expand Up @@ -60,13 +57,25 @@ type ClusterNode struct {
// CIStateH - Cluster context handler
type CIStateH struct {
SpawnKa bool
kaMode bool
RemoteIP net.IP
ClusterMap map[string]*ClusterInstance
StateMap map[string]int
NodeMap map[string]*ClusterNode
}

func kaSpawn() {
// BFDSessionNotify - apply a state change reported for a cluster instance.
// The new state is wrapped in a cmn.HASMod (with an all-zero VIP) and handed
// to CIStateUpdate while holding the global mh.mtx lock.
// NOTE(review): presumably invoked by the BFD module as its session-state
// callback, since the CIStateH is passed to BFDAddRemote - confirm.
//
// instance - name of the cluster instance whose state changed
// remote   - remote peer of the session (not used by this routine)
// ciState  - new state name, passed through unchanged to CIStateUpdate
func (ci *CIStateH) BFDSessionNotify(instance string, remote string, ciState string) {
	var sm cmn.HASMod

	sm.Instance = instance
	sm.State = ciState
	sm.Vip = net.ParseIP("0.0.0.0")
	tk.LogIt(tk.LogInfo, "ci-change instance %s - state %s vip %v\n", instance, ciState, sm.Vip)

	mh.mtx.Lock()
	defer mh.mtx.Unlock()

	// CIStateUpdate reports failures through its error result; surface them
	// in the log instead of dropping them silently.
	if _, err := ci.CIStateUpdate(sm); err != nil {
		tk.LogIt(tk.LogError, "ci-change instance %s - state %s update failed: %s\n", instance, ciState, err)
	}
}

func (ci *CIStateH) startBFDProto() {
url := fmt.Sprintf("http://127.0.0.1:%d/config/params", opts.Opts.Port)
for {
if IsLoxiAPIActive(url) {
Expand All @@ -76,107 +85,31 @@ func kaSpawn() {
time.Sleep(1 * time.Second)
}

RunCommand("rm -f /etc/shared/keepalive.state", false)
RunCommand("pkill keepalived", false)
mh.dp.WaitXsyncReady("ka")
// We need some cool-off period for loxilb to self sync-up in the cluster
time.Sleep(KAInitTiVal * time.Second)

for {
if exists := FileExists(KAConfigFile); !exists {
time.Sleep(2000 * time.Millisecond)
continue
}

pid := ReadPIDFile(KAPidFile1)
if pid != 0 {
time.Sleep(5000 * time.Millisecond)
continue
}

tk.LogIt(tk.LogInfo, "KA spawning\n")
cmd := exec.Command("/usr/sbin/keepalived", "-f", KAConfigFile, "-n")
err := cmd.Run()
if err != nil {
tk.LogIt(tk.LogError, "Error in running KA:%s\n", err)
} else {
tk.LogIt(tk.LogInfo, "KA found dead. Reaping\n")
}

rmf := fmt.Sprintf("rm -f %s", KAPidFile1)
RunCommand(rmf, false)
rmf = fmt.Sprintf("rm -f %s", KAPidFile2)
RunCommand(rmf, false)

time.Sleep(2000 * time.Millisecond)
}
}

func (h *CIStateH) CISync() {
var sm cmn.HASMod
var ciState int
var ok bool
clusterStateFile := "/etc/shared/keepalive.state"
rf, err := os.Open(clusterStateFile)
if err == nil {

fsc := bufio.NewScanner(rf)
fsc.Split(bufio.ScanLines)

for fsc.Scan() {
var inst string
var state string
var vip string
// Format style -
// INSTANCE default is in BACKUP state
_, err = fmt.Sscanf(fsc.Text(), "INSTANCE %s is in %s state vip %s", &inst, &state, &vip)
if err != nil {
continue
}

if ciState, ok = h.StateMap[state]; !ok {
continue
}

notify := false

if eci, ok := h.ClusterMap[inst]; !ok {
notify = true
} else {
if eci.State != ciState {
notify = true
}
}

if notify {
sm.Instance = inst
sm.State = state
sm.Vip = net.ParseIP(vip)
tk.LogIt(tk.LogInfo, "ci-change instance %s - state %s vip %v\n", inst, state, sm.Vip)
h.CIStateUpdate(sm)
}
}

rf.Close()
bs := bfd.StructNew(3784)
err := bs.BFDAddRemote(ci.RemoteIP.String(), 3784, bfd.BFDMinSysTXIntervalUs, 3, "Default", ci)
if err != nil {
tk.LogIt(tk.LogCritical, "KA - Cant add BFD remote\n")
}
}

// CITicker - Periodic ticker for Cluster module
func (h *CIStateH) CITicker() {
mh.mtx.Lock()
h.CISync()
mh.mtx.Unlock()
// Nothing to do currently
}

// CISpawn - Spawn CI application
func (h *CIStateH) CISpawn() {
if h.SpawnKa {
go kaSpawn()
func (ci *CIStateH) CISpawn() {
if ci.SpawnKa {
go ci.startBFDProto()
}
}

// CIInit - routine to initialize Cluster context
func CIInit(spawnKa bool, kaMode bool) *CIStateH {
func CIInit(spawnKa bool, remoteIP net.IP) *CIStateH {
var nCIh = new(CIStateH)
nCIh.StateMap = make(map[string]int)
nCIh.StateMap["MASTER"] = cmn.CIStateMaster
Expand All @@ -185,7 +118,7 @@ func CIInit(spawnKa bool, kaMode bool) *CIStateH {
nCIh.StateMap["STOP"] = cmn.CIStateNotDefined
nCIh.StateMap["NOT_DEFINED"] = cmn.CIStateNotDefined
nCIh.SpawnKa = spawnKa
nCIh.kaMode = kaMode
nCIh.RemoteIP = remoteIP
nCIh.ClusterMap = make(map[string]*ClusterInstance)

if _, ok := nCIh.ClusterMap[cmn.CIDefault]; !ok {
Expand Down Expand Up @@ -237,9 +170,9 @@ func (h *CIStateH) CIVipGet(inst string) (net.IP, error) {
return net.IPv4zero, errors.New("not found")
}

// IsCIKAMode - routine to get HA state
// IsCIKAMode - routine to get KA mode
func (h *CIStateH) IsCIKAMode() bool {
return h.kaMode
return false
}

// CIStateUpdate - routine to update cluster state
Expand Down Expand Up @@ -274,6 +207,7 @@ func (h *CIStateH) CIStateUpdate(cm cmn.HASMod) (int, error) {
if mh.bgp != nil {
mh.bgp.UpdateCIState(cm.Instance, ci.State, ci.Vip)
}
mh.zr.Rules.RuleVIPSyncToClusterState()
return ci.State, nil
}

Expand Down
9 changes: 9 additions & 0 deletions loxinet/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -2582,3 +2582,12 @@ func (R *RuleH) AdvRuleVIPIfL2(IP net.IP) error {

return nil
}

// RuleVIPSyncToClusterState - walk every key of the rule table's vipMap and
// call AdvRuleVIPIfL2 for each key that parses as an IP address.
// Keys that fail to parse are skipped, and the result returned by
// AdvRuleVIPIfL2 is discarded.
func (R *RuleH) RuleVIPSyncToClusterState() {
	for key := range R.vipMap {
		addr := net.ParseIP(key)
		if addr == nil {
			// Not a valid textual IP - nothing to advertise for this entry
			continue
		}
		R.AdvRuleVIPIfL2(addr)
	}
}
25 changes: 12 additions & 13 deletions loxinet/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"encoding/binary"
"errors"
"fmt"
tk "github.com/loxilb-io/loxilib"
"io/ioutil"
"net"
"net/http"
Expand All @@ -32,9 +33,6 @@ import (
"strconv"
"syscall"
"time"

opts "github.com/loxilb-io/loxilb/options"
tk "github.com/loxilb-io/loxilib"
)

// IterIntf - interface implementation to iterate various loxinet
Expand Down Expand Up @@ -140,18 +138,19 @@ func LogString2Level(logStr string) tk.LogLevelT {
}

// KAString2Mode - Convert ka mode in string opts to spawn/KAMode
func KAString2Mode(kaStr string) (bool, bool) {
func KAString2Mode(kaStr string) (bool, net.IP) {
spawnKa := false
kaMode := false
switch opts.Opts.Ka {
case "in":
spawnKa = true
kaMode = true
case "out":
spawnKa = false
kaMode = true

if kaStr == "none" {
return spawnKa, nil
}

remote := net.ParseIP(kaStr)
if remote == nil {
return spawnKa, remote
}
return spawnKa, kaMode
spawnKa = true
return spawnKa, remote
}

// HTTPSProber - Do a https probe for given url
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
"time"
)

var version string = "0.9.1"
var version string = "0.9.2-beta"
var buildInfo string = ""

func main() {
Expand Down
2 changes: 1 addition & 1 deletion options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (

var Opts struct {
Bgp bool `short:"b" long:"bgp" description:"Connect and Sync with GoBGP server"`
Ka string `short:"k" long:"ka" description:"One of in,out"`
Ka string `short:"k" long:"ka" description:"KeepAlive/BFD RemoteIP" default:"none"`
Version bool `short:"v" long:"version" description:"Show loxilb version"`
NoAPI bool `short:"a" long:"api" description:"Run Rest API server"`
NoNlp bool `short:"n" long:"nonlp" description:"Do not register with nlp"`
Expand Down
Loading
Loading