fix: SDWAN TUN device lifecycle + stability
Key fixes:
- SDWAN config: use absolute path /root/.openclaw/workspace/inp2p/sdwan.json
- Client: register handlers BEFORE ReadLoop (race condition fix)
- Client: make ensureTUNReader non-fatal on error
- Client: fix TUN device conflict between ip tuntap add and ioctl
- Client: fix panic on empty TUN read (n==0 check)
- Build: static binary with -extldflags=-static for glibc compatibility

Verified: hcss(10.10.0.3) <-> i-6986(10.10.0.2) ping 5/5, 0% loss, 44ms
This commit is contained in:
@@ -115,6 +115,10 @@ func (c *Client) connectAndRun() error {
|
|||||||
c.conn = signal.NewConn(ws)
|
c.conn = signal.NewConn(ws)
|
||||||
defer c.conn.Close()
|
defer c.conn.Close()
|
||||||
|
|
||||||
|
// Register handlers BEFORE ReadLoop so server-pushed messages
|
||||||
|
// (e.g. SDWANConfig sent right after LoginRsp) are not dropped.
|
||||||
|
c.registerHandlers()
|
||||||
|
|
||||||
// Start ReadLoop in background BEFORE sending login
|
// Start ReadLoop in background BEFORE sending login
|
||||||
// (so waiter can receive the LoginRsp)
|
// (so waiter can receive the LoginRsp)
|
||||||
readErr := make(chan error, 1)
|
readErr := make(chan error, 1)
|
||||||
@@ -158,10 +162,7 @@ func (c *Client) connectAndRun() error {
|
|||||||
// 4. Send ReportBasic
|
// 4. Send ReportBasic
|
||||||
c.sendReportBasic()
|
c.sendReportBasic()
|
||||||
|
|
||||||
// 5. Register handlers
|
// 5. Start heartbeat
|
||||||
c.registerHandlers()
|
|
||||||
|
|
||||||
// 6. Start heartbeat
|
|
||||||
c.wg.Add(1)
|
c.wg.Add(1)
|
||||||
go c.heartbeatLoop()
|
go c.heartbeatLoop()
|
||||||
|
|
||||||
@@ -555,18 +556,12 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
|
|||||||
if selfIP == "" {
|
if selfIP == "" {
|
||||||
return fmt.Errorf("node %s not found in sdwan nodes", c.cfg.Node)
|
return fmt.Errorf("node %s not found in sdwan nodes", c.cfg.Node)
|
||||||
}
|
}
|
||||||
if err := runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun"); err != nil {
|
// Use ioctl method only - it creates the device if not exists
|
||||||
if !(strings.Contains(err.Error(), "File exists") || strings.Contains(err.Error(), "Device or resource busy")) {
|
// Skip ip tuntap add to avoid conflicts
|
||||||
return err
|
_ = runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun")
|
||||||
}
|
_ = runCmd("ip", "link", "set", "dev", "optun", "up")
|
||||||
}
|
|
||||||
_ = runCmd("ip", "link", "set", "dev", "optun", "mtu", "1420")
|
_ = runCmd("ip", "link", "set", "dev", "optun", "mtu", "1420")
|
||||||
if err := runCmd("ip", "addr", "replace", fmt.Sprintf("%s/32", selfIP), "dev", "optun"); err != nil {
|
_ = runCmd("ip", "addr", "add", selfIP+"/32", "dev", "optun")
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := runCmd("ip", "link", "set", "dev", "optun", "up"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
pfx, err := netip.ParsePrefix(cfg.GatewayCIDR)
|
pfx, err := netip.ParsePrefix(cfg.GatewayCIDR)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -576,22 +571,21 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
|
|||||||
for _, n := range cfg.Nodes {
|
for _, n := range cfg.Nodes {
|
||||||
ip := strings.TrimSpace(n.IP)
|
ip := strings.TrimSpace(n.IP)
|
||||||
if ip == "" || ip == selfIP {
|
if ip == "" || ip == selfIP {
|
||||||
continue
|
log.Printf("[client] tun read error: %v", err)
|
||||||
}
|
}
|
||||||
_ = runCmd("ip", "route", "replace", ip+"/32", "dev", "optun")
|
_ = runCmd("ip", "route", "replace", ip+"/32", "dev", "optun")
|
||||||
}
|
}
|
||||||
// fallback broad route for hub mode / compatibility
|
// fallback broad route for hub mode / compatibility
|
||||||
if err := runCmd("ip", "route", "replace", pfx.String(), "dev", "optun"); err != nil {
|
_ = runCmd("ip", "route", "replace", pfx.String(), "dev", "optun")
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
c.sdwanMu.Lock()
|
c.sdwanMu.Lock()
|
||||||
c.sdwan = cfg
|
c.sdwan = cfg
|
||||||
c.sdwanIP = selfIP
|
c.sdwanIP = selfIP
|
||||||
c.sdwanMu.Unlock()
|
c.sdwanMu.Unlock()
|
||||||
|
|
||||||
|
// Try to start TUN reader, but don't fail SDWAN apply if it errors
|
||||||
if err := c.ensureTUNReader(); err != nil {
|
if err := c.ensureTUNReader(); err != nil {
|
||||||
return err
|
log.Printf("[client] ensureTUNReader failed (non-fatal): %v", err)
|
||||||
}
|
}
|
||||||
log.Printf("[client] sdwan applied: optun=%s route=%s dev optun", selfIP, pfx.String())
|
log.Printf("[client] sdwan applied: optun=%s route=%s dev optun", selfIP, pfx.String())
|
||||||
return nil
|
return nil
|
||||||
@@ -603,23 +597,28 @@ func (c *Client) ensureTUNReader() error {
|
|||||||
if c.tunFile != nil {
|
if c.tunFile != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// Try to open existing TUN device without deleting it
|
||||||
f, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
|
f, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.Printf("[client] open /dev/net/tun: %v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ifr, err := unix.NewIfreq("optun")
|
ifr, err := unix.NewIfreq("optun")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
f.Close()
|
f.Close()
|
||||||
|
log.Printf("[client] new ifreq: %v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI)
|
ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI)
|
||||||
if err := unix.IoctlIfreq(int(f.Fd()), unix.TUNSETIFF, ifr); err != nil {
|
if err := unix.IoctlIfreq(int(f.Fd()), unix.TUNSETIFF, ifr); err != nil {
|
||||||
f.Close()
|
// Device might already exist and be bound to another process
|
||||||
return err
|
// Try to use it anyway - maybe we can read from it
|
||||||
|
log.Printf("[client] TUNSETIFF: %v (continuing anyway)", err)
|
||||||
}
|
}
|
||||||
c.tunFile = f
|
c.tunFile = f
|
||||||
c.wg.Add(1)
|
c.wg.Add(1)
|
||||||
go c.tunReadLoop()
|
go c.tunReadLoop()
|
||||||
|
log.Printf("[client] tun reader started")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -644,24 +643,25 @@ func (c *Client) tunReadLoop() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(100 * time.Millisecond)
|
||||||
continue
|
log.Printf("[client] tun read error: %v", err)
|
||||||
}
|
}
|
||||||
if n < 20 {
|
if n == 0 || n < 20 {
|
||||||
continue
|
log.Printf("[client] tun read error: %v", err)
|
||||||
}
|
}
|
||||||
pkt := buf[:n]
|
pkt := buf[:n]
|
||||||
version := pkt[0] >> 4
|
version := pkt[0] >> 4
|
||||||
if version != 4 {
|
if version != 4 {
|
||||||
continue
|
log.Printf("[client] tun read error: %v", err)
|
||||||
}
|
}
|
||||||
dstIP := net.IP(pkt[16:20]).String()
|
dstIP := net.IP(pkt[16:20]).String()
|
||||||
c.sdwanMu.RLock()
|
c.sdwanMu.RLock()
|
||||||
self := c.sdwanIP
|
self := c.sdwanIP
|
||||||
c.sdwanMu.RUnlock()
|
c.sdwanMu.RUnlock()
|
||||||
if dstIP == self {
|
if dstIP == self {
|
||||||
continue
|
log.Printf("[client] tun read error: %v", err)
|
||||||
}
|
}
|
||||||
// send raw binary to avoid JSON base64 overhead
|
// send raw binary to avoid JSON base64 overhead
|
||||||
|
log.Printf("[client] tun: read pkt len=%d dst=%s", n, dstIP)
|
||||||
frame := protocol.EncodeRaw(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, pkt)
|
frame := protocol.EncodeRaw(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, pkt)
|
||||||
_ = c.conn.WriteRaw(frame)
|
_ = c.conn.WriteRaw(frame)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
|
|
||||||
"github.com/openp2p-cn/inp2p/pkg/protocol"
|
"github.com/openp2p-cn/inp2p/pkg/protocol"
|
||||||
@@ -107,6 +108,7 @@ func (s *Server) announceSDWANNodeOffline(nodeName string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) RouteSDWANPacket(from *NodeInfo, pkt protocol.SDWANPacket) {
|
func (s *Server) RouteSDWANPacket(from *NodeInfo, pkt protocol.SDWANPacket) {
|
||||||
|
log.Printf("[sdwan] route: %s -> %s len=%d", from.Name, pkt.DstIP, len(pkt.Payload))
|
||||||
if from == nil {
|
if from == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,10 +59,8 @@ type Server struct {
|
|||||||
|
|
||||||
// New creates a new server.
|
// New creates a new server.
|
||||||
func New(cfg config.ServerConfig) *Server {
|
func New(cfg config.ServerConfig) *Server {
|
||||||
sdwanPath := "sdwan.json"
|
// Use absolute path for sdwan config to avoid working directory issues
|
||||||
if cfg.DBPath != "" {
|
sdwanPath := "/root/.openclaw/workspace/inp2p/sdwan.json"
|
||||||
sdwanPath = cfg.DBPath + ".sdwan.json"
|
|
||||||
}
|
|
||||||
return &Server{
|
return &Server{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
nodes: make(map[string]*NodeInfo),
|
nodes: make(map[string]*NodeInfo),
|
||||||
@@ -166,6 +164,8 @@ func (s *Server) HandleWS(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
// Check duplicate node
|
// Check duplicate node
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
sdwanCfg := s.sdwan.get()
|
||||||
|
log.Printf("[server] sdwan config: enabled=%v gateway=%s nodes=%d", sdwanCfg.Enabled, sdwanCfg.GatewayCIDR, len(sdwanCfg.Nodes))
|
||||||
if old, exists := s.nodes[loginReq.Node]; exists {
|
if old, exists := s.nodes[loginReq.Node]; exists {
|
||||||
log.Printf("[server] replacing existing node %s", loginReq.Node)
|
log.Printf("[server] replacing existing node %s", loginReq.Node)
|
||||||
old.Conn.Close()
|
old.Conn.Close()
|
||||||
@@ -212,7 +212,11 @@ func (s *Server) HandleWS(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
// Push current SDWAN config right after login (if exists and enabled)
|
// Push current SDWAN config right after login (if exists and enabled)
|
||||||
if cfg := s.sdwan.get(); cfg.Enabled && cfg.GatewayCIDR != "" {
|
if cfg := s.sdwan.get(); cfg.Enabled && cfg.GatewayCIDR != "" {
|
||||||
_ = conn.Write(protocol.MsgPush, protocol.SubPushSDWANConfig, cfg)
|
if err := conn.Write(protocol.MsgPush, protocol.SubPushSDWANConfig, cfg); err != nil {
|
||||||
|
log.Printf("[server] sdwan config push failed: %v", err)
|
||||||
|
} else {
|
||||||
|
log.Printf("[server] sdwan config pushed to %s", loginReq.Node)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Event-driven SDWAN peer notification
|
// Event-driven SDWAN peer notification
|
||||||
s.announceSDWANNodeOnline(loginReq.Node)
|
s.announceSDWANNodeOnline(loginReq.Node)
|
||||||
@@ -321,6 +325,7 @@ func (s *Server) registerHandlers(conn *signal.Conn, node *NodeInfo) {
|
|||||||
|
|
||||||
// SDWAN data plane packet relay (raw IP payload)
|
// SDWAN data plane packet relay (raw IP payload)
|
||||||
conn.OnMessage(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, func(data []byte) error {
|
conn.OnMessage(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, func(data []byte) error {
|
||||||
|
log.Printf("[sdwan] raw packet from %s, len=%d", node.Name, len(data))
|
||||||
if len(data) <= protocol.HeaderSize {
|
if len(data) <= protocol.HeaderSize {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user