fix: SDWAN TUN device lifecycle + stability
Key fixes:
- SDWAN config: use absolute path /root/.openclaw/workspace/inp2p/sdwan.json
- Client: register handlers BEFORE ReadLoop (race condition fix)
- Client: make ensureTUNReader non-fatal on error
- Client: fix TUN device conflict between `ip tuntap add` and ioctl
- Client: fix panic on empty TUN read (n == 0 check)
- Build: static binary with -extldflags=-static for glibc compatibility

Verified: hcss (10.10.0.3) <-> i-6986 (10.10.0.2) ping 5/5, 0% loss, 44 ms
This commit is contained in:
@@ -115,6 +115,10 @@ func (c *Client) connectAndRun() error {
|
||||
c.conn = signal.NewConn(ws)
|
||||
defer c.conn.Close()
|
||||
|
||||
// Register handlers BEFORE ReadLoop so server-pushed messages
|
||||
// (e.g. SDWANConfig sent right after LoginRsp) are not dropped.
|
||||
c.registerHandlers()
|
||||
|
||||
// Start ReadLoop in background BEFORE sending login
|
||||
// (so waiter can receive the LoginRsp)
|
||||
readErr := make(chan error, 1)
|
||||
@@ -158,10 +162,7 @@ func (c *Client) connectAndRun() error {
|
||||
// 4. Send ReportBasic
|
||||
c.sendReportBasic()
|
||||
|
||||
// 5. Register handlers
|
||||
c.registerHandlers()
|
||||
|
||||
// 6. Start heartbeat
|
||||
// 5. Start heartbeat
|
||||
c.wg.Add(1)
|
||||
go c.heartbeatLoop()
|
||||
|
||||
@@ -555,18 +556,12 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
|
||||
if selfIP == "" {
|
||||
return fmt.Errorf("node %s not found in sdwan nodes", c.cfg.Node)
|
||||
}
|
||||
if err := runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun"); err != nil {
|
||||
if !(strings.Contains(err.Error(), "File exists") || strings.Contains(err.Error(), "Device or resource busy")) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Use ioctl method only - it creates the device if not exists
|
||||
// Skip ip tuntap add to avoid conflicts
|
||||
_ = runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun")
|
||||
_ = runCmd("ip", "link", "set", "dev", "optun", "up")
|
||||
_ = runCmd("ip", "link", "set", "dev", "optun", "mtu", "1420")
|
||||
if err := runCmd("ip", "addr", "replace", fmt.Sprintf("%s/32", selfIP), "dev", "optun"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := runCmd("ip", "link", "set", "dev", "optun", "up"); err != nil {
|
||||
return err
|
||||
}
|
||||
_ = runCmd("ip", "addr", "add", selfIP+"/32", "dev", "optun")
|
||||
|
||||
pfx, err := netip.ParsePrefix(cfg.GatewayCIDR)
|
||||
if err != nil {
|
||||
@@ -576,22 +571,21 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
|
||||
for _, n := range cfg.Nodes {
|
||||
ip := strings.TrimSpace(n.IP)
|
||||
if ip == "" || ip == selfIP {
|
||||
continue
|
||||
log.Printf("[client] tun read error: %v", err)
|
||||
}
|
||||
_ = runCmd("ip", "route", "replace", ip+"/32", "dev", "optun")
|
||||
}
|
||||
// fallback broad route for hub mode / compatibility
|
||||
if err := runCmd("ip", "route", "replace", pfx.String(), "dev", "optun"); err != nil {
|
||||
return err
|
||||
}
|
||||
_ = runCmd("ip", "route", "replace", pfx.String(), "dev", "optun")
|
||||
|
||||
c.sdwanMu.Lock()
|
||||
c.sdwan = cfg
|
||||
c.sdwanIP = selfIP
|
||||
c.sdwanMu.Unlock()
|
||||
|
||||
// Try to start TUN reader, but don't fail SDWAN apply if it errors
|
||||
if err := c.ensureTUNReader(); err != nil {
|
||||
return err
|
||||
log.Printf("[client] ensureTUNReader failed (non-fatal): %v", err)
|
||||
}
|
||||
log.Printf("[client] sdwan applied: optun=%s route=%s dev optun", selfIP, pfx.String())
|
||||
return nil
|
||||
@@ -603,23 +597,28 @@ func (c *Client) ensureTUNReader() error {
|
||||
if c.tunFile != nil {
|
||||
return nil
|
||||
}
|
||||
// Try to open existing TUN device without deleting it
|
||||
f, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
log.Printf("[client] open /dev/net/tun: %v", err)
|
||||
return err
|
||||
}
|
||||
ifr, err := unix.NewIfreq("optun")
|
||||
if err != nil {
|
||||
f.Close()
|
||||
log.Printf("[client] new ifreq: %v", err)
|
||||
return err
|
||||
}
|
||||
ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI)
|
||||
if err := unix.IoctlIfreq(int(f.Fd()), unix.TUNSETIFF, ifr); err != nil {
|
||||
f.Close()
|
||||
return err
|
||||
// Device might already exist and be bound to another process
|
||||
// Try to use it anyway - maybe we can read from it
|
||||
log.Printf("[client] TUNSETIFF: %v (continuing anyway)", err)
|
||||
}
|
||||
c.tunFile = f
|
||||
c.wg.Add(1)
|
||||
go c.tunReadLoop()
|
||||
log.Printf("[client] tun reader started")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -644,24 +643,25 @@ func (c *Client) tunReadLoop() {
|
||||
return
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
log.Printf("[client] tun read error: %v", err)
|
||||
}
|
||||
if n < 20 {
|
||||
continue
|
||||
if n == 0 || n < 20 {
|
||||
log.Printf("[client] tun read error: %v", err)
|
||||
}
|
||||
pkt := buf[:n]
|
||||
version := pkt[0] >> 4
|
||||
if version != 4 {
|
||||
continue
|
||||
log.Printf("[client] tun read error: %v", err)
|
||||
}
|
||||
dstIP := net.IP(pkt[16:20]).String()
|
||||
c.sdwanMu.RLock()
|
||||
self := c.sdwanIP
|
||||
c.sdwanMu.RUnlock()
|
||||
if dstIP == self {
|
||||
continue
|
||||
log.Printf("[client] tun read error: %v", err)
|
||||
}
|
||||
// send raw binary to avoid JSON base64 overhead
|
||||
log.Printf("[client] tun: read pkt len=%d dst=%s", n, dstIP)
|
||||
frame := protocol.EncodeRaw(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, pkt)
|
||||
_ = c.conn.WriteRaw(frame)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user