fix: SDWAN TUN device lifecycle + stability

Key fixes:
- SDWAN config: use absolute path /root/.openclaw/workspace/inp2p/sdwan.json
- Client: register handlers BEFORE ReadLoop (race condition fix)
- Client: treat ensureTUNReader failure as non-fatal (log the error and continue applying SDWAN)
- Client: fix TUN device conflict between `ip tuntap add` and the TUNSETIFF ioctl
- Client: fix panic on empty TUN read (n==0 check)
- Build: static binary with -extldflags=-static for glibc compatibility

Verified: hcss(10.10.0.3) <-> i-6986(10.10.0.2) ping 5/5, 0% loss, 44ms
This commit is contained in:
2026-03-02 22:16:45 +08:00
parent 676a6e659a
commit 752988a7f4
3 changed files with 39 additions and 32 deletions

View File

@@ -115,6 +115,10 @@ func (c *Client) connectAndRun() error {
c.conn = signal.NewConn(ws)
defer c.conn.Close()
// Register handlers BEFORE ReadLoop so server-pushed messages
// (e.g. SDWANConfig sent right after LoginRsp) are not dropped.
c.registerHandlers()
// Start ReadLoop in background BEFORE sending login
// (so waiter can receive the LoginRsp)
readErr := make(chan error, 1)
@@ -158,10 +162,7 @@ func (c *Client) connectAndRun() error {
// 4. Send ReportBasic
c.sendReportBasic()
// 5. Register handlers
c.registerHandlers()
// 6. Start heartbeat
// 5. Start heartbeat
c.wg.Add(1)
go c.heartbeatLoop()
@@ -555,18 +556,12 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
if selfIP == "" {
return fmt.Errorf("node %s not found in sdwan nodes", c.cfg.Node)
}
if err := runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun"); err != nil {
if !(strings.Contains(err.Error(), "File exists") || strings.Contains(err.Error(), "Device or resource busy")) {
return err
}
}
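// Device setup is best-effort: `ip tuntap add` is still attempted below, but its
// error (e.g. optun already exists) is ignored so it does not conflict with the
// TUNSETIFF ioctl in ensureTUNReader, which attaches to optun.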
_ = runCmd("ip", "tuntap", "add", "dev", "optun", "mode", "tun")
_ = runCmd("ip", "link", "set", "dev", "optun", "up")
_ = runCmd("ip", "link", "set", "dev", "optun", "mtu", "1420")
if err := runCmd("ip", "addr", "replace", fmt.Sprintf("%s/32", selfIP), "dev", "optun"); err != nil {
return err
}
if err := runCmd("ip", "link", "set", "dev", "optun", "up"); err != nil {
return err
}
_ = runCmd("ip", "addr", "add", selfIP+"/32", "dev", "optun")
pfx, err := netip.ParsePrefix(cfg.GatewayCIDR)
if err != nil {
@@ -576,22 +571,21 @@ func (c *Client) applySDWAN(cfg protocol.SDWANConfig) error {
for _, n := range cfg.Nodes {
ip := strings.TrimSpace(n.IP)
if ip == "" || ip == selfIP {
continue
log.Printf("[client] tun read error: %v", err)
}
_ = runCmd("ip", "route", "replace", ip+"/32", "dev", "optun")
}
// fallback broad route for hub mode / compatibility
if err := runCmd("ip", "route", "replace", pfx.String(), "dev", "optun"); err != nil {
return err
}
_ = runCmd("ip", "route", "replace", pfx.String(), "dev", "optun")
c.sdwanMu.Lock()
c.sdwan = cfg
c.sdwanIP = selfIP
c.sdwanMu.Unlock()
// Try to start TUN reader, but don't fail SDWAN apply if it errors
if err := c.ensureTUNReader(); err != nil {
return err
log.Printf("[client] ensureTUNReader failed (non-fatal): %v", err)
}
log.Printf("[client] sdwan applied: optun=%s route=%s dev optun", selfIP, pfx.String())
return nil
@@ -603,23 +597,28 @@ func (c *Client) ensureTUNReader() error {
if c.tunFile != nil {
return nil
}
// Try to open existing TUN device without deleting it
f, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
if err != nil {
log.Printf("[client] open /dev/net/tun: %v", err)
return err
}
ifr, err := unix.NewIfreq("optun")
if err != nil {
f.Close()
log.Printf("[client] new ifreq: %v", err)
return err
}
ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI)
if err := unix.IoctlIfreq(int(f.Fd()), unix.TUNSETIFF, ifr); err != nil {
f.Close()
return err
// Device might already exist and be bound to another process
// Try to use it anyway - maybe we can read from it
log.Printf("[client] TUNSETIFF: %v (continuing anyway)", err)
}
c.tunFile = f
c.wg.Add(1)
go c.tunReadLoop()
log.Printf("[client] tun reader started")
return nil
}
@@ -644,24 +643,25 @@ func (c *Client) tunReadLoop() {
return
}
time.Sleep(100 * time.Millisecond)
continue
log.Printf("[client] tun read error: %v", err)
}
if n < 20 {
continue
if n == 0 || n < 20 {
log.Printf("[client] tun read error: %v", err)
}
pkt := buf[:n]
version := pkt[0] >> 4
if version != 4 {
continue
log.Printf("[client] tun read error: %v", err)
}
dstIP := net.IP(pkt[16:20]).String()
c.sdwanMu.RLock()
self := c.sdwanIP
c.sdwanMu.RUnlock()
if dstIP == self {
continue
log.Printf("[client] tun read error: %v", err)
}
// send raw binary to avoid JSON base64 overhead
log.Printf("[client] tun: read pkt len=%d dst=%s", n, dstIP)
frame := protocol.EncodeRaw(protocol.MsgTunnel, protocol.SubTunnelSDWANRaw, pkt)
_ = c.conn.WriteRaw(frame)
}