From d4129dbc7a3852cfc562366eaef56c0df3c29a1c Mon Sep 17 00:00:00 2001 From: Lucas Manning Date: Tue, 30 Jun 2026 01:14:45 +0000 Subject: [PATCH] fuse: add host FD passthrough for external FUSE servers --- g3doc/user_guide/BUILD | 8 + g3doc/user_guide/fuse.md | 136 +++++ pkg/sentry/fsimpl/fuse/BUILD | 4 + pkg/sentry/fsimpl/fuse/connection.go | 51 +- pkg/sentry/fsimpl/fuse/file.go | 24 +- pkg/sentry/fsimpl/fuse/fusefs.go | 82 ++- pkg/sentry/fsimpl/fuse/host_connection.go | 250 +++++++++ .../fuse/host_connection_integration_test.go | 504 ++++++++++++++++++ .../fsimpl/fuse/host_connection_test.go | 387 ++++++++++++++ pkg/sentry/fsimpl/fuse/request_response.go | 4 + pkg/sentry/fsimpl/fuse/save_restore.go | 4 + pkg/sentry/fsimpl/host/host.go | 10 + pkg/sentry/vfs/filesystem.go | 5 + test/fuse_host/BUILD | 30 ++ test/fuse_host/fuse_host_test.go | 113 ++++ test/fuse_host/server.go | 314 +++++++++++ test/fuse_host/workload/BUILD | 15 + test/fuse_host/workload/workload.go | 97 ++++ 18 files changed, 2010 insertions(+), 28 deletions(-) create mode 100644 g3doc/user_guide/fuse.md create mode 100644 pkg/sentry/fsimpl/fuse/host_connection.go create mode 100644 pkg/sentry/fsimpl/fuse/host_connection_integration_test.go create mode 100644 pkg/sentry/fsimpl/fuse/host_connection_test.go create mode 100644 test/fuse_host/BUILD create mode 100644 test/fuse_host/fuse_host_test.go create mode 100644 test/fuse_host/server.go create mode 100644 test/fuse_host/workload/BUILD create mode 100644 test/fuse_host/workload/workload.go diff --git a/g3doc/user_guide/BUILD b/g3doc/user_guide/BUILD index e7e93cbf00..68b6017c01 100644 --- a/g3doc/user_guide/BUILD +++ b/g3doc/user_guide/BUILD @@ -49,6 +49,14 @@ doc( weight = "40", ) +doc( + name = "fuse", + src = "fuse.md", + category = "User Guide", + permalink = "/docs/user_guide/fuse/", + weight = "41", +) + doc( name = "networking", src = "networking.md", diff --git a/g3doc/user_guide/fuse.md b/g3doc/user_guide/fuse.md new file mode 100644 index 
0000000000..8904950fbc --- /dev/null +++ b/g3doc/user_guide/fuse.md @@ -0,0 +1,136 @@ +# FUSE + +[TOC] + +gVisor supports [FUSE] (Filesystem in Userspace), allowing userspace programs to +serve filesystems inside a sandbox. There are two modes of operation: + +* **In-sandbox FUSE**: A FUSE daemon runs inside the sandbox and communicates + with the gVisor kernel via `/dev/fuse`. This is the standard FUSE model. +* **External FUSE server**: A FUSE server runs on the host, outside the + sandbox, and communicates with gVisor over a socketpair passed into the + sandbox as a host file descriptor. This is useful when the filesystem + implementation must access resources that are not available inside the + sandbox. + +## External FUSE Server + +The external FUSE server feature allows a host-side process to serve a FUSE +filesystem into a gVisor sandbox. The host process and the sandbox communicate +over a Unix socketpair using the standard FUSE protocol. This approach avoids +the performance penalty incurred by context switching through the I/O +proxy mechanism that's otherwise used to expose host filesystems. + +### How It Works + +1. The host creates a Unix socketpair (`SOCK_SEQPACKET`). +2. One end of the socketpair is passed into the sandbox using the `--pass-fd` + flag on `runsc run` or `runsc create`. +3. The other end is given to a FUSE server process running on the host. +4. Inside the sandbox, the application mounts a FUSE filesystem using the + passed file descriptor. +5. All FUSE operations (read, write, lookup, etc.) are forwarded over the + socketpair to the host FUSE server, which performs the actual I/O. + +### Setup + +#### 1. Create the socketpair and start the FUSE server + +The host process creates a socketpair and starts the FUSE server with one end: + +```bash +# Example: create a socketpair and pass FD 4 to the FUSE server. +# The FUSE server reads FUSE requests from its FD and responds with +# the standard FUSE protocol (FUSEHeaderIn/Out framing). 
+./my_fuse_server --fd=4 --backing-dir=/data/shared +``` + +The FUSE server must implement the FUSE kernel protocol: it reads +`FUSEHeaderIn`-framed requests and writes `FUSEHeaderOut`-framed responses. At +minimum, it should handle `FUSE_INIT`, `FUSE_GETATTR`, `FUSE_LOOKUP`, +`FUSE_OPEN`, `FUSE_READ`, `FUSE_RELEASE`, and `FUSE_ACCESS`. Additional +opcodes like `FUSE_WRITE`, `FUSE_FLUSH`, `FUSE_STATFS`, and `FUSE_CREATE` can +be added as needed. + +#### 2. Pass the FD into the sandbox + +Use the `--pass-fd` flag to map the host-side socketpair FD into the sandbox: + +```bash +runsc run \ + --pass-fd=3:100 \ + --bundle=/path/to/bundle \ + my-container +``` + +The format is `--pass-fd=HOST_FD:GUEST_FD`. In this example, host FD 3 becomes +FD 100 inside the sandbox. The `--pass-fd` flag can be specified multiple times +to pass additional file descriptors. + +#### 3. Mount the FUSE filesystem inside the container + +Inside the sandbox, the application mounts a FUSE filesystem referencing the +passed FD: + +```c +// Mount using the passed file descriptor. +mount("fuse", "/mnt/shared", "fuse", MS_NODEV | MS_NOSUID, + "fd=100,user_id=0,group_id=0,rootmode=40000"); +``` + +Or equivalently from a shell: + +```bash +mount -t fuse fuse /mnt/shared -o fd=100,user_id=0,group_id=0,rootmode=40000 +``` + +The mount options are: + +* `fd=N`: The file descriptor number inside the sandbox. +* `user_id=UID`: The UID that owns the mount. +* `group_id=GID`: The GID that owns the mount. +* `rootmode=MODE`: The permission mode of the root inode (octal). Use `40000` + for a directory. + +### Example: End-to-End with a Socketpair + +Here is a complete example in Go that sets up the host side: + +```go +// Create a socketpair for FUSE communication. +fds, _ := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET, 0) + +// fds[0] goes into the sandbox, fds[1] goes to the FUSE server. 
+sandboxFile := os.NewFile(uintptr(fds[0]), "fuse-sandbox") +serverFD := fds[1] + +// Start the FUSE server on the host with the server-side FD. +go myFuseServer.Serve(serverFD, "/data/backing") + +// Launch the sandbox with the FD passed in. +cmd := exec.Command("runsc", "run", + "--pass-fd=3:100", // host FD 3 → guest FD 100 + "--bundle="+bundleDir, + containerID, +) +cmd.ExtraFiles = []*os.File{sandboxFile} // FD 3 in the child process +cmd.Run() +``` + +### Limitations + +* **No /dev/fuse**: The external path does not use `/dev/fuse`. The + application mounts FUSE using the passed socketpair FD directly. +* **FUSE protocol only**: The host server must implement the raw FUSE kernel + protocol. Higher-level FUSE libraries (e.g., libfuse) typically expect + `/dev/fuse` and may not work directly over a socketpair without adaptation. + +## In-Sandbox FUSE + +gVisor also supports the standard FUSE model where both the FUSE daemon and the +application run inside the sandbox. The daemon opens `/dev/fuse`, and the +application mounts a FUSE filesystem using the resulting file descriptor. This +works the same as FUSE on a regular Linux system, with the gVisor kernel +handling the FUSE protocol internally. 
+ +[FUSE]: https://www.kernel.org/doc/html/latest/filesystems/fuse.html diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 4f72e73efa..19e770a0d0 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -63,6 +63,7 @@ go_library( "directory.go", "file.go", "fusefs.go", + "host_connection.go", "inode.go", "inode_connection.go", "inode_refs.go", @@ -110,6 +111,8 @@ go_test( srcs = [ "connection_test.go", "dev_test.go", + "host_connection_integration_test.go", + "host_connection_test.go", "utils_test.go", "xattr_test.go", ], @@ -119,6 +122,7 @@ go_test( "//pkg/context", "//pkg/errors/linuxerr", "//pkg/fspath", + "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel", diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go index 8c79e0fa21..7cc181bede 100644 --- a/pkg/sentry/fsimpl/fuse/connection.go +++ b/pkg/sentry/fsimpl/fuse/connection.go @@ -43,6 +43,34 @@ const ( fuseDefaultMaxPagesPerReq = 32 ) +// fuseConn abstracts the FUSE request/response transport. The connection +// struct delegates call dispatch to its fuseConn implementation. +type fuseConn interface { + call(ctx context.Context, r *Request) (*Response, error) + release(ctx context.Context) +} + +// deviceConn implements fuseConn for the in-sandbox /dev/fuse path. +// It uses the queue-based mechanism where the FUSE daemon reads requests +// from and writes responses to the DeviceFD. +type deviceConn struct { + conn *connection +} + +func (dc *deviceConn) call(ctx context.Context, r *Request) (*Response, error) { + fut, err := dc.conn.callFuture(ctx, r) + if err != nil { + return nil, linuxError(err) + } + res, err := fut.resolve(ctx) + if err != nil { + return res, linuxError(err) + } + return res, nil +} + +func (dc *deviceConn) release(ctx context.Context) {} + // connection is the struct by which the sentry communicates with the FUSE server daemon. 
// // Lock order: @@ -54,6 +82,10 @@ const ( type connection struct { connectionRefs + // fuseConn is the transport implementation. For the DeviceFD path this + // is a *deviceConn; for host passthrough this is a *hostConnection. + fuseConn fuseConn `state:"nosave"` + // We target FUSE 7.23. // The following FUSE_INIT flags are currently unsupported by this implementation: // - FUSE_EXPORT_SUPPORT @@ -309,7 +341,12 @@ func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOpti // synchronization and without checking if fuseFD has already been used to // mount another filesystem. - // Create the writeBuf for the header to be stored in. + return newFUSEConnectionOpts(opts) +} + +// newFUSEConnectionOpts creates a FUSE connection with the given options. +// This is used by both the DeviceFD path and the host FD passthrough path. +func newFUSEConnectionOpts(opts *filesystemOptions) (*connection, error) { conn := &connection{ completions: make(map[linux.FUSEOpID]*futureResponse), fullQueueCh: make(chan struct{}, opts.maxActiveRequests), @@ -321,6 +358,7 @@ func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOpti initializedChan: make(chan struct{}), connected: true, } + conn.fuseConn = &deviceConn{conn: conn} conn.InitRefs() return conn, nil } @@ -379,16 +417,7 @@ func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error) return nil, linuxerr.ECONNREFUSED } - fut, err := conn.callFuture(ctx, r) - if err != nil { - return nil, linuxError(err) - } - - res, err := fut.resolve(ctx) - if err != nil { - return res, linuxError(err) - } - return res, nil + return conn.fuseConn.call(ctx, r) } // callFuture makes a request to the server and returns a future response. 
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go index 0ddb6d3349..8a3c103b76 100644 --- a/pkg/sentry/fsimpl/fuse/file.go +++ b/pkg/sentry/fsimpl/fuse/file.go @@ -69,8 +69,8 @@ func (fd *fileDescription) statusFlags() uint32 { // Release implements vfs.FileDescriptionImpl.Release. func (fd *fileDescription) Release(ctx context.Context) { // no need to release if FUSE server doesn't implement Open. - conn := fd.inode().fs.conn - if conn.noOpen { + fs := fd.inode().fs + if fs.conn.noOpen { return } @@ -89,19 +89,19 @@ func (fd *fileDescription) Release(ctx context.Context) { opcode = linux.FUSE_RELEASE } // Ignoring errors and FUSE server replies is analogous to Linux's behavior. - req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, opcode, &in) + req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, opcode, &in) // The reply will be ignored since no callback is defined in asyncCallBack(). - conn.Call(ctx, req) + fs.conn.Call(ctx, req) } // OnClose implements vfs.FileDescriptionImpl.OnClose. 
func (fd *fileDescription) OnClose(ctx context.Context) error { inode := fd.inode() - conn := inode.fs.conn + fs := inode.fs inode.attrMu.Lock() defer inode.attrMu.Unlock() - if conn.noOpen { + if fs.conn.noOpen { return nil } if fd.OpenFlag&linux.FOPEN_NOFLUSH != 0 { @@ -112,8 +112,8 @@ func (fd *fileDescription) OnClose(ctx context.Context) error { Fh: fd.Fh, LockOwner: 0, // TODO(gvisor.dev/issue/3245): file lock } - req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FLUSH, &in) - res, err := conn.Call(ctx, req) + req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FLUSH, &in) + res, err := fs.conn.Call(ctx, req) if err != nil { return err } @@ -170,9 +170,9 @@ func (fd *fileDescription) Sync(ctx context.Context) error { inode := fd.inode() inode.attrMu.Lock() defer inode.attrMu.Unlock() - conn := inode.fs.conn + fs := inode.fs // no need to proceed if FUSE server doesn't implement Open. - if conn.noOpen { + if fs.conn.noOpen { return linuxerr.EINVAL } @@ -181,9 +181,9 @@ func (fd *fileDescription) Sync(ctx context.Context) error { FsyncFlags: fd.statusFlags(), } // Ignoring errors and FUSE server replies is analogous to Linux's behavior. - req := conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FSYNC, &in) + req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), inode.nodeID, linux.FUSE_FSYNC, &in) // The reply will be ignored since no callback is defined in asyncCallBack(). 
- conn.CallAsync(ctx, req) + fs.conn.CallAsync(ctx, req) return nil } diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 5264903c67..e4297e9f07 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -19,6 +19,7 @@ import ( "math" "strconv" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" @@ -88,7 +89,8 @@ type filesystem struct { devMinor uint32 // conn is used for communication between the FUSE server - // daemon and the sentry fusefs. + // daemon and the sentry fusefs. It holds shared protocol state and + // delegates call dispatch to its internal fuseConn transport. conn *connection // opts is the options the fusefs is initialized with. @@ -130,14 +132,36 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } defer fuseFDGeneric.DecRef(ctx) fuseFD, ok := fuseFDGeneric.Impl().(*DeviceFD) - if !ok { - log.Warningf("%s.GetFilesystem: device FD is %T, not a FUSE device", fsType.Name(), fuseFDGeneric) + if ok { + return fsType.getFilesystemDeviceFD(ctx, vfsObj, creds, kernelTask, fuseFD, devMinor, fsopts) + } + + // Check if this is a host FD. Try the file description first (for + // regular files, pipes), then the dentry inode (for sockets, which + // have a different file description type but the same host inode). 
+ rawHostFD := -1 + if hfd, ok := fuseFDGeneric.Impl().(vfs.HostFDProvider); ok { + rawHostFD = hfd.HostFD() + } else if d := fuseFDGeneric.Dentry(); d != nil { + if kd, ok := d.Impl().(*kernfs.Dentry); ok { + if hfd, ok := kd.Inode().(vfs.HostFDProvider); ok { + rawHostFD = hfd.HostFD() + } + } + } + if rawHostFD == -1 { + log.Warningf("%s.GetFilesystem: fd is %T, not a FUSE device or host FD", fsType.Name(), fuseFDGeneric.Impl()) return nil, nil, linuxerr.EINVAL } + return fsType.getFilesystemHostFD(ctx, vfsObj, creds, kernelTask, int32(rawHostFD), devMinor, fsopts) +} + +// getFilesystemDeviceFD creates a FUSE filesystem backed by an in-sandbox +// /dev/fuse DeviceFD. +func (fsType FilesystemType) getFilesystemDeviceFD(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, kernelTask *kernel.Task, fuseFD *DeviceFD, devMinor uint32, fsopts *filesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { fuseFD.mu.Lock() connected := fuseFD.connected() - // Create a new FUSE filesystem. fs, err := newFUSEFilesystem(ctx, vfsObj, &fsType, fuseFD, devMinor, fsopts) if err != nil { log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err) @@ -155,9 +179,64 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } } - // root is the fusefs root directory. root := fs.newRoot(ctx, creds, fsopts.rootMode) + return fs.VFSFilesystem(), root.VFSDentry(), nil +} + +// getFilesystemHostFD creates a FUSE filesystem that communicates with a FUSE +// server running on the host via a host file descriptor. +func (fsType FilesystemType) getFilesystemHostFD(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, kernelTask *kernel.Task, hostFD int32, devMinor uint32, fsopts *filesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + // Dup the host FD so that the FUSE connection owns its own copy. + // The original may be shared with or closed by the host import path + // (e.g. 
socket endpoints take ownership of the FD). + dupFD, err := unix.Dup(int(hostFD)) + if err != nil { + log.Warningf("%s.getFilesystemHostFD: dup failed: %v", fsType.Name(), err) + return nil, nil, err + } + // The host import path sets the FD to non-blocking for epoll-based I/O. + // The FUSE passthrough connection uses synchronous blocking I/O, so + // clear the non-blocking flag. + // NOTE(review): O_NONBLOCK is a file status flag, and dup(2) leaves both + // descriptors sharing file status flags, so this presumably also makes + // the original imported FD blocking. Confirm the host import path does + // not depend on that FD staying non-blocking. + if err := unix.SetNonblock(dupFD, false); err != nil { + unix.Close(dupFD) + log.Warningf("%s.getFilesystemHostFD: SetNonblock failed: %v", fsType.Name(), err) + return nil, nil, err + } + conn, err := newFUSEConnectionOpts(fsopts) + if err != nil { + unix.Close(dupFD) + log.Warningf("%s.getFilesystemHostFD: newFUSEConnection failed: %v", fsType.Name(), err) + return nil, nil, err + } + + hostConn := newHostConnection(conn, int32(dupFD)) + conn.fuseConn = hostConn + + fs := &filesystem{ + devMinor: devMinor, + opts: fsopts, + conn: conn, + clock: ktime.RealtimeClockFromContext(ctx), + } + fs.VFSFilesystem().Init(vfsObj, &fsType, fs) + + rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace() + hasSysAdmin := creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs) + if err := hostConn.InitSend(creds, uint32(kernelTask.ThreadID()), hasSysAdmin); err != nil { + log.Warningf("%s.getFilesystemHostFD: InitSend failed: %v", fsType.Name(), err) + // Release the filesystem before bailing out: filesystem.Release calls + // hostConn.release, which closes dupFD. Returning without this leaks + // the duplicated descriptor and the half-built filesystem. 
+ fs.VFSFilesystem().DecRef(ctx) + return nil, nil, err + } + + root := fs.newRoot(ctx, creds, fsopts.rootMode) return fs.VFSFilesystem(), root.VFSDentry(), nil } @@ -295,6 +374,7 @@ func newFUSEFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, fsTyp // Release implements vfs.FilesystemImpl.Release. 
func (fs *filesystem) Release(ctx context.Context) { + fs.conn.fuseConn.release(ctx) fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) fs.Filesystem.Release(ctx) } diff --git a/pkg/sentry/fsimpl/fuse/host_connection.go b/pkg/sentry/fsimpl/fuse/host_connection.go new file mode 100644 index 0000000000..c2e5a1c053 --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/host_connection.go @@ -0,0 +1,285 @@
+// Copyright 2026 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sync"
+)
+
+// respBufPool recycles the FUSE_MIN_READ_BUFFER-sized buffers that readLoop
+// reads responses into.
+var respBufPool = sync.Pool{
+	New: func() any {
+		b := make([]byte, linux.FUSE_MIN_READ_BUFFER)
+		return &b
+	},
+}
+
+// hostConnection implements fuseConn for the host FD passthrough path.
+// Instead of using the /dev/fuse device within the sandbox, it writes FUSE
+// requests to and reads FUSE responses from a host FD. This allows a FUSE
+// server running outside the sandbox to serve the filesystem.
+//
+// Multiple requests can be in flight concurrently. Writes are serialized by
+// writeMu, while a background reader goroutine dispatches responses to callers
+// via the connection's completions map.
+type hostConnection struct {
+	// conn holds shared FUSE connection state (protocol version, limits, etc).
+	conn *connection
+
+	// hostFD is the host file descriptor for the FUSE connection.
+	hostFD int32
+
+	// writeMu serializes write operations on hostFD.
+	writeMu sync.Mutex
+}
+
+// newHostConnection creates a hostConnection that communicates over hostFD.
+func newHostConnection(conn *connection, hostFD int32) *hostConnection {
+	return &hostConnection{
+		conn:   conn,
+		hostFD: hostFD,
+	}
+}
+
+// startReader launches the background goroutine that reads responses from the
+// host FD and dispatches them to waiting callers. Must be called after the
+// FUSE_INIT handshake completes.
+func (hc *hostConnection) startReader() {
+	go hc.readLoop()
+}
+
+// readLoop reads FUSE responses from the host FD and dispatches them to the
+// corresponding callers via the connection's completions map.
+func (hc *hostConnection) readLoop() {
+	for {
+		bufp := respBufPool.Get().(*[]byte)
+		respBuf := (*bufp)[:linux.FUSE_MIN_READ_BUFFER]
+
+		n, err := unix.Read(int(hc.hostFD), respBuf)
+		if err != nil || n == 0 {
+			respBufPool.Put(bufp)
+			hc.abortPending()
+			return
+		}
+		if n < int(linux.SizeOfFUSEHeaderOut) {
+			respBufPool.Put(bufp)
+			log.Warningf("fuse host connection: short read %d bytes, need at least %d", n, linux.SizeOfFUSEHeaderOut)
+			continue
+		}
+
+		var hdr linux.FUSEHeaderOut
+		hdr.UnmarshalUnsafe(respBuf[:linux.SizeOfFUSEHeaderOut])
+
+		// hdr.Len is chosen by the server. It must cover at least the
+		// header, otherwise fut.data below would be shorter than the
+		// header it is supposed to start with.
+		if hdr.Len < uint32(linux.SizeOfFUSEHeaderOut) {
+			respBufPool.Put(bufp)
+			log.Warningf("fuse host connection: response length %d is smaller than the %d-byte header", hdr.Len, linux.SizeOfFUSEHeaderOut)
+			continue
+		}
+		if hdr.Len > uint32(n) {
+			respBufPool.Put(bufp)
+			log.Warningf("fuse host connection: response says %d bytes but only read %d", hdr.Len, n)
+			continue
+		}
+
+		hc.conn.mu.Lock()
+		fut, ok := hc.conn.completions[hdr.Unique]
+		if ok {
+			delete(hc.conn.completions, hdr.Unique)
+			fut.hdr = &hdr
+			copy(fut.buf[:], respBuf[:hdr.Len])
+			fut.data = fut.buf[:hdr.Len]
+			select {
+			case hc.conn.fullQueueCh <- struct{}{}:
+			default:
+			}
+			hc.conn.numActiveRequests--
+			close(fut.ch)
+		}
+		hc.conn.mu.Unlock()
+		respBufPool.Put(bufp)
+	}
+}
+
+// abortPending wakes all callers blocked on a response with closed channels
+// and marks the connection as disconnected. Called when the reader goroutine
+// exits due to an error or FD closure.
+func (hc *hostConnection) abortPending() {
+	hc.conn.mu.Lock()
+	defer hc.conn.mu.Unlock()
+	// Nothing will read responses after this point, so make call() refuse
+	// new requests rather than letting them wait for a reply forever.
+	hc.conn.connected = false
+	// NOTE(review): the futures are completed with no hdr/data set; confirm
+	// that futureResponse.resolve reports that as an aborted request.
+	for id, fut := range hc.conn.completions {
+		delete(hc.conn.completions, id)
+		hc.conn.numActiveRequests--
+		close(fut.ch)
+	}
+}
+
+// call implements fuseConn.call. It registers a futureResponse, writes the
+// request to the host FD, and blocks until the reader goroutine dispatches
+// the matching response.
+func (hc *hostConnection) call(ctx context.Context, r *Request) (*Response, error) {
+	hc.conn.mu.Lock()
+	if !hc.conn.connected {
+		hc.conn.mu.Unlock()
+		return nil, linuxerr.ECONNABORTED
+	}
+	hc.conn.numActiveRequests++
+	fut := newFutureResponse(r)
+	hc.conn.completions[r.id] = fut
+	hc.conn.mu.Unlock()
+
+	if err := hc.writeRequest(r); err != nil {
+		hc.conn.mu.Lock()
+		delete(hc.conn.completions, r.id)
+		hc.conn.numActiveRequests--
+		hc.conn.mu.Unlock()
+		return nil, err
+	}
+
+	// Convert the error the same way deviceConn.call does so that both
+	// transports hand the same kind of error back to callers.
+	res, err := fut.resolve(ctx)
+	if err != nil {
+		return res, linuxError(err)
+	}
+	return res, nil
+}
+
+// Call makes a request to the server via the host FD and blocks until a
+// response is received. It mirrors connection.Call but dispatches through the
+// host I/O path.
+func (hc *hostConnection) Call(ctx context.Context, r *Request) (*Response, error) {
+	if !hc.conn.isInitialized() && r.hdr.Opcode != linux.FUSE_INIT {
+		if err := ctx.Block(hc.conn.initializedChan); err != nil {
+			return nil, linuxError(err)
+		}
+	}
+
+	hc.conn.mu.Lock()
+	connected := hc.conn.connected
+	connInitError := hc.conn.connInitError
+	hc.conn.mu.Unlock()
+
+	if !connected {
+		return nil, linuxerr.ENOTCONN
+	}
+
+	if connInitError {
+		return nil, linuxerr.ECONNREFUSED
+	}
+
+	return hc.call(ctx, r)
+}
+
+// CallAsync makes an async (fire-and-forget) request via the host FD. The
+// response is read and discarded.
+func (hc *hostConnection) CallAsync(ctx context.Context, r *Request) error {
+	r.async = true
+	_, err := hc.Call(ctx, r)
+	return err
+}
+
+// release implements fuseConn.release. It drops the connection reference and
+// tears down the host FD.
+func (hc *hostConnection) release(ctx context.Context) {
+	hc.conn.DecRef(ctx)
+	// Shut the socket down before closing it: readLoop may be blocked in
+	// read(2) on hostFD, and shutdown makes that read return so the
+	// goroutine exits. The error is deliberately ignored because hostFD is
+	// not guaranteed to be a socket.
+	_ = unix.Shutdown(int(hc.hostFD), unix.SHUT_RDWR)
+	unix.Close(int(hc.hostFD))
+}
+
+// writeRequest writes a FUSE request to the host FD under writeMu.
+func (hc *hostConnection) writeRequest(r *Request) error {
+	hc.writeMu.Lock()
+	defer hc.writeMu.Unlock()
+	data := r.data
+	for len(data) > 0 {
+		n, err := unix.Write(int(hc.hostFD), data)
+		if err != nil {
+			return err
+		}
+		data = data[n:]
+	}
+	return nil
+}
+
+// InitSend performs the FUSE_INIT handshake synchronously over the host FD.
+// After a successful handshake, it starts the background reader goroutine
+// for concurrent request processing.
+// It returns EIO if the reply is shorter than a FUSE header or if its length
+// field is inconsistent with the number of bytes read.
+func (hc *hostConnection) InitSend(creds *auth.Credentials, pid uint32, hasSysAdminCap bool) error {
+	in := linux.FUSEInitIn{
+		Major:        linux.FUSE_KERNEL_VERSION,
+		Minor:        linux.FUSE_KERNEL_MINOR_VERSION,
+		MaxReadahead: fuseDefaultMaxReadahead,
+		Flags:        fuseDefaultInitFlags,
+	}
+
+	req := hc.conn.NewRequest(creds, pid, 0, linux.FUSE_INIT, &in)
+
+	if err := hc.writeRequest(req); err != nil {
+		return err
+	}
+
+	respBuf := make([]byte, linux.FUSE_MIN_READ_BUFFER)
+	n, err := unix.Read(int(hc.hostFD), respBuf)
+	if err != nil {
+		return err
+	}
+	if n < int(linux.SizeOfFUSEHeaderOut) {
+		return linuxerr.EIO
+	}
+
+	var hdr linux.FUSEHeaderOut
+	hdr.UnmarshalUnsafe(respBuf[:linux.SizeOfFUSEHeaderOut])
+	// hdr.Len is chosen by the server; validate it against the bytes that
+	// were actually read before using it to slice respBuf.
+	if hdr.Len < uint32(linux.SizeOfFUSEHeaderOut) || hdr.Len > uint32(n) {
+		return linuxerr.EIO
+	}
+
+	res := &Response{
+		opcode: linux.FUSE_INIT,
+		hdr:    hdr,
+		data:   respBuf[:hdr.Len],
+	}
+
+	hc.conn.mu.Lock()
+	defer hc.conn.mu.Unlock()
+	if err := hc.conn.InitRecv(res, hasSysAdminCap); err != nil {
+		return err
+	}
+
+	hc.startReader()
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/host_connection_integration_test.go b/pkg/sentry/fsimpl/fuse/host_connection_integration_test.go new file mode 100644 index 0000000000..3700ff299d --- /dev/null +++ 
b/pkg/sentry/fsimpl/fuse/host_connection_integration_test.go @@ -0,0 +1,504 @@ +// Copyright 2026 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fuse + +import ( + "os" + "path/filepath" + "testing" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// testFUSEServer implements a minimal FUSE protocol server over a socketpair. +// It handles the operations needed to exercise basic filesystem I/O: +// INIT, GETATTR, LOOKUP, OPEN, READ, WRITE, FLUSH, RELEASE. +// +// The server is backed by a real directory on the host filesystem. 
+type testFUSEServer struct { + fd int + backDir string + nextFh uint64 + openFiles map[uint64]*os.File +} + +func newTestFUSEServer(fd int, backDir string) *testFUSEServer { + return &testFUSEServer{ + fd: fd, + backDir: backDir, + nextFh: 1, + openFiles: make(map[uint64]*os.File), + } +} + +func (s *testFUSEServer) serve(t *testing.T, done chan struct{}) { + t.Helper() + defer close(done) + for { + buf := make([]byte, 64*1024) + n, err := unix.Read(s.fd, buf) + if err != nil || n == 0 { + return + } + if n < int(linux.SizeOfFUSEHeaderIn) { + t.Errorf("fuse server: short request %d bytes", n) + return + } + buf = buf[:n] + + var hdr linux.FUSEHeaderIn + hdr.UnmarshalUnsafe(buf[:linux.SizeOfFUSEHeaderIn]) + payload := buf[linux.SizeOfFUSEHeaderIn:] + + resp := s.handleRequest(&hdr, payload) + if resp == nil { + continue + } + if _, err := unix.Write(s.fd, resp); err != nil { + return + } + } +} + +func (s *testFUSEServer) handleRequest(hdr *linux.FUSEHeaderIn, payload []byte) []byte { + switch hdr.Opcode { + case linux.FUSE_INIT: + return s.handleInit(hdr) + case linux.FUSE_GETATTR: + return s.handleGetAttr(hdr) + case linux.FUSE_LOOKUP: + return s.handleLookup(hdr, payload) + case linux.FUSE_OPEN: + return s.handleOpen(hdr, payload) + case linux.FUSE_READ: + return s.handleRead(hdr, payload) + case linux.FUSE_WRITE: + return s.handleWrite(hdr, payload) + case linux.FUSE_FLUSH: + return s.replyOK(hdr) + case linux.FUSE_RELEASE: + return s.handleRelease(hdr, payload) + case linux.FUSE_ACCESS: + return s.replyOK(hdr) + default: + return s.replyError(hdr, -int32(unix.ENOSYS)) + } +} + +func (s *testFUSEServer) handleInit(hdr *linux.FUSEHeaderIn) []byte { + out := linux.FUSEInitOut{ + Major: linux.FUSE_KERNEL_VERSION, + Minor: linux.FUSE_KERNEL_MINOR_VERSION, + MaxWrite: 65536, + } + return s.marshalReply(hdr, &out) +} + +func (s *testFUSEServer) handleGetAttr(hdr *linux.FUSEHeaderIn) []byte { + path := s.backDir + if hdr.NodeID != linux.FUSE_ROOT_ID { + path = 
filepath.Join(s.backDir, "testfile")
+	}
+	var stat unix.Stat_t
+	if err := unix.Stat(path, &stat); err != nil {
+		// Any stat failure is reported to the client as ENOENT.
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+	out := linux.FUSEAttrOut{
+		AttrValid: 1,
+		Attr:      statToFUSEAttr(stat, hdr.NodeID),
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleLookup answers FUSE_LOOKUP. The name is the payload up to its first
+// NUL byte and is resolved directly under backDir. A successful reply always
+// uses node ID 2 for the child; a failed stat is reported as ENOENT.
+func (s *testFUSEServer) handleLookup(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	nameEnd := 0
+	for nameEnd < len(payload) && payload[nameEnd] != 0 {
+		nameEnd++
+	}
+	name := string(payload[:nameEnd])
+
+	path := filepath.Join(s.backDir, name)
+	var stat unix.Stat_t
+	if err := unix.Stat(path, &stat); err != nil {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+
+	const childNodeID uint64 = 2
+	out := linux.FUSEEntryOut{
+		NodeID:     childNodeID,
+		Generation: 1,
+		EntryValid: 1,
+		AttrValid:  1,
+		Attr:       statToFUSEAttr(stat, childNodeID),
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleOpen answers FUSE_OPEN. It always opens backDir/testfile, whatever
+// node the request names, and honours only the access-mode bits of the
+// requested flags. The file is stored under a freshly allocated handle that
+// is returned to the client together with FOPEN_DIRECT_IO. An open failure is
+// reported as EIO.
+func (s *testFUSEServer) handleOpen(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEOpenIn
+	// NOTE(review): slicing panics if payload is shorter than FUSEOpenIn.
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	path := filepath.Join(s.backDir, "testfile")
+	flags := int(in.Flags) & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)
+	f, err := os.OpenFile(path, flags, 0)
+	if err != nil {
+		return s.replyError(hdr, -int32(unix.EIO))
+	}
+
+	fh := s.nextFh
+	s.nextFh++
+	s.openFiles[fh] = f
+
+	out := linux.FUSEOpenOut{
+		Fh:       fh,
+		OpenFlag: linux.FOPEN_DIRECT_IO,
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleRead answers FUSE_READ with up to in.Size bytes read at in.Offset
+// from the file registered under in.Fh. An unknown handle yields EBADF.
+func (s *testFUSEServer) handleRead(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEReadIn
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	f, ok := s.openFiles[in.Fh]
+	if !ok {
+		return s.replyError(hdr, -int32(unix.EBADF))
+	}
+
+	data := make([]byte, in.Size)
+	n, err := f.ReadAt(data, int64(in.Offset))
+	// A failed read that produced no bytes is answered with an empty data
+	// reply rather than an error.
+	if err != nil && n == 0 {
+		return s.dataReply(hdr, nil)
+	}
+	return s.dataReply(hdr, data[:n])
+}
+
+// handleWrite answers FUSE_WRITE. Every payload byte after the FUSEWriteIn
+// header is written at in.Offset to the file registered under in.Fh, and the
+// reply carries the number of bytes written. An unknown handle yields EBADF
+// and a write failure yields EIO.
+func (s *testFUSEServer) handleWrite(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEWriteIn
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	f, ok := s.openFiles[in.Fh]
+	if !ok {
+		return s.replyError(hdr, -int32(unix.EBADF))
+	}
+
+	writeData := payload[in.SizeBytes():]
+	n, err := f.WriteAt(writeData, int64(in.Offset))
+	if err != nil {
+		return s.replyError(hdr, -int32(unix.EIO))
+	}
+
+	out := linux.FUSEWriteOut{Size: uint32(n)}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleRelease answers FUSE_RELEASE by closing and forgetting the file
+// registered under in.Fh, if there is one. The reply is always success.
+func (s *testFUSEServer) handleRelease(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEReleaseIn
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+	if f, ok := s.openFiles[in.Fh]; ok {
+		f.Close()
+		delete(s.openFiles, in.Fh)
+	}
+	return s.replyOK(hdr)
+}
+
+// marshalUnsafer is the subset of the marshalling interface that the reply
+// helpers need from a response payload.
+type marshalUnsafer interface {
+	SizeBytes() int
+	MarshalUnsafe(dst []byte) []byte
+}
+
+// marshalReply builds a successful reply: a FUSEHeaderOut echoing the
+// request's Unique value, followed by the marshalled payload.
+func (s *testFUSEServer) marshalReply(hdr *linux.FUSEHeaderIn, payload marshalUnsafer) []byte {
+	hdrSize := int(linux.SizeOfFUSEHeaderOut)
+	payloadSize := payload.SizeBytes()
+	buf := make([]byte, hdrSize+payloadSize)
+	outHdr := linux.FUSEHeaderOut{
+		Len:    uint32(hdrSize + payloadSize),
+		Unique: hdr.Unique,
+	}
+	outHdr.MarshalUnsafe(buf[:hdrSize])
+	payload.MarshalUnsafe(buf[hdrSize:])
+	return buf
+}
+
+// dataReply builds a successful reply whose body is the raw bytes in data.
+// A nil or empty data produces a header-only reply.
+func (s *testFUSEServer) dataReply(hdr *linux.FUSEHeaderIn, data []byte) []byte {
+	hdrSize := int(linux.SizeOfFUSEHeaderOut)
+	buf := make([]byte, hdrSize+len(data))
+	outHdr := linux.FUSEHeaderOut{
+		Len:    uint32(hdrSize + len(data)),
+		Unique: hdr.Unique,
+	}
+	outHdr.MarshalUnsafe(buf[:hdrSize])
+	copy(buf[hdrSize:], data)
+	return buf
+}
+
+// replyOK builds a header-only reply with a zero error code.
+func (s *testFUSEServer) replyOK(hdr *linux.FUSEHeaderIn) []byte {
+	return s.replyError(hdr, 0)
+}
+
+// replyError builds a header-only reply carrying errno, which callers pass
+// as a negated errno value (or 0 for success).
+func (s *testFUSEServer) replyError(hdr *linux.FUSEHeaderIn, errno int32) []byte {
+	hdrSize := int(linux.SizeOfFUSEHeaderOut)
+	buf := make([]byte, hdrSize)
+	outHdr := linux.FUSEHeaderOut{
+		Len:    uint32(hdrSize),
+		Error:  errno,
+		Unique: hdr.Unique,
+	}
+	outHdr.MarshalUnsafe(buf)
+	return buf
+}
+
+// statToFUSEAttr converts a host stat result into a FUSEAttr. The inode
+// number comes from ino rather than from stat, so callers decide which node
+// ID the client sees.
+func statToFUSEAttr(stat unix.Stat_t, ino uint64) linux.FUSEAttr {
+	return linux.FUSEAttr{
+		Ino:       ino,
+		Size:      uint64(stat.Size),
+		Blocks:    uint64(stat.Blocks),
+		Atime:     uint64(stat.Atim.Sec),
+		Mtime:     uint64(stat.Mtim.Sec),
+		Ctime:     uint64(stat.Ctim.Sec),
+		AtimeNsec: uint32(stat.Atim.Nsec),
+		MtimeNsec: uint32(stat.Mtim.Nsec),
+		CtimeNsec: uint32(stat.Ctim.Nsec),
+		Mode:      stat.Mode,
+		Nlink:     uint32(stat.Nlink),
+		UID:       stat.Uid,
+		GID:       stat.Gid,
+		BlkSize:   uint32(stat.Blksize),
+	}
+}
+
+// newTestHostFUSEConnection creates a socketpair, starts a FUSE protocol
+// server on one end backed by backDir, and returns a hostConnection using
+// the other end. FUSE_INIT is not sent here; callers perform it themselves
+// through InitSend.
+//
+// The returned channel is passed to the server goroutine as its completion
+// signal, and the returned function shuts down and closes both sockets after
+// waiting on that channel.
+func newTestHostFUSEConnection(t *testing.T, backDir string) (*hostConnection, chan struct{}, func()) {
+	t.Helper()
+
+	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET, 0)
+	if err != nil {
+		t.Fatalf("Socketpair: %v", err)
+	}
+
+	server := newTestFUSEServer(fds[1], backDir)
+	serverDone := make(chan struct{})
+	go server.serve(t, serverDone)
+
+	fsopts := filesystemOptions{
+		maxActiveRequests: maxActiveRequestsDefault,
+		maxRead:           65536,
+	}
+	conn, err := newFUSEConnectionOpts(&fsopts)
+	if err != nil {
+		unix.Close(fds[0])
+		unix.Close(fds[1])
+		t.Fatalf("newFUSEConnectionOpts: %v", err)
+	}
+	hc := newHostConnection(conn, int32(fds[0]))
+
+	cleanup := func() {
+		// Shutdown wakes goroutines blocked in read() on either end.
+		unix.Shutdown(fds[1], unix.SHUT_RDWR)
+		unix.Shutdown(fds[0], unix.SHUT_RDWR)
+		<-serverDone
+		unix.Close(fds[0])
+		unix.Close(fds[1])
+	}
+	return hc, serverDone, cleanup
+}
+
+// TestHostFUSEReadFile exercises a full FUSE read through the host passthrough
+// path: INIT → LOOKUP → OPEN → READ → RELEASE, with the FUSE server backed
+// by a real file on the host.
+func TestHostFUSEReadFile(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	backDir := t.TempDir()
+	testData := "hello from the host FUSE server\n"
+	if err := os.WriteFile(filepath.Join(backDir, "testfile"), []byte(testData), 0644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	hc, _, cleanup := newTestHostFUSEConnection(t, backDir)
+	defer cleanup()
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+
+	// FUSE_INIT
+	if err := hc.InitSend(creds, 1, true); err != nil {
+		t.Fatalf("InitSend: %v", err)
+	}
+	if !hc.conn.isInitialized() {
+		t.Fatal("connection not initialized after InitSend")
+	}
+
+	// FUSE_LOOKUP "testfile"
+	lookupIn := linux.FUSELookupIn{Name: linux.CString("testfile")}
+	lookupReq := hc.conn.NewRequest(creds, 1, linux.FUSE_ROOT_ID, linux.FUSE_LOOKUP, &lookupIn)
+	lookupResp, err := hc.Call(s.Ctx, lookupReq)
+	if err != nil {
+		t.Fatalf("LOOKUP Call: %v", err)
+	}
+	if lookupResp.Error() != nil {
+		t.Fatalf("LOOKUP error: %v", lookupResp.Error())
+	}
+	var entryOut linux.FUSEEntryOut
+	if err := lookupResp.UnmarshalPayload(&entryOut); err != nil {
+		t.Fatalf("LOOKUP unmarshal: %v", err)
+	}
+	if entryOut.NodeID == 0 {
+		t.Fatal("LOOKUP returned nodeID 0")
+	}
+
+	// FUSE_OPEN
+	openIn := linux.FUSEOpenIn{Flags: uint32(linux.O_RDONLY)}
+	openReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_OPEN, &openIn)
+	openResp, err := hc.Call(s.Ctx, openReq)
+	if err != nil {
+		t.Fatalf("OPEN Call: %v", err)
+	}
+	if openResp.Error() != nil {
+		t.Fatalf("OPEN error: %v", openResp.Error())
+	}
+	var openOut linux.FUSEOpenOut
+	if err := openResp.UnmarshalPayload(&openOut); err != nil {
+		t.Fatalf("OPEN unmarshal: %v", err)
+	}
+
+	// FUSE_READ
+	readIn := linux.FUSEReadIn{
+		Fh:     openOut.Fh,
+		Offset: 0,
+		Size:   uint32(hostarch.PageSize),
+	}
+	readReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_READ, &readIn)
+	readResp, err := hc.Call(s.Ctx, readReq)
+	if err != nil {
+		t.Fatalf("READ Call: %v", err)
+	}
+	if readResp.Error() != nil {
+		t.Fatalf("READ error: %v", readResp.Error())
+	}
+	// A READ reply is raw bytes, so the data is everything after the header.
+	readData := readResp.data[readResp.hdr.SizeBytes():]
+	if string(readData) != testData {
+		t.Fatalf("READ data: got %q, want %q", string(readData), testData)
+	}
+
+	// FUSE_RELEASE
+	releaseIn := linux.FUSEReleaseIn{Fh: openOut.Fh}
+	releaseReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_RELEASE, &releaseIn)
+	releaseResp, err := hc.Call(s.Ctx, releaseReq)
+	if err != nil {
+		t.Fatalf("RELEASE Call: %v", err)
+	}
+	if releaseResp.Error() != nil {
+		t.Fatalf("RELEASE error: %v", releaseResp.Error())
+	}
+}
+
+// TestHostFUSEWriteFile exercises a full FUSE write through the host
+// passthrough path: INIT → LOOKUP → OPEN → WRITE → RELEASE, then verifies
+// the data was written to the backing file on the host.
+func TestHostFUSEWriteFile(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	backDir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(backDir, "testfile"), nil, 0644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	hc, _, cleanup := newTestHostFUSEConnection(t, backDir)
+	defer cleanup()
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+
+	// FUSE_INIT
+	if err := hc.InitSend(creds, 1, true); err != nil {
+		t.Fatalf("InitSend: %v", err)
+	}
+
+	// FUSE_LOOKUP "testfile"
+	lookupIn := linux.FUSELookupIn{Name: linux.CString("testfile")}
+	lookupReq := hc.conn.NewRequest(creds, 1, linux.FUSE_ROOT_ID, linux.FUSE_LOOKUP, &lookupIn)
+	lookupResp, err := hc.Call(s.Ctx, lookupReq)
+	if err != nil {
+		t.Fatalf("LOOKUP Call: %v", err)
+	}
+	if lookupResp.Error() != nil {
+		t.Fatalf("LOOKUP error: %v", lookupResp.Error())
+	}
+	var entryOut linux.FUSEEntryOut
+	if err := lookupResp.UnmarshalPayload(&entryOut); err != nil {
+		t.Fatalf("LOOKUP unmarshal: %v", err)
+	}
+
+	// FUSE_OPEN for writing
+	openIn := linux.FUSEOpenIn{Flags: uint32(linux.O_WRONLY)}
+	openReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_OPEN, &openIn)
+	openResp, err := hc.Call(s.Ctx, openReq)
+	if err != nil {
+		t.Fatalf("OPEN Call: %v", err)
+	}
+	if openResp.Error() != nil {
+		t.Fatalf("OPEN error: %v", openResp.Error())
+	}
+	var openOut linux.FUSEOpenOut
+	if err := openResp.UnmarshalPayload(&openOut); err != nil {
+		t.Fatalf("OPEN unmarshal: %v", err)
+	}
+
+	// FUSE_WRITE
+	writeData := []byte("written via host FUSE passthrough\n")
+	writeIn := linux.FUSEWritePayloadIn{
+		Header: linux.FUSEWriteIn{
+			Fh:     openOut.Fh,
+			Offset: 0,
+			Size:   uint32(len(writeData)),
+		},
+		Payload: writeData,
+	}
+	writeReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_WRITE, &writeIn)
+	writeResp, err := hc.Call(s.Ctx, writeReq)
+	if err != nil {
+		t.Fatalf("WRITE Call: %v", err)
+	}
+	if writeResp.Error() != nil {
+		t.Fatalf("WRITE error: %v", writeResp.Error())
+	}
+	var writeOut linux.FUSEWriteOut
+	if err := writeResp.UnmarshalPayload(&writeOut); err != nil {
+		t.Fatalf("WRITE unmarshal: %v", err)
+	}
+	if writeOut.Size != uint32(len(writeData)) {
+		t.Fatalf("WRITE size: got %d, want %d", writeOut.Size, len(writeData))
+	}
+
+	// FUSE_RELEASE. The result is checked, as in the read test, so that a
+	// failed release is reported instead of being silently dropped.
+	releaseIn := linux.FUSEReleaseIn{Fh: openOut.Fh}
+	releaseReq := hc.conn.NewRequest(creds, 1, entryOut.NodeID, linux.FUSE_RELEASE, &releaseIn)
+	releaseResp, err := hc.Call(s.Ctx, releaseReq)
+	if err != nil {
+		t.Fatalf("RELEASE Call: %v", err)
+	}
+	if releaseResp.Error() != nil {
+		t.Fatalf("RELEASE error: %v", releaseResp.Error())
+	}
+
+	// Verify the data reached the host filesystem.
+	got, err := os.ReadFile(filepath.Join(backDir, "testfile"))
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	if string(got) != string(writeData) {
+		t.Fatalf("backing file: got %q, want %q", string(got), string(writeData))
+	}
+}
diff --git a/pkg/sentry/fsimpl/fuse/host_connection_test.go b/pkg/sentry/fsimpl/fuse/host_connection_test.go
new file mode 100644
index 0000000000..66afc248c5
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/host_connection_test.go
@@ -0,0 +1,387 @@
+// Copyright 2026 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"sync"
+	"testing"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/marshal/primitive"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// newTestHostConnection creates a hostConnection backed by a socketpair.
+// Returns the hostConnection, the server-side FD, and a cleanup function.
+// The connection is pre-initialized and the reader goroutine is started.
+func newTestHostConnection(t *testing.T) (*hostConnection, int, func()) {
+	t.Helper()
+
+	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET, 0)
+	if err != nil {
+		t.Fatalf("Socketpair: %v", err)
+	}
+
+	fsopts := filesystemOptions{
+		maxActiveRequests: maxActiveRequestsDefault,
+		maxRead:           4096,
+	}
+	conn, err := newFUSEConnectionOpts(&fsopts)
+	if err != nil {
+		unix.Close(fds[0])
+		unix.Close(fds[1])
+		t.Fatalf("newFUSEConnectionOpts: %v", err)
+	}
+
+	// Mark the connection as initialized by hand so tests can issue
+	// requests without a FUSE_INIT exchange.
+	conn.setInitialized()
+	conn.mu.Lock()
+	conn.connInitSuccess = true
+	conn.maxWrite = 4096
+	conn.mu.Unlock()
+
+	hc := newHostConnection(conn, int32(fds[0]))
+	hc.startReader()
+
+	cleanup := func() {
+		unix.Shutdown(fds[1], unix.SHUT_RDWR)
+		unix.Shutdown(fds[0], unix.SHUT_RDWR)
+		unix.Close(fds[0])
+		unix.Close(fds[1])
+	}
+	return hc, fds[1], cleanup
+}
+
+// echoServer reads one FUSE request from serverFD and echoes the payload
+// back as a response. It signals completion on the done channel.
+func echoServer(t *testing.T, serverFD int, done chan struct{}) {
+	t.Helper()
+	// Serving one request is the count == 1 case of echoServerN, which also
+	// closes done when it returns. Delegating keeps the two servers from
+	// drifting apart.
+	echoServerN(t, serverFD, 1, done)
+}
+
+// echoServerN reads count FUSE requests from serverFD and echoes each
+// payload back as a response. It signals completion on the done channel.
+func echoServerN(t *testing.T, serverFD int, count int, done chan struct{}) {
+	t.Helper()
+	defer close(done)
+
+	// One request buffer serves every iteration: the payload is copied into
+	// the response before the next read overwrites it.
+	buf := make([]byte, linux.FUSE_MIN_READ_BUFFER)
+	for i := 0; i < count; i++ {
+		n, err := unix.Read(serverFD, buf)
+		if err != nil {
+			t.Errorf("server Read %d: %v", i, err)
+			return
+		}
+		if n < int(linux.SizeOfFUSEHeaderIn) {
+			t.Errorf("server: short read %d bytes on request %d", n, i)
+			return
+		}
+
+		var reqHdr linux.FUSEHeaderIn
+		reqHdr.UnmarshalUnsafe(buf[:linux.SizeOfFUSEHeaderIn])
+
+		payload := buf[linux.SizeOfFUSEHeaderIn:n]
+		respLen := linux.SizeOfFUSEHeaderOut + uint32(len(payload))
+		respBuf := make([]byte, respLen)
+
+		respHdr := linux.FUSEHeaderOut{
+			Len:    respLen,
+			Error:  0,
+			Unique: reqHdr.Unique,
+		}
+		respHdr.MarshalUnsafe(respBuf[:linux.SizeOfFUSEHeaderOut])
+		copy(respBuf[linux.SizeOfFUSEHeaderOut:], payload)
+
+		if _, err := unix.Write(serverFD, respBuf); err != nil {
+			t.Errorf("server Write %d: %v", i, err)
+			return
+		}
+	}
+}
+
+// TestHostConnectionCall sends one request through Call and checks that the
+// echoed response carries the same unique ID and payload.
+func TestHostConnectionCall(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	hc, serverFD, cleanup := newTestHostConnection(t)
+	defer cleanup()
+
+	done := make(chan struct{})
+	go echoServer(t, serverFD, done)
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+	testObj := primitive.Uint32(42)
+	req := hc.conn.NewRequest(creds, 1, 1, echoTestOpcode, &testObj)
+
+	resp, err := hc.Call(s.Ctx, req)
+	if err != nil {
+		t.Fatalf("Call: %v", err)
+	}
+
+	<-done
+
+	if resp.hdr.Error != 0 {
+		t.Fatalf("response error: %d", resp.hdr.Error)
+	}
+	if resp.hdr.Unique != req.hdr.Unique {
+		t.Fatalf("unique mismatch: got %d, want %d", resp.hdr.Unique, req.hdr.Unique)
+	}
+
+	var got primitive.Uint32
+	if err := resp.UnmarshalPayload(&got); err != nil {
+		t.Fatalf("UnmarshalPayload: %v", err)
+	}
+	if got != testObj {
+		t.Fatalf("payload: got %d, want %d", got, testObj)
+	}
+}
+
+// TestHostConnectionInit runs InitSend against a hand-written server that
+// answers FUSE_INIT, then checks that the connection records a successful
+// initialization and a maxWrite of at least fuseMinMaxWrite.
+func TestHostConnectionInit(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET, 0)
+	if err != nil {
+		t.Fatalf("Socketpair: %v", err)
+	}
+	// Deferred calls run in reverse order: both ends are shut down first,
+	// then closed.
+	defer unix.Close(fds[1])
+	defer unix.Close(fds[0])
+	defer unix.Shutdown(fds[0], unix.SHUT_RDWR)
+	defer unix.Shutdown(fds[1], unix.SHUT_RDWR)
+
+	fsopts := filesystemOptions{
+		maxActiveRequests: maxActiveRequestsDefault,
+		maxRead:           4096,
+	}
+	conn, err := newFUSEConnectionOpts(&fsopts)
+	if err != nil {
+		t.Fatalf("newFUSEConnectionOpts: %v", err)
+	}
+	hc := newHostConnection(conn, int32(fds[0]))
+
+	const testMaxWrite uint32 = 65536
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+
+		buf := make([]byte, linux.FUSE_MIN_READ_BUFFER)
+		n, err := unix.Read(fds[1], buf)
+		if err != nil {
+			t.Errorf("server Read: %v", err)
+			return
+		}
+		// Reject a truncated request instead of decoding a header out of
+		// bytes that were never received.
+		if n < int(linux.SizeOfFUSEHeaderIn) {
+			t.Errorf("server: short read %d bytes", n)
+			return
+		}
+
+		var reqHdr linux.FUSEHeaderIn
+		reqHdr.UnmarshalUnsafe(buf[:linux.SizeOfFUSEHeaderIn])
+		if reqHdr.Opcode != linux.FUSE_INIT {
+			t.Errorf("expected FUSE_INIT opcode, got %d", reqHdr.Opcode)
+			return
+		}
+
+		initOut := linux.FUSEInitOut{
+			Major:    linux.FUSE_KERNEL_VERSION,
+			Minor:    linux.FUSE_KERNEL_MINOR_VERSION,
+			MaxWrite: testMaxWrite,
+		}
+		respLen := uint32(linux.SizeOfFUSEHeaderOut) + uint32(initOut.SizeBytes())
+		respBuf := make([]byte, respLen)
+
+		respHdr := linux.FUSEHeaderOut{
+			Len:    respLen,
+			Error:  0,
+			Unique: reqHdr.Unique,
+		}
+		respHdr.MarshalUnsafe(respBuf[:linux.SizeOfFUSEHeaderOut])
+		initOut.MarshalUnsafe(respBuf[linux.SizeOfFUSEHeaderOut:])
+
+		if _, err := unix.Write(fds[1], respBuf); err != nil {
+			t.Errorf("server Write: %v", err)
+		}
+	}()
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+	if err := hc.InitSend(creds, 1, true); err != nil {
+		t.Fatalf("InitSend: %v", err)
+	}
+
+	<-done
+
+	if !conn.isInitialized() {
+		t.Fatal("connection not initialized after InitSend")
+	}
+
+	conn.mu.Lock()
+	if !conn.connInitSuccess {
+		t.Error("connInitSuccess not set")
+	}
+	if conn.maxWrite < fuseMinMaxWrite {
+		t.Errorf("maxWrite = %d, want >= %d", conn.maxWrite, fuseMinMaxWrite)
+	}
+	conn.mu.Unlock()
+}
+
+// TestHostConnectionCallAsync issues a CallAsync followed by a synchronous
+// Call and checks that the synchronous caller receives its own response.
+func TestHostConnectionCallAsync(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	hc, serverFD, cleanup := newTestHostConnection(t)
+	defer cleanup()
+
+	// The server echoes two requests: the async one and the sync one.
+	done := make(chan struct{})
+	go echoServerN(t, serverFD, 2, done)
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+	asyncPayload := primitive.Uint32(99)
+	asyncReq := hc.conn.NewRequest(creds, 1, 1, echoTestOpcode, &asyncPayload)
+
+	if err := hc.CallAsync(s.Ctx, asyncReq); err != nil {
+		t.Fatalf("CallAsync: %v", err)
+	}
+
+	// Make a subsequent sync Call to verify no stale data in the FD.
+	syncPayload := primitive.Uint32(123)
+	syncReq := hc.conn.NewRequest(creds, 2, 2, echoTestOpcode, &syncPayload)
+
+	resp, err := hc.Call(s.Ctx, syncReq)
+	if err != nil {
+		t.Fatalf("Call after CallAsync: %v", err)
+	}
+	<-done
+
+	if resp.hdr.Unique != syncReq.hdr.Unique {
+		t.Fatalf("unique mismatch after async: got %d, want %d", resp.hdr.Unique, syncReq.hdr.Unique)
+	}
+
+	var got primitive.Uint32
+	if err := resp.UnmarshalPayload(&got); err != nil {
+		t.Fatalf("UnmarshalPayload: %v", err)
+	}
+	if got != syncPayload {
+		t.Fatalf("payload after async: got %d, want %d", got, syncPayload)
+	}
+}
+
+// TestHostConnectionConcurrent issues numRequests Calls from separate
+// goroutines against one connection and checks that each caller gets back
+// the unique ID and payload it sent.
+func TestHostConnectionConcurrent(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	hc, serverFD, cleanup := newTestHostConnection(t)
+	defer cleanup()
+
+	const numRequests = 10
+
+	serverDone := make(chan struct{})
+	go echoServerN(t, serverFD, numRequests, serverDone)
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+
+	var wg sync.WaitGroup
+	// Buffered for one error per goroutine, so senders never block.
+	errs := make(chan error, numRequests)
+
+	for i := 0; i < numRequests; i++ {
+		wg.Add(1)
+		go func(val uint32) {
+			defer wg.Done()
+			payload := primitive.Uint32(val)
+			req := hc.conn.NewRequest(creds, 1, 1, echoTestOpcode, &payload)
+
+			// Each goroutine needs its own context because NoTask.Block
+			// has unsynchronized state.
+			ctx := kernel.KernelFromContext(s.Ctx).SupervisorContext()
+			resp, err := hc.Call(ctx, req)
+			if err != nil {
+				errs <- err
+				return
+			}
+			// Mismatches are reported as EINVAL; the values themselves
+			// are not included in the error.
+			if resp.hdr.Unique != req.hdr.Unique {
+				errs <- linuxerr.EINVAL
+				return
+			}
+			var got primitive.Uint32
+			if err := resp.UnmarshalPayload(&got); err != nil {
+				errs <- err
+				return
+			}
+			if got != payload {
+				errs <- linuxerr.EINVAL
+				return
+			}
+		}(uint32(i))
+	}
+
+	wg.Wait()
+	close(errs)
+	// NOTE(review): if a Call fails before its request reaches the server,
+	// the server may still be waiting for it and this receive would block;
+	// confirm whether that can happen in practice.
+	<-serverDone
+
+	for err := range errs {
+		t.Fatalf("concurrent call failed: %v", err)
+	}
+}
+
+// TestHostConnectionNotConnected checks that Call fails with ENOTCONN once
+// the connection's connected flag has been cleared.
+func TestHostConnectionNotConnected(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	hc, _, cleanup := newTestHostConnection(t)
+	defer cleanup()
+
+	// Disconnect the connection.
+	hc.conn.mu.Lock()
+	hc.conn.connected = false
+	hc.conn.mu.Unlock()
+
+	creds := auth.CredentialsFromContext(s.Ctx)
+	testObj := primitive.Uint32(0)
+	req := hc.conn.NewRequest(creds, 1, 1, echoTestOpcode, &testObj)
+
+	_, err := hc.Call(s.Ctx, req)
+	if !linuxerr.Equals(linuxerr.ENOTCONN, err) {
+		t.Fatalf("expected ENOTCONN, got %v", err)
+	}
+}
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index 90786a41b5..0ffb2e0ccf 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -144,6 +144,10 @@ type futureResponse struct {
 
 	// If this request is async.
 	async bool
+
+	// buf is a fixed-size buffer for response data. The host connection
+	// path slices data from this buffer to avoid a per-response allocation.
+	buf [linux.FUSE_MIN_READ_BUFFER]byte
 }
 
 // newFutureResponse creates a future response to a FUSE request.
diff --git a/pkg/sentry/fsimpl/fuse/save_restore.go b/pkg/sentry/fsimpl/fuse/save_restore.go index a336ae8a12..2bf8da036a 100644 --- a/pkg/sentry/fsimpl/fuse/save_restore.go +++ b/pkg/sentry/fsimpl/fuse/save_restore.go @@ -20,6 +20,10 @@ func (fRes *futureResponse) afterLoad(context.Context) { fRes.ch = make(chan struct{}) } +func (conn *connection) afterLoad(context.Context) { + conn.fuseConn = &deviceConn{conn: conn} +} + func (conn *connection) saveFullQueueCh() int { return cap(conn.fullQueueCh) } diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index ad681735ad..a7d374ae86 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -806,6 +806,16 @@ func (f *fileDescription) Release(context.Context) { // noop } +// HostFD returns the underlying host file descriptor number. +func (f *fileDescription) HostFD() int { + return f.inode.hostFD +} + +// HostFD returns the underlying host file descriptor number. +func (i *inode) HostFD() int { + return i.hostFD +} + // Allocate implements vfs.FileDescriptionImpl.Allocate. func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { if f.inode.readonly { diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 48608b1523..e4f61d741b 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -567,3 +567,8 @@ type PrependPathSyntheticError struct{} func (PrependPathSyntheticError) Error() string { return "vfs.FilesystemImpl.PrependPath() prepended synthetic name" } + +// HostFDProvider is implemented by VFS objects backed by a host FD. 
+type HostFDProvider interface { + HostFD() int +} diff --git a/test/fuse_host/BUILD b/test/fuse_host/BUILD new file mode 100644 index 0000000000..77a50d51e5 --- /dev/null +++ b/test/fuse_host/BUILD @@ -0,0 +1,30 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package( + default_applicable_licenses = ["//:license"], + licenses = ["notice"], +) + +go_library( + name = "fusehost", + srcs = ["server.go"], + deps = [ + "//pkg/abi/linux", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +go_test( + name = "fusehost_test", + srcs = ["fuse_host_test.go"], + data = [ + "//runsc", + "//test/fuse_host/workload", + ], + library = ":fusehost", + deps = [ + "//pkg/test/testutil", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/test/fuse_host/fuse_host_test.go b/test/fuse_host/fuse_host_test.go new file mode 100644 index 0000000000..52c24431bb --- /dev/null +++ b/test/fuse_host/fuse_host_test.go @@ -0,0 +1,113 @@ +// Copyright 2026 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package fusehost
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"syscall"
+	"testing"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/test/testutil"
+)
+
+// guestFD is the descriptor number under which the workload receives the
+// sandbox end of the FUSE socket. It is used both in the --pass-fd mapping
+// and in the workload's --fd argument.
+const guestFD = 101
+
+// TestFuseHostPassthrough runs the workload binary under runsc with one end
+// of a socketpair passed in as guestFD, while Serve answers FUSE requests on
+// the other end from a temporary backing directory. The test passes when
+// runsc exits successfully.
+func TestFuseHostPassthrough(t *testing.T) {
+	runscPath, err := testutil.FindFile("runsc/runsc")
+	if err != nil {
+		t.Fatalf("FindFile(runsc): %v", err)
+	}
+	workloadPath, err := testutil.FindFile("test/fuse_host/workload/workload")
+	if err != nil {
+		t.Fatalf("FindFile(workload): %v", err)
+	}
+
+	// Create backing directory with test data.
+	backDir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(backDir, "testfile"), []byte("hello from the host FUSE server\n"), 0644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	// Create socketpair for FUSE communication.
+	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		t.Fatalf("Socketpair: %v", err)
+	}
+	sandboxFile := os.NewFile(uintptr(fds[0]), "fuse-sandbox")
+	defer sandboxFile.Close()
+	serverFD := fds[1]
+	defer unix.Close(serverFD)
+
+	// Start the FUSE server.
+	// NOTE(review): nothing waits for this goroutine; it appears to rely on
+	// the deferred Close of serverFD to end its read loop — confirm.
+	go Serve(serverFD, backDir)
+
+	// Set up OCI bundle with a writable /tmp for the FUSE mount point.
+	spec := testutil.NewSpecWithArgs(workloadPath, fmt.Sprintf("--fd=%d", guestFD))
+	spec.Mounts = append(spec.Mounts, specs.Mount{
+		Type:        "tmpfs",
+		Destination: "/tmp",
+	})
+	bundleDir, cleanupBundle, err := testutil.SetupBundleDir(spec)
+	if err != nil {
+		t.Fatalf("SetupBundleDir: %v", err)
+	}
+	defer cleanupBundle()
+
+	rootDir, cleanupRoot, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("SetupRootDir: %v", err)
+	}
+	defer cleanupRoot()
+
+	id := testutil.RandomContainerID()
+
+	// Build runsc command.
+	cmd := exec.Command(runscPath,
+		"--root="+rootDir,
+		"--rootless",
+		"--TESTONLY-unsafe-nonroot",
+		"--network=none",
+		"run",
+		// ExtraFiles[0] becomes descriptor 3 in the child, which is the
+		// host side of this mapping.
+		fmt.Sprintf("--pass-fd=3:%d", guestFD),
+		"--bundle="+bundleDir,
+		id,
+	)
+	cmd.ExtraFiles = []*os.File{sandboxFile}
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	// Run runsc in new user and mount namespaces, with the invoking user and
+	// group mapped to ID 0 inside them.
+	cmd.SysProcAttr = &unix.SysProcAttr{
+		Cloneflags: unix.CLONE_NEWUSER | unix.CLONE_NEWNS,
+		UidMappings: []syscall.SysProcIDMap{
+			{ContainerID: 0, HostID: os.Getuid(), Size: 1},
+		},
+		GidMappings: []syscall.SysProcIDMap{
+			{ContainerID: 0, HostID: os.Getgid(), Size: 1},
+		},
+		GidMappingsEnableSetgroups: false,
+		Credential: &syscall.Credential{
+			Uid: 0,
+			Gid: 0,
+		},
+	}
+
+	if err := cmd.Run(); err != nil {
+		t.Fatalf("runsc run: %v", err)
+	}
+}
diff --git a/test/fuse_host/server.go b/test/fuse_host/server.go
new file mode 100644
index 0000000000..817ae68d4d
--- /dev/null
+++ b/test/fuse_host/server.go
@@ -0,0 +1,314 @@
+// Copyright 2026 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fusehost implements a minimal FUSE protocol server for testing
+// the host FD passthrough path. It forwards all operations to a backing
+// directory on the host filesystem.
+package fusehost
+
+import (
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+// Serve runs a FUSE protocol server on fd, backed by the host directory
+// backDir.
It handles requests until the connection is closed or an error
+// occurs. Intended to be called as a goroutine.
+func Serve(fd int, backDir string) {
+	s := &server{
+		fd:        fd,
+		backDir:   backDir,
+		nextFh:    1,
+		openFiles: make(map[uint64]*os.File),
+	}
+	s.serve()
+}
+
+const (
+	// maxWrite is the largest WRITE payload advertised in the INIT reply.
+	maxWrite = 65536
+
+	// requestBufSize is the size of the request read buffer. A WRITE
+	// request is the request header and the write header followed by up to
+	// maxWrite bytes of data, so the buffer must be larger than maxWrite;
+	// otherwise a full-size write would be truncated when read from the
+	// socket.
+	requestBufSize = maxWrite + 4096
+)
+
+// server is the state of one FUSE connection: the socket it is served on,
+// the host directory backing it, and the files opened on behalf of the
+// client, keyed by file handle.
+type server struct {
+	fd        int
+	backDir   string
+	nextFh    uint64
+	openFiles map[uint64]*os.File
+}
+
+// serve reads requests from the socket one at a time and writes the reply
+// for each. It returns when a read or write fails, or when a read yields
+// fewer bytes than a request header (which includes end of stream).
+func (s *server) serve() {
+	// Handlers never keep a reference to the payload after they return, so
+	// a single buffer is reused for every request.
+	buf := make([]byte, requestBufSize)
+	for {
+		n, err := unix.Read(s.fd, buf)
+		if err == unix.EINTR {
+			continue
+		}
+		if err != nil || n < int(linux.SizeOfFUSEHeaderIn) {
+			return
+		}
+
+		var hdr linux.FUSEHeaderIn
+		hdr.UnmarshalUnsafe(buf[:linux.SizeOfFUSEHeaderIn])
+
+		resp := s.handleRequest(&hdr, buf[linux.SizeOfFUSEHeaderIn:n])
+		if resp == nil {
+			continue
+		}
+		if _, err := unix.Write(s.fd, resp); err != nil {
+			return
+		}
+	}
+}
+
+// handleRequest dispatches one request by opcode and returns the encoded
+// reply. Opcodes without a handler are answered with ENOSYS.
+func (s *server) handleRequest(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	switch hdr.Opcode {
+	case linux.FUSE_INIT:
+		return s.handleInit(hdr)
+	case linux.FUSE_GETATTR:
+		return s.handleGetAttr(hdr)
+	case linux.FUSE_LOOKUP:
+		return s.handleLookup(hdr, payload)
+	case linux.FUSE_OPEN:
+		return s.handleOpen(hdr, payload)
+	case linux.FUSE_READ:
+		return s.handleRead(hdr, payload)
+	case linux.FUSE_WRITE:
+		return s.handleWrite(hdr, payload)
+	case linux.FUSE_FLUSH:
+		return s.replyOK(hdr)
+	case linux.FUSE_RELEASE:
+		return s.handleRelease(hdr, payload)
+	case linux.FUSE_ACCESS:
+		return s.replyOK(hdr)
+	case linux.FUSE_STATFS:
+		return s.handleStatFS(hdr)
+	default:
+		return s.replyError(hdr, -int32(unix.ENOSYS))
+	}
+}
+
+// handleInit answers FUSE_INIT with this server's protocol version and
+// write limit.
+func (s *server) handleInit(hdr *linux.FUSEHeaderIn) []byte {
+	out := linux.FUSEInitOut{
+		Major:    linux.FUSE_KERNEL_VERSION,
+		Minor:    linux.FUSE_KERNEL_MINOR_VERSION,
+		MaxWrite: maxWrite,
+		Flags:    linux.FUSE_BIG_WRITES,
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleGetAttr answers FUSE_GETATTR with the host attributes of the node
+// named in the request header. An unknown node or a failed stat yields
+// ENOENT.
+func (s *server) handleGetAttr(hdr *linux.FUSEHeaderIn) []byte {
+	path, ok := s.nodeIDToPath(hdr.NodeID)
+	if !ok {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+	var stat unix.Stat_t
+	if err := unix.Stat(path, &stat); err != nil {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+	out := linux.FUSEAttrOut{
+		AttrValid: 1,
+		Attr:      statToFUSEAttr(stat, hdr.NodeID),
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleLookup answers FUSE_LOOKUP. The name is the payload up to its first
+// NUL byte and is resolved inside the directory named in the request
+// header. The child's host inode number is used as its node ID. An unknown
+// parent, an invalid name or a failed stat yields ENOENT.
+func (s *server) handleLookup(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	nameEnd := 0
+	for nameEnd < len(payload) && payload[nameEnd] != 0 {
+		nameEnd++
+	}
+	name := string(payload[:nameEnd])
+
+	// A lookup name must be one path component. An empty name, ".", ".." or
+	// a name with a separator could otherwise resolve outside the backing
+	// directory.
+	if name == "." || name == ".." || filepath.Base(name) != name {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+
+	parent, ok := s.nodeIDToPath(hdr.NodeID)
+	if !ok {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+	path := filepath.Join(parent, name)
+	var stat unix.Stat_t
+	if err := unix.Stat(path, &stat); err != nil {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+
+	out := linux.FUSEEntryOut{
+		NodeID:     stat.Ino,
+		Generation: 1,
+		EntryValid: 1,
+		AttrValid:  1,
+		Attr:       statToFUSEAttr(stat, stat.Ino),
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleOpen answers FUSE_OPEN by opening the node's host file with the
+// access-mode, append and truncate bits of the requested flags. The file is
+// stored under a freshly allocated handle that is returned to the client
+// together with FOPEN_DIRECT_IO. A short request yields EINVAL, an unknown
+// node ENOENT, and an open failure EIO.
+func (s *server) handleOpen(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEOpenIn
+	if len(payload) < in.SizeBytes() {
+		return s.replyError(hdr, -int32(unix.EINVAL))
+	}
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	path, ok := s.nodeIDToPath(hdr.NodeID)
+	if !ok {
+		return s.replyError(hdr, -int32(unix.ENOENT))
+	}
+	flags := int(in.Flags) & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR | os.O_APPEND | os.O_TRUNC)
+	f, err := os.OpenFile(path, flags, 0)
+	if err != nil {
+		return s.replyError(hdr, -int32(unix.EIO))
+	}
+
+	fh := s.nextFh
+	s.nextFh++
+	s.openFiles[fh] = f
+
+	out := linux.FUSEOpenOut{
+		Fh:       fh,
+		OpenFlag: linux.FOPEN_DIRECT_IO,
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleRead answers FUSE_READ with up to in.Size bytes read at in.Offset
+// from the file registered under in.Fh. A short request yields EINVAL and
+// an unknown handle EBADF.
+func (s *server) handleRead(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEReadIn
+	if len(payload) < in.SizeBytes() {
+		return s.replyError(hdr, -int32(unix.EINVAL))
+	}
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	f, ok := s.openFiles[in.Fh]
+	if !ok {
+		return s.replyError(hdr, -int32(unix.EBADF))
+	}
+
+	data := make([]byte, in.Size)
+	n, err := f.ReadAt(data, int64(in.Offset))
+	// A failed read that produced no bytes is answered with an empty data
+	// reply, which the client sees as end of file.
+	if err != nil && n == 0 {
+		return s.dataReply(hdr, nil)
+	}
+	return s.dataReply(hdr, data[:n])
+}
+
+// handleWrite answers FUSE_WRITE by writing the in.Size bytes that follow
+// the write header at in.Offset to the file registered under in.Fh. The
+// reply carries the number of bytes written. A request that is shorter than
+// its header, or that carries fewer data bytes than in.Size, yields EINVAL;
+// an unknown handle yields EBADF and a write failure EIO.
+func (s *server) handleWrite(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEWriteIn
+	if len(payload) < in.SizeBytes() {
+		return s.replyError(hdr, -int32(unix.EINVAL))
+	}
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+
+	f, ok := s.openFiles[in.Fh]
+	if !ok {
+		return s.replyError(hdr, -int32(unix.EBADF))
+	}
+
+	// Write exactly the number of bytes the header announces, never
+	// whatever else happens to trail the request.
+	writeData := payload[in.SizeBytes():]
+	if uint64(in.Size) > uint64(len(writeData)) {
+		return s.replyError(hdr, -int32(unix.EINVAL))
+	}
+	writeData = writeData[:in.Size]
+
+	n, err := f.WriteAt(writeData, int64(in.Offset))
+	if err != nil {
+		return s.replyError(hdr, -int32(unix.EIO))
+	}
+
+	out := linux.FUSEWriteOut{Size: uint32(n)}
+	return s.marshalReply(hdr, &out)
+}
+
+// handleRelease answers FUSE_RELEASE by closing and forgetting the file
+// registered under in.Fh, if there is one. A short request yields EINVAL;
+// otherwise the reply is success.
+func (s *server) handleRelease(hdr *linux.FUSEHeaderIn, payload []byte) []byte {
+	var in linux.FUSEReleaseIn
+	if len(payload) < in.SizeBytes() {
+		return s.replyError(hdr, -int32(unix.EINVAL))
+	}
+	in.UnmarshalUnsafe(payload[:in.SizeBytes()])
+	if f, ok := s.openFiles[in.Fh]; ok {
+		f.Close()
+		delete(s.openFiles, in.Fh)
+	}
+	return s.replyOK(hdr)
+}
+
+// handleStatFS answers FUSE_STATFS with the statistics of the filesystem
+// that holds the backing directory. A failed statfs yields EIO.
+func (s *server) handleStatFS(hdr *linux.FUSEHeaderIn) []byte {
+	var statfs unix.Statfs_t
+	if err := unix.Statfs(s.backDir, &statfs); err != nil {
+		return s.replyError(hdr, -int32(unix.EIO))
+	}
+	out := linux.FUSEStatfsOut{
+		Blocks:          statfs.Blocks,
+		BlocksFree:      statfs.Bfree,
+		BlocksAvailable: statfs.Bavail,
+		Files:           statfs.Files,
+		FilesFree:       statfs.Ffree,
+		BlockSize:       uint32(statfs.Bsize),
+		NameLength:      uint32(statfs.Namelen),
+		FragmentSize:    uint32(statfs.Frsize),
+	}
+	return s.marshalReply(hdr, &out)
+}
+
+// nodeIDToPath resolves a node ID to a host path. The root ID maps to the
+// backing directory; any other ID is matched against the inode numbers of
+// the backing directory's immediate entries. The boolean is false when no
+// entry matches or the directory cannot be read, so an unknown ID is never
+// mistaken for the root.
+func (s *server) nodeIDToPath(nodeID uint64) (string, bool) {
+	if nodeID == linux.FUSE_ROOT_ID {
+		return s.backDir, true
+	}
+	entries, err := os.ReadDir(s.backDir)
+	if err != nil {
+		return "", false
+	}
+	for _, e := range entries {
+		path := filepath.Join(s.backDir, e.Name())
+		var stat unix.Stat_t
+		if err := unix.Stat(path, &stat); err == nil && stat.Ino == nodeID {
+			return path, true
+		}
+	}
+	return "", false
+}
+
+// marshalUnsafer is the subset of the marshalling interface that the reply
+// helpers need from a response payload.
+type marshalUnsafer interface {
+	SizeBytes() int
+	MarshalUnsafe(dst []byte) []byte
+}
+
+// marshalReply builds a successful reply: a FUSEHeaderOut echoing the
+// request's Unique value, followed by the marshalled payload.
+func (s *server) marshalReply(hdr *linux.FUSEHeaderIn, payload marshalUnsafer) []byte {
+	hdrSize := int(linux.SizeOfFUSEHeaderOut)
+	payloadSize := payload.SizeBytes()
+	buf := make([]byte, hdrSize+payloadSize)
+	outHdr := linux.FUSEHeaderOut{
+		Len:    uint32(hdrSize + payloadSize),
+		Unique: hdr.Unique,
+	}
+	outHdr.MarshalUnsafe(buf[:hdrSize])
+	payload.MarshalUnsafe(buf[hdrSize:])
+	return buf
+}
+
+func (s *server) dataReply(hdr *linux.FUSEHeaderIn, data []byte) []byte { + hdrSize := int(linux.SizeOfFUSEHeaderOut) + buf := make([]byte, hdrSize+len(data)) + outHdr := linux.FUSEHeaderOut{ + Len: uint32(hdrSize + len(data)), + Unique: hdr.Unique, + } + outHdr.MarshalUnsafe(buf[:hdrSize]) + copy(buf[hdrSize:], data) + return buf +} + +func (s *server) replyOK(hdr *linux.FUSEHeaderIn) []byte { + return s.replyError(hdr, 0) +} + +func (s *server) replyError(hdr *linux.FUSEHeaderIn, errno int32) []byte { + hdrSize := int(linux.SizeOfFUSEHeaderOut) + buf := make([]byte, hdrSize) + outHdr := linux.FUSEHeaderOut{ + Len: uint32(hdrSize), + Error: errno, + Unique: hdr.Unique, + } + outHdr.MarshalUnsafe(buf) + return buf +} + +func statToFUSEAttr(stat unix.Stat_t, ino uint64) linux.FUSEAttr { + return linux.FUSEAttr{ + Ino: ino, + Size: uint64(stat.Size), + Blocks: uint64(stat.Blocks), + Atime: uint64(stat.Atim.Sec), + Mtime: uint64(stat.Mtim.Sec), + Ctime: uint64(stat.Ctim.Sec), + AtimeNsec: uint32(stat.Atim.Nsec), + MtimeNsec: uint32(stat.Mtim.Nsec), + CtimeNsec: uint32(stat.Ctim.Nsec), + Mode: stat.Mode, + Nlink: uint32(stat.Nlink), + UID: stat.Uid, + GID: stat.Gid, + BlkSize: uint32(stat.Blksize), + } +} diff --git a/test/fuse_host/workload/BUILD b/test/fuse_host/workload/BUILD new file mode 100644 index 0000000000..c6f4f7aaf3 --- /dev/null +++ b/test/fuse_host/workload/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "go_binary") + +package( + default_applicable_licenses = ["//:license"], + licenses = ["notice"], +) + +go_binary( + name = "workload", + srcs = ["workload.go"], + visibility = ["//test/fuse_host:__pkg__"], + deps = [ + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/test/fuse_host/workload/workload.go b/test/fuse_host/workload/workload.go new file mode 100644 index 0000000000..82d29da1e3 --- /dev/null +++ b/test/fuse_host/workload/workload.go @@ -0,0 +1,97 @@ +// Copyright 2026 The gVisor Authors. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary workload runs inside a gVisor sandbox to exercise the FUSE host
+// passthrough path. It mounts a FUSE filesystem using a pre-passed host FD,
+// then performs filesystem operations to verify correctness.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+)
+
+// fuseFD is the number of the already-open descriptor that is handed to the
+// mount call as its "fd=" option. The default of -1 means "not provided".
+var fuseFD = flag.Int("fd", -1, "file descriptor for the FUSE connection")
+
+// main parses the flags and runs the checks. On failure it prints
+// "FAIL: <error>" to stderr and exits with status 1.
+func main() {
+	flag.Parse()
+	if err := run(); err != nil {
+		fmt.Fprintf(os.Stderr, "FAIL: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// run mounts the FUSE filesystem reachable through *fuseFD on a fresh
+// temporary directory and then checks, in order, that the root is a
+// directory, that "testfile" has the expected initial content, and that data
+// written to it can be read back.
+//
+// It returns nil when every step succeeds; otherwise it returns the first
+// error, wrapped with the name of the step that failed.
+func run() error {
+	if *fuseFD < 0 {
+		return fmt.Errorf("--fd is required")
+	}
+
+	mountPoint, err := os.MkdirTemp("", "fuse-mount")
+	if err != nil {
+		return fmt.Errorf("MkdirTemp: %w", err)
+	}
+	defer os.RemoveAll(mountPoint)
+
+	mountOpts := fmt.Sprintf("fd=%d,user_id=0,group_id=0,rootmode=40000", *fuseFD)
+	if err := unix.Mount("fuse", mountPoint, "fuse", unix.MS_NODEV|unix.MS_NOSUID, mountOpts); err != nil {
+		return fmt.Errorf("mount: %w", err)
+	}
+	// Deferred calls run last-in-first-out, so the unmount happens before the
+	// mount point is removed. Both cleanups are best effort.
+	defer unix.Unmount(mountPoint, unix.MNT_DETACH)
+
+	// Stat root directory.
+	var st unix.Stat_t
+	if err := unix.Stat(mountPoint, &st); err != nil {
+		return fmt.Errorf("stat root: %w", err)
+	}
+	// Compare the whole file-type field: the S_IFDIR bit is also set in
+	// S_IFBLK and S_IFSOCK, so a plain bit test would accept those too.
+	if st.Mode&unix.S_IFMT != unix.S_IFDIR {
+		return fmt.Errorf("root is not a directory: mode=%o", st.Mode)
+	}
+
+	// Read testfile.
+	testfilePath := filepath.Join(mountPoint, "testfile")
+	data, err := os.ReadFile(testfilePath)
+	if err != nil {
+		return fmt.Errorf("read testfile: %w", err)
+	}
+	expected := "hello from the host FUSE server\n"
+	if string(data) != expected {
+		return fmt.Errorf("testfile content: got %q, want %q", string(data), expected)
+	}
+
+	// Write new data to the existing file and read it back. The file is
+	// opened without O_TRUNC, so the read-back comparison only holds because
+	// writeData and expected have the same length (32 bytes each).
+	writeData := "overwritten by sandbox workload\n"
+	f, err := os.OpenFile(testfilePath, os.O_WRONLY, 0)
+	if err != nil {
+		return fmt.Errorf("open testfile for write: %w", err)
+	}
+	if _, err := f.WriteString(writeData); err != nil {
+		f.Close()
+		return fmt.Errorf("write testfile: %w", err)
+	}
+	// A write failure may only be reported when the file is closed, so the
+	// Close result is part of the check.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("close testfile: %w", err)
+	}
+
+	data, err = os.ReadFile(testfilePath)
+	if err != nil {
+		return fmt.Errorf("re-read testfile: %w", err)
+	}
+	if string(data) != writeData {
+		return fmt.Errorf("re-read content: got %q, want %q", string(data), writeData)
+	}
+
+	return nil
+}