From 2d57ca8a8677965dd1326821a53ed1f9c79bb91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Kohlschu=CC=88tter?= Date: Mon, 14 Jul 2025 15:15:12 +0200 Subject: [PATCH 1/2] core: OperationLOCK: Limit scope of addDisposeListener lambda MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When specifying lambdas for cleanup operations, it is good practice to reduce the scope of instances referenced ("pinned") in the lambda scope. Right now, we reference unnecessary objects, adding to heap fragmentation. Reduce the scope of referenced objects by accessing the required references before entering the dispose listener lambda. Signed-off-by: Christian Kohlschütter --- .../src/main/java/org/dcache/nfs/v4/OperationLOCK.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/dcache/nfs/v4/OperationLOCK.java b/core/src/main/java/org/dcache/nfs/v4/OperationLOCK.java index d3b9575b..dbff6f59 100644 --- a/core/src/main/java/org/dcache/nfs/v4/OperationLOCK.java +++ b/core/src/main/java/org/dcache/nfs/v4/OperationLOCK.java @@ -24,8 +24,10 @@ import org.dcache.nfs.status.InvalException; import org.dcache.nfs.status.OpenModeException; import org.dcache.nfs.status.ServerFaultException; +import org.dcache.nfs.util.Opaque; import org.dcache.nfs.v4.nlm.LockDeniedException; import org.dcache.nfs.v4.nlm.LockException; +import org.dcache.nfs.v4.nlm.LockManager; import org.dcache.nfs.v4.nlm.NlmLock; import org.dcache.nfs.v4.xdr.LOCK4denied; import org.dcache.nfs.v4.xdr.LOCK4resok; @@ -118,12 +120,12 @@ public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNF NlmLock lock = new NlmLock(lockOwner, _args.oplock.locktype, _args.oplock.offset.value, _args.oplock.length.value); - context.getLm().lock(inode.getLockKey(), lock); + Opaque lockKey = inode.getLockKey(); + LockManager lm = context.getLm(); + lm.lock(lockKey, lock); // ensure, that on close locks will be released - lock_state.addDisposeListener(s -> { - context.getLm().unlockIfExists(inode.getLockKey(), lock); - }); + lock_state.addDisposeListener(s -> lm.unlockIfExists(lockKey, lock)); // FIXME: we might run into race condition, thus updating sedid must be fenced! lock_state.bumpSeqid(); From 03d56f6a6ca4742e173c2f946647df74862b59b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Kohlschu=CC=88tter?= Date: Tue, 15 Jul 2025 17:25:03 +0200 Subject: [PATCH 2/2] core: Improve access tracking performance for read/write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, read and write operations to the VirtualFileSystem are "stateless" in the sense that there is no corresponding "open" and "close" exposed. At the NFS4 level, this exists, and nfs4j tracks these internally already (stateid4.opaque byte sequences), but they're not exposed to VirtualFileSystem. This is a performance problem: PseudoFs calls checkAccess(inode, ACE4_READ_DATA/ACE4_WRITE_DATA) for each read/write, which in turn triggers a Stat for each read/write operation. This incurs an unnecessary performance penalty of more than 20%. The access check is unnecessary because a successful check upon "open" will be valid for the entire lifetime of the client-side file descriptor, just as it is when opening a file descriptor on other POSIX file systems. In order to properly track granted access, we can leverage data stored in stateid4 and the existing work in FileTracker. Since stateid4 exists in NFSv4 only, we make sure there is no performance degradation in NFSv3 and no exposure of NFSv4-only internals in the VirtualFileSystem API: Expose stateids as OpenHandle to VirtualFileSystem read/write, defaulting to existing read/write methods for backwards compatibility. In PseudoFS, check SharedAccess state from FileTracker and use this to determine access during read/write when available. Rework stateid4 to implement OpenHandle, and expose ClientId (the first 8 bytes of the opaque) so we can look up the information in NFSv4StateHandler. With the change, for sustained reads I'm seeing 854 MB/s instead of 712 MB/s on LocalFileSystem, and 2000 MB/s instead of 1666 MB/s on a custom implementation. Signed-off-by: Christian Kohlschütter --- .../java/org/dcache/nfs/v4/NFSServerV41.java | 2 +- .../org/dcache/nfs/v4/NFSv4StateHandler.java | 11 ++++- .../java/org/dcache/nfs/v4/OperationREAD.java | 2 +- .../org/dcache/nfs/v4/OperationWRITE.java | 2 +- .../java/org/dcache/nfs/v4/xdr/stateid4.java | 14 +++++- .../dcache/nfs/vfs/ForwardingFileSystem.java | 10 ++++- .../java/org/dcache/nfs/vfs/OpenHandle.java | 20 +++++++++ .../java/org/dcache/nfs/vfs/PseudoFs.java | 43 +++++++++++++++++-- .../org/dcache/nfs/vfs/VirtualFileSystem.java | 20 ++++++++- .../org/dcache/nfs/v4/OperationWRITETest.java | 6 +-- 10 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 core/src/main/java/org/dcache/nfs/vfs/OpenHandle.java diff --git a/core/src/main/java/org/dcache/nfs/v4/NFSServerV41.java b/core/src/main/java/org/dcache/nfs/v4/NFSServerV41.java index e1da08e6..85b6588d 100644 --- a/core/src/main/java/org/dcache/nfs/v4/NFSServerV41.java +++ b/core/src/main/java/org/dcache/nfs/v4/NFSServerV41.java @@ -134,7 +134,7 @@ public COMPOUND4res NFSPROC4_COMPOUND_4(RpcCall call$, COMPOUND4args arg1) { } res.resarray = new ArrayList<>(arg1.argarray.length); - VirtualFileSystem fs = new PseudoFs(_fs, call$, _exportTable); + VirtualFileSystem fs = new PseudoFs(_fs, call$, _exportTable, _statHandler); CompoundContextBuilder builder = new CompoundContextBuilder() .withMinorversion(arg1.minorversion.value) diff --git a/core/src/main/java/org/dcache/nfs/v4/NFSv4StateHandler.java b/core/src/main/java/org/dcache/nfs/v4/NFSv4StateHandler.java index b7d64a4d..e489c33a 100644 --- a/core/src/main/java/org/dcache/nfs/v4/NFSv4StateHandler.java +++ b/core/src/main/java/org/dcache/nfs/v4/NFSv4StateHandler.java @@ -237,13 +237,22 @@ public NFS4Client getClient(clientid4 clientid) throws StaleClientidException { } } + public NFS4Client getClientIfExists(long clientId) { + _readLock.lock(); + try { + return _clientsByServerId.get(new clientid4(clientId)); + } finally { + _readLock.unlock(); + } + } + public NFS4Client getClientIdByStateId(stateid4 stateId) throws ChimeraNFSException { _readLock.lock(); try { checkState(_running, "NFS state handler not running"); - clientid4 clientId = new clientid4(Bytes.getLong(stateId.other, 0)); + clientid4 clientId = new clientid4(stateId.getClientId()); NFS4Client client = _clientsByServerId.get(clientId); if (client == null) { throw new BadStateidException("no client for stateid: " + stateId); diff --git a/core/src/main/java/org/dcache/nfs/v4/OperationREAD.java b/core/src/main/java/org/dcache/nfs/v4/OperationREAD.java index 649fe457..26b10ef2 100644 --- a/core/src/main/java/org/dcache/nfs/v4/OperationREAD.java +++ b/core/src/main/java/org/dcache/nfs/v4/OperationREAD.java @@ -73,7 +73,7 @@ public void process(CompoundContext context, nfs_resop4 result) throws IOExcepti ByteBuffer buf = ByteBuffer.allocate(count); res.resok4 = new READ4resok(); - int bytesRead = context.getFs().read(inode, buf, offset, res.resok4::setEOF); + int bytesRead = context.getFs().read(stateid, inode, buf, offset, res.resok4::setEOF); if (bytesRead < 0) { buf.clear(); diff --git a/core/src/main/java/org/dcache/nfs/v4/OperationWRITE.java b/core/src/main/java/org/dcache/nfs/v4/OperationWRITE.java index 27690b02..820e16de 100644 --- a/core/src/main/java/org/dcache/nfs/v4/OperationWRITE.java +++ b/core/src/main/java/org/dcache/nfs/v4/OperationWRITE.java @@ -87,7 +87,7 @@ public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNF } long offset = _args.opwrite.offset.value; - VirtualFileSystem.WriteResult writeResult = context.getFs().write(context.currentInode(), + VirtualFileSystem.WriteResult writeResult = context.getFs().write(stateid, context.currentInode(), _args.opwrite.data, offset, VirtualFileSystem.StabilityLevel.fromStableHow(_args.opwrite.stable)); if (writeResult.getBytesWritten() < 0) { diff --git a/core/src/main/java/org/dcache/nfs/v4/xdr/stateid4.java b/core/src/main/java/org/dcache/nfs/v4/xdr/stateid4.java index 701775fe..57492ae4 100644 --- a/core/src/main/java/org/dcache/nfs/v4/xdr/stateid4.java +++ b/core/src/main/java/org/dcache/nfs/v4/xdr/stateid4.java @@ -23,14 +23,17 @@ import java.io.Serializable; import java.util.Arrays; +import org.dcache.nfs.util.Opaque; +import org.dcache.nfs.vfs.OpenHandle; import org.dcache.oncrpc4j.rpc.OncRpcException; +import org.dcache.oncrpc4j.util.Bytes; import org.dcache.oncrpc4j.xdr.XdrAble; import org.dcache.oncrpc4j.xdr.XdrDecodingStream; import org.dcache.oncrpc4j.xdr.XdrEncodingStream; import com.google.common.io.BaseEncoding; -public class stateid4 implements XdrAble, Serializable { +public class stateid4 implements XdrAble, Serializable, OpenHandle { static final long serialVersionUID = -6677150504723505919L; @@ -95,6 +98,15 @@ public int hashCode() { return Arrays.hashCode(other); } + public long getClientId() { + return Bytes.getLong(other, 0); + } + + @Override + public Opaque getOpaque() { + return Opaque.forMutableByteArray(other); + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/core/src/main/java/org/dcache/nfs/vfs/ForwardingFileSystem.java b/core/src/main/java/org/dcache/nfs/vfs/ForwardingFileSystem.java index c22e1108..500a6ae6 100644 --- a/core/src/main/java/org/dcache/nfs/vfs/ForwardingFileSystem.java +++ b/core/src/main/java/org/dcache/nfs/vfs/ForwardingFileSystem.java @@ -107,8 +107,8 @@ public int read(Inode inode, ByteBuffer data, long offset) throws IOException { } @Override - public int read(Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { - return delegate().read(inode, data, offset, eofReached); + public int read(OpenHandle oh, Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { + return delegate().read(oh, inode, data, offset, eofReached); } @Override @@ -138,6 +138,12 @@ public WriteResult write(Inode inode, ByteBuffer data, long offset, StabilityLev return delegate().write(inode, data, offset, stabilityLevel); } + @Override + public WriteResult write(OpenHandle oh, Inode inode, ByteBuffer data, long offset, StabilityLevel stabilityLevel) + throws IOException { + return delegate().write(oh, inode, data, offset, stabilityLevel); + } + @Override public void commit(Inode inode, long offset, int count) throws IOException { delegate().commit(inode, offset, count); diff --git a/core/src/main/java/org/dcache/nfs/vfs/OpenHandle.java b/core/src/main/java/org/dcache/nfs/vfs/OpenHandle.java new file mode 100644 index 00000000..ad37fcf4 --- /dev/null +++ b/core/src/main/java/org/dcache/nfs/vfs/OpenHandle.java @@ -0,0 +1,20 @@ +package org.dcache.nfs.vfs; + +import org.dcache.nfs.util.Opaque; + +/** + * Describes metadata determined upon opening the resource at the NFS level. + *

+ * This is used, for example, to infer access privileges that were determined upon opening a resource, so read/write + * operations don't have to check every time. + * + * @see PseudoFs + */ +public interface OpenHandle { + /** + * Returns an opaque bytes representation of the handle. + * + * @return The opaque. + */ + Opaque getOpaque(); +} diff --git a/core/src/main/java/org/dcache/nfs/vfs/PseudoFs.java b/core/src/main/java/org/dcache/nfs/vfs/PseudoFs.java index e5e26478..26e269ab 100644 --- a/core/src/main/java/org/dcache/nfs/vfs/PseudoFs.java +++ b/core/src/main/java/org/dcache/nfs/vfs/PseudoFs.java @@ -72,9 +72,13 @@ import org.dcache.nfs.status.PermException; import org.dcache.nfs.status.RoFsException; import org.dcache.nfs.util.SubjectHolder; +import org.dcache.nfs.v4.NFS4Client; +import org.dcache.nfs.v4.NFSv4StateHandler; import org.dcache.nfs.v4.acl.Acls; import org.dcache.nfs.v4.xdr.acemask4; +import org.dcache.nfs.v4.xdr.nfs4_prot; import org.dcache.nfs.v4.xdr.nfsace4; +import org.dcache.nfs.v4.xdr.stateid4; import org.dcache.nfs.vfs.AclCheckable.Access; import org.dcache.nfs.vfs.Stat.StatAttribute; import org.dcache.oncrpc4j.rpc.RpcAuth; @@ -105,6 +109,7 @@ public class PseudoFs extends ForwardingFileSystem { private final VirtualFileSystem _inner; private final ExportTable _exportTable; private final RpcAuth _auth; + private final NFSv4StateHandler _stateHandler; private final static int ACCESS4_MASK = ACCESS4_DELETE | ACCESS4_EXECUTE | ACCESS4_EXTEND @@ -112,11 +117,16 @@ public class PseudoFs extends ForwardingFileSystem { | ACCESS4_XAREAD | ACCESS4_XAWRITE | ACCESS4_XALIST; public PseudoFs(VirtualFileSystem inner, RpcCall call, ExportTable exportTable) { + this(inner, call, exportTable, null); + } + + public PseudoFs(VirtualFileSystem inner, RpcCall call, ExportTable exportTable, NFSv4StateHandler stateHandler) { _inner = inner; _subject = call.getCredential().getSubject(); _auth = call.getCredential(); _inetAddress = call.getTransport().getRemoteSocketAddress(); _exportTable = exportTable; + _stateHandler = stateHandler; } @Override @@ -133,6 +143,26 @@ private boolean canAccess(Inode inode, Stat stat, int mode) { return false; } + private void checkAccessReadWriteData(OpenHandle oh, Inode inode, boolean write) throws IOException { + if (_stateHandler != null && oh instanceof stateid4) { + stateid4 stateId = (stateid4) oh; + NFS4Client client = _stateHandler.getClientIfExists(stateId.getClientId()); + if (client != null) { + int shareAccess; + try { + shareAccess = _stateHandler.getFileTracker().getShareAccess(client, inode, stateId); + if ((shareAccess & (write ? nfs4_prot.OPEN4_SHARE_ACCESS_WRITE + : nfs4_prot.OPEN4_SHARE_ACCESS_READ)) != 0) { + return; // permit + } + } catch (ChimeraNFSException e) { + // ignore; check below and throw proper exception + } + } + } + checkAccess(inode, write ? ACE4_WRITE_DATA : ACE4_READ_DATA); + } + @Override public int access(Subject subject, Inode inode, int mode) throws IOException { int accessmask = 0; @@ -330,9 +360,9 @@ public int read(Inode inode, ByteBuffer data, long offset) throws IOException { } @Override - public int read(Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { - checkAccess(inode, ACE4_READ_DATA); - return _inner.read(innerInode(inode), data, offset, eofReached); + public int read(OpenHandle oh, Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { + checkAccessReadWriteData(oh, inode, false); + return _inner.read(oh, innerInode(inode), data, offset, eofReached); } @Override @@ -383,6 +413,13 @@ public WriteResult write(Inode inode, ByteBuffer data, long offset, StabilityLev return _inner.write(innerInode(inode), data, offset, stabilityLevel); } + @Override + public WriteResult write(OpenHandle oh, Inode inode, ByteBuffer data, long offset, StabilityLevel stabilityLevel) + throws IOException { + checkAccessReadWriteData(oh, inode, true); + return _inner.write(oh, innerInode(inode), data, offset, stabilityLevel); + } + @Override public Stat getattr(Inode inode) throws IOException { checkAccess(inode, ACE4_READ_ATTRIBUTES); diff --git a/core/src/main/java/org/dcache/nfs/vfs/VirtualFileSystem.java b/core/src/main/java/org/dcache/nfs/vfs/VirtualFileSystem.java index fd1e778d..ff5ba2af 100644 --- a/core/src/main/java/org/dcache/nfs/vfs/VirtualFileSystem.java +++ b/core/src/main/java/org/dcache/nfs/vfs/VirtualFileSystem.java @@ -226,6 +226,7 @@ default int read(Inode inode, ByteBuffer data, long offset) throws IOException { * information via {@link #getattr(Inode)}), which may incur a higher, recurring cost than the inconvenience of a * single additional client roundtrip at the end of the file. * + * @param oh The open-handle, or {@code null}. * @param inode inode of the file to read from. * @param data byte array for writing. * @param offset file's position to read from. @@ -234,7 +235,7 @@ default int read(Inode inode, ByteBuffer data, long offset) throws IOException { * @return number of bytes read from the file, possibly zero. -1 if EOF is reached. * @throws IOException */ - default int read(Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { + default int read(OpenHandle oh, Inode inode, ByteBuffer data, long offset, Runnable eofReached) throws IOException { Stat stat = getattr(inode); Stat.Type statType = stat.type(); @@ -302,6 +303,7 @@ WriteResult write(Inode inode, byte[] data, long offset, int count, StabilityLev /** * Write provided {@code data} into inode with a given stability level. * + * @param oh The open-handle, or {@code null}. * @param inode inode of the file to write. * @param data data to be written. * @param offset the file position to begin writing at. @@ -317,6 +319,22 @@ default WriteResult write(Inode inode, ByteBuffer data, long offset, StabilityLe return write(inode, bytes, offset, count, stabilityLevel); } + /** + * Write provided {@code data} into inode with a given stability level. + * + * @param oh Carries metadata determined upon opening the resource at the NFS level, or {@code null}. + * @param inode inode of the file to write. + * @param data data to be written. + * @param offset the file position to begin writing at. + * @param stabilityLevel data stability level. + * @return write result. + * @throws IOException + */ + default WriteResult write(OpenHandle oh, Inode inode, ByteBuffer data, long offset, StabilityLevel stabilityLevel) + throws IOException { + return write(inode, data, offset, stabilityLevel); + } + /** * Flush data in {@code dirty} state to the stable storage. Typically follows * {@link #write(Inode, ByteBuffer, long, StabilityLevel)} operation. diff --git a/core/src/test/java/org/dcache/nfs/v4/OperationWRITETest.java b/core/src/test/java/org/dcache/nfs/v4/OperationWRITETest.java index b02bfa48..4b757e94 100644 --- a/core/src/test/java/org/dcache/nfs/v4/OperationWRITETest.java +++ b/core/src/test/java/org/dcache/nfs/v4/OperationWRITETest.java @@ -64,7 +64,7 @@ public void testLeaseUpdateForV40Client() throws UnknownHostException, ChimeraNF when(vfs.getattr(any())).thenReturn(fileStat); when(vfs.getattr(any(), any())).thenCallRealMethod(); - when(vfs.write(any(), any(), anyLong(), any())) + when(vfs.write(any(), any(), any(), anyLong(), any())) .thenReturn(new VirtualFileSystem.WriteResult(VirtualFileSystem.StabilityLevel.UNSTABLE, 1)); COMPOUND4args writeArgs = new CompoundBuilder() @@ -101,7 +101,7 @@ public void testNoLeaseUpdateForV41Client() throws UnknownHostException, Chimera when(vfs.getattr(any())).thenReturn(fileStat); when(vfs.getattr(any(), any())).thenCallRealMethod(); - when(vfs.write(any(), any(), anyLong(), any())) + when(vfs.write(any(), any(), any(), anyLong(), any())) .thenReturn(new VirtualFileSystem.WriteResult(VirtualFileSystem.StabilityLevel.UNSTABLE, 1)); COMPOUND4args writeArgs = new CompoundBuilder() @@ -140,7 +140,7 @@ public void testReturnWriteVerifier() throws UnknownHostException, ChimeraNFSExc when(vfs.getattr(any())).thenReturn(fileStat); when(vfs.getattr(any(), any())).thenCallRealMethod(); - when(vfs.write(any(), any(), anyLong(), any())) + when(vfs.write(any(), any(), any(), anyLong(), any())) .thenReturn(new VirtualFileSystem.WriteResult(VirtualFileSystem.StabilityLevel.UNSTABLE, 1)); COMPOUND4args writeArgs = new CompoundBuilder()