@@ -225,7 +225,7 @@ class AsyncIoContext : public IoContext {
225
225
folly::EventBase* evb,
226
226
size_t capacity,
227
227
bool useIoUring,
228
- std::vector< std::shared_ptr<FdpNvme>> fdpNvmeVec );
228
+ std::unordered_map< int , std::shared_ptr<FdpNvme>> fdpNvmeDevs );
229
229
230
230
~AsyncIoContext () override = default ;
231
231
@@ -279,18 +279,16 @@ class AsyncIoContext : public IoContext {
279
279
size_t numSubmitted_ = 0 ;
280
280
size_t numCompleted_ = 0 ;
281
281
282
- // Device info vector for FDP support
283
- const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
284
- // As of now, only one FDP enabled Device is supported
285
- static constexpr uint16_t kDefaultFdpIdx = 0u ;
282
+ // Map of file descriptors to FdpNvme device objects
283
+ const std::unordered_map<int , std::shared_ptr<FdpNvme>> fdpNvmeDevs_;
286
284
};
287
285
288
286
// A FileDevice manages direct I/O to either a single or multiple (RAID0)
289
287
// block device(s) or regular file(s).
290
288
class FileDevice : public Device {
291
289
public:
292
290
FileDevice (std::vector<folly::File>&& fvec,
293
- std::vector< std::shared_ptr<FdpNvme>>&& fdpNvmeVec ,
291
+ std::unordered_map< int , std::shared_ptr<FdpNvme>>&& fdpNvmeDevs ,
294
292
uint64_t size,
295
293
uint32_t blockSize,
296
294
uint32_t stripeSize,
@@ -317,8 +315,8 @@ class FileDevice : public Device {
317
315
// File vector for devices or regular files
318
316
const std::vector<folly::File> fvec_{};
319
317
320
- // Device info vector for FDP support
321
- const std::vector< std::shared_ptr<FdpNvme>> fdpNvmeVec_{} ;
318
+ // Map of file descriptors to FdpNvme device objects
319
+ const std::unordered_map< int , std::shared_ptr<FdpNvme>> fdpNvmeDevs_ ;
322
320
323
321
// RAID stripe size when multiple devices are used
324
322
const uint32_t stripeSize_;
@@ -750,20 +748,21 @@ bool SyncIoContext::submitIo(IOOp& op) {
750
748
/*
751
749
* AsyncIoContext
752
750
*/
753
- AsyncIoContext::AsyncIoContext (std::unique_ptr<folly::AsyncBase>&& asyncBase,
754
- size_t id,
755
- folly::EventBase* evb,
756
- size_t capacity,
757
- bool useIoUring,
758
- std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec)
751
+ AsyncIoContext::AsyncIoContext (
752
+ std::unique_ptr<folly::AsyncBase>&& asyncBase,
753
+ size_t id,
754
+ folly::EventBase* evb,
755
+ size_t capacity,
756
+ bool useIoUring,
757
+ std::unordered_map<int , std::shared_ptr<FdpNvme>> fdpNvmeDevs)
759
758
: asyncBase_(std::move(asyncBase)),
760
759
id_(id),
761
760
qDepth_(capacity),
762
761
useIoUring_(useIoUring),
763
- fdpNvmeVec_(fdpNvmeVec ) {
762
+ fdpNvmeDevs_(fdpNvmeDevs ) {
764
763
#ifdef CACHELIB_IOURING_DISABLE
765
764
// io_uring is not available on the system
766
- XDCHECK (!useIoUring_ && !(fdpNvmeVec_ .size () > 0 ));
765
+ XDCHECK (!useIoUring_ && !(fdpNvmeDevs_ .size () > 0 ));
767
766
useIoUring_ = false ;
768
767
#endif
769
768
if (evb) {
@@ -781,7 +780,7 @@ AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
781
780
" [{}] Created new async io context with qdepth {}{} io_engine {} {}" ,
782
781
getName (), qDepth_, qDepth_ == 1 ? " (sync wait)" : " " ,
783
782
useIoUring_ ? " io_uring" : " libaio" ,
784
- (fdpNvmeVec_ .size () > 0 ) ? " FDP enabled" : " " );
783
+ (fdpNvmeDevs_ .size () > 0 ) ? " FDP enabled" : " " );
785
784
}
786
785
787
786
void AsyncIoContext::pollCompletion () {
@@ -820,7 +819,7 @@ void AsyncIoContext::handleCompletion(
820
819
}
821
820
822
821
auto len = aop->result ();
823
- if (fdpNvmeVec_ .size () > 0 ) {
822
+ if (fdpNvmeDevs_ .size () > 0 ) {
824
823
// 0 means success here, so get the completed size from iop
825
824
len = !len ? iop->size_ : 0 ;
826
825
}
@@ -869,7 +868,7 @@ bool AsyncIoContext::submitIo(IOOp& op) {
869
868
}
870
869
871
870
std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepAsyncIo (IOOp& op) {
872
- if (fdpNvmeVec_ .size () > 0 ) {
871
+ if (fdpNvmeDevs_ .size () > 0 ) {
873
872
return prepNvmeIo (op);
874
873
}
875
874
@@ -905,10 +904,10 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
905
904
iouringCmdOp->initBase ();
906
905
struct io_uring_sqe & sqe = iouringCmdOp->getSqe ();
907
906
if (req.opType_ == OpType::READ) {
908
- fdpNvmeVec_[ kDefaultFdpIdx ] ->prepReadUringCmdSqe (sqe, op.data_ , op.size_ ,
909
- op.offset_ );
907
+ fdpNvmeDevs_. at (op. fd_ ) ->prepReadUringCmdSqe (sqe, op.data_ , op.size_ ,
908
+ op.offset_ );
910
909
} else {
911
- fdpNvmeVec_[ kDefaultFdpIdx ] ->prepWriteUringCmdSqe (
910
+ fdpNvmeDevs_. at (op. fd_ ) ->prepWriteUringCmdSqe (
912
911
sqe, op.data_ , op.size_ , op.offset_ , op.placeHandle_ .value_or (-1 ));
913
912
}
914
913
io_uring_sqe_set_data (&sqe, iouringCmdOp.get ());
@@ -921,23 +920,24 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
921
920
/*
922
921
* FileDevice
923
922
*/
924
- FileDevice::FileDevice (std::vector<folly::File>&& fvec,
925
- std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
926
- uint64_t fileSize,
927
- uint32_t blockSize,
928
- uint32_t stripeSize,
929
- uint32_t maxIOSize,
930
- uint32_t maxDeviceWriteSize,
931
- IoEngine ioEngine,
932
- uint32_t qDepthPerContext,
933
- std::shared_ptr<DeviceEncryptor> encryptor)
923
+ FileDevice::FileDevice (
924
+ std::vector<folly::File>&& fvec,
925
+ std::unordered_map<int , std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
926
+ uint64_t fileSize,
927
+ uint32_t blockSize,
928
+ uint32_t stripeSize,
929
+ uint32_t maxIOSize,
930
+ uint32_t maxDeviceWriteSize,
931
+ IoEngine ioEngine,
932
+ uint32_t qDepthPerContext,
933
+ std::shared_ptr<DeviceEncryptor> encryptor)
934
934
: Device(fileSize * fvec.size(),
935
935
std::move(encryptor),
936
936
blockSize,
937
937
maxIOSize,
938
938
maxDeviceWriteSize),
939
939
fvec_(std::move(fvec)),
940
- fdpNvmeVec_ (std::move(fdpNvmeVec )),
940
+ fdpNvmeDevs_ (std::move(fdpNvmeDevs )),
941
941
stripeSize_(stripeSize),
942
942
ioEngine_(ioEngine),
943
943
qDepthPerContext_(qDepthPerContext) {
@@ -974,7 +974,7 @@ FileDevice::FileDevice(std::vector<folly::File>&& fvec,
974
974
" num_fdp_devices {}" ,
975
975
fvec_.size (), getSize (), blockSize, stripeSize, maxDeviceWriteSize,
976
976
maxIOSize, getIoEngineName (ioEngine_), qDepthPerContext_,
977
- fdpNvmeVec_ .size ());
977
+ fdpNvmeDevs_ .size ());
978
978
}
979
979
980
980
bool FileDevice::readImpl (uint64_t offset, uint32_t size, void * value) {
@@ -1030,7 +1030,7 @@ IoContext* FileDevice::getIoContext() {
1030
1030
std::unique_ptr<folly::AsyncBase> asyncBase;
1031
1031
if (useIoUring) {
1032
1032
#ifndef CACHELIB_IOURING_DISABLE
1033
- if (fdpNvmeVec_ .size () > 0 ) {
1033
+ if (fdpNvmeDevs_ .size () > 0 ) {
1034
1034
// Big sqe/cqe is mandatory for NVMe passthrough
1035
1035
// https://elixir.bootlin.com/linux/v6.7/source/drivers/nvme/host/ioctl.c#L742
1036
1036
folly::IoUringOp::Options options;
@@ -1051,7 +1051,7 @@ IoContext* FileDevice::getIoContext() {
1051
1051
auto idx = incrementalIdx_++;
1052
1052
tlContext_.reset (new AsyncIoContext (std::move (asyncBase), idx, evb,
1053
1053
qDepthPerContext_, useIoUring,
1054
- fdpNvmeVec_ ));
1054
+ fdpNvmeDevs_ ));
1055
1055
1056
1056
{
1057
1057
// Keep pointers in a vector to ease the gdb debugging
@@ -1067,10 +1067,20 @@ IoContext* FileDevice::getIoContext() {
1067
1067
}
1068
1068
1069
1069
int FileDevice::allocatePlacementHandle () {
1070
- static constexpr uint16_t kDefaultFdpIdx = 0u ;
1071
1070
#ifndef CACHELIB_IOURING_DISABLE
1072
- if (fdpNvmeVec_.size () > 0 ) {
1073
- return fdpNvmeVec_[kDefaultFdpIdx ]->allocateFdpHandle ();
1071
+ if (fdpNvmeDevs_.size () > 0 ) {
1072
+ auto fdpHandle = -1 ;
1073
+ // Ensuring that same FDP placement handle is allocated for all FdpNvme
1074
+ // devices for RAID, and returns the allocated handle if successful,
1075
+ // or -1 if there is a conflict
1076
+ for (auto & nvmeFdp : fdpNvmeDevs_) {
1077
+ auto tempHandle = nvmeFdp.second ->allocateFdpHandle ();
1078
+ if (fdpHandle != -1 && (tempHandle != fdpHandle)) {
1079
+ return -1 ;
1080
+ }
1081
+ fdpHandle = tempHandle;
1082
+ }
1083
+ return fdpHandle;
1074
1084
}
1075
1085
#endif
1076
1086
return -1 ;
@@ -1186,31 +1196,26 @@ std::unique_ptr<Device> createDirectIoFileDevice(
1186
1196
XDCHECK (folly::isPowTwo (blockSize));
1187
1197
1188
1198
uint32_t maxIOSize = maxDeviceWriteSize;
1189
- std::vector< std::shared_ptr<FdpNvme>> fdpNvmeVec{} ;
1199
+ std::unordered_map< int , std::shared_ptr<FdpNvme>> fdpNvmeDevs ;
1190
1200
#ifndef CACHELIB_IOURING_DISABLE
1191
1201
if (isFDPEnabled) {
1192
1202
try {
1193
- if (filePaths.size () > 1 ) {
1194
- throw std::invalid_argument (folly::sformat (
1195
- " {} input files; but FDP mode does not support RAID files yet" ,
1196
- filePaths.size ()));
1197
- }
1198
1203
1199
- for (const auto & path : filePaths) {
1200
- auto fdpNvme = std::make_shared<FdpNvme>(path );
1204
+ for (size_t i = 0 ; i < filePaths. size (); i++ ) {
1205
+ auto fdpNvme = std::make_shared<FdpNvme>(filePaths[i] );
1201
1206
1202
1207
auto maxDevIOSize = fdpNvme->getMaxIOSize ();
1203
1208
if (maxDevIOSize != 0u &&
1204
1209
(maxIOSize == 0u || maxDevIOSize < maxIOSize)) {
1205
1210
maxIOSize = maxDevIOSize;
1206
1211
}
1207
1212
1208
- fdpNvmeVec. push_back ( std::move (fdpNvme));
1213
+ fdpNvmeDevs. insert ({ fVec [i]. fd (), std::move (fdpNvme)} );
1209
1214
}
1210
1215
} catch (const std::exception& e) {
1211
1216
XLOGF (ERR, " NVMe FDP mode could not be enabled {}, Errno: {}" , e.what (),
1212
1217
errno);
1213
- fdpNvmeVec .clear ();
1218
+ fdpNvmeDevs .clear ();
1214
1219
maxIOSize = 0u ;
1215
1220
}
1216
1221
}
@@ -1221,7 +1226,7 @@ std::unique_ptr<Device> createDirectIoFileDevice(
1221
1226
}
1222
1227
1223
1228
return std::make_unique<FileDevice>(std::move (fVec ),
1224
- std::move (fdpNvmeVec ),
1229
+ std::move (fdpNvmeDevs ),
1225
1230
fileSize,
1226
1231
blockSize,
1227
1232
stripeSize,
0 commit comments