
Commit 1a07b4e

Adding RAID 0 support for fdp path
Summary: This enables RAID0 in the FDP I/O path by splitting I/O across all devices. Signed-off-by: Vikash Kumar <[email protected]>
1 parent: 78233f0
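
As background for the summary above, here is a minimal sketch of the standard RAID0 address split that "splitting I/O across all devices" refers to. This is not code from the commit; raid0Translate, stripeSize, and numDevices are illustrative names, and the actual split is done by FileDevice's existing RAID0 read/write path in Device.cpp.

#include <cstdint>
#include <utility>

// Hypothetical helper, illustrative only: map a logical byte offset onto a
// RAID0 array. Returns {member device index, offset within that device}.
std::pair<uint32_t, uint64_t> raid0Translate(uint64_t logicalOffset,
                                             uint32_t stripeSize,
                                             uint32_t numDevices) {
  const uint64_t stripe = logicalOffset / stripeSize;  // global stripe number
  const uint32_t deviceIdx =
      static_cast<uint32_t>(stripe % numDevices);       // which member device
  const uint64_t stripeOnDevice = stripe / numDevices;  // stripe index on that device
  const uint64_t offsetOnDevice =
      stripeOnDevice * stripeSize + (logicalOffset % stripeSize);
  return {deviceIdx, offsetOnDevice};
}

Each chunk of a logical I/O is issued to the member device it maps to; the changes below key the per-device FDP handles by that member file's descriptor so each chunk can find its device.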

1 file changed: cachelib/navy/common/Device.cpp (55 additions, 50 deletions)
@@ -225,7 +225,7 @@ class AsyncIoContext : public IoContext {
                  folly::EventBase* evb,
                  size_t capacity,
                  bool useIoUring,
-                 std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec);
+                 std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs);
 
   ~AsyncIoContext() override = default;
 
@@ -279,18 +279,16 @@ class AsyncIoContext : public IoContext {
   size_t numSubmitted_ = 0;
   size_t numCompleted_ = 0;
 
-  // Device info vector for FDP support
-  const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
-  // As of now, only one FDP enabled Device is supported
-  static constexpr uint16_t kDefaultFdpIdx = 0u;
+  // Map of file descriptors to FdpNvme device objects
+  const std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs_;
 };
 
 // An FileDevice manages direct I/O to either a single or multiple (RAID0)
 // block device(s) or regular file(s).
 class FileDevice : public Device {
  public:
   FileDevice(std::vector<folly::File>&& fvec,
-             std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
+             std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
              uint64_t size,
              uint32_t blockSize,
              uint32_t stripeSize,
@@ -317,8 +315,8 @@ class FileDevice : public Device {
   // File vector for devices or regular files
   const std::vector<folly::File> fvec_{};
 
-  // Device info vector for FDP support
-  const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
+  // Map of file descriptors to FdpNvme device objects
+  const std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs_;
 
   // RAID stripe size when multiple devices are used
   const uint32_t stripeSize_;
@@ -750,20 +748,21 @@ bool SyncIoContext::submitIo(IOOp& op) {
 /*
  * AsyncIoContext
  */
-AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
-                               size_t id,
-                               folly::EventBase* evb,
-                               size_t capacity,
-                               bool useIoUring,
-                               std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec)
+AsyncIoContext::AsyncIoContext(
+    std::unique_ptr<folly::AsyncBase>&& asyncBase,
+    size_t id,
+    folly::EventBase* evb,
+    size_t capacity,
+    bool useIoUring,
+    std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs)
     : asyncBase_(std::move(asyncBase)),
       id_(id),
       qDepth_(capacity),
       useIoUring_(useIoUring),
-      fdpNvmeVec_(fdpNvmeVec) {
+      fdpNvmeDevs_(fdpNvmeDevs) {
 #ifdef CACHELIB_IOURING_DISABLE
   // io_uring is not available on the system
-  XDCHECK(!useIoUring_ && !(fdpNvmeVec_.size() > 0));
+  XDCHECK(!useIoUring_ && !(fdpNvmeDevs_.size() > 0));
   useIoUring_ = false;
 #endif
   if (evb) {
@@ -781,7 +780,7 @@ AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
       "[{}] Created new async io context with qdepth {}{} io_engine {} {}",
       getName(), qDepth_, qDepth_ == 1 ? " (sync wait)" : "",
       useIoUring_ ? "io_uring" : "libaio",
-      (fdpNvmeVec_.size() > 0) ? "FDP enabled" : "");
+      (fdpNvmeDevs_.size() > 0) ? "FDP enabled" : "");
 }
 
 void AsyncIoContext::pollCompletion() {
@@ -820,7 +819,7 @@ void AsyncIoContext::handleCompletion(
   }
 
   auto len = aop->result();
-  if (fdpNvmeVec_.size() > 0) {
+  if (fdpNvmeDevs_.size() > 0) {
     // 0 means success here, so get the completed size from iop
     len = !len ? iop->size_ : 0;
   }
@@ -869,7 +868,7 @@ bool AsyncIoContext::submitIo(IOOp& op) {
 }
 
 std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepAsyncIo(IOOp& op) {
-  if (fdpNvmeVec_.size() > 0) {
+  if (fdpNvmeDevs_.size() > 0) {
     return prepNvmeIo(op);
   }
 
@@ -905,10 +904,10 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
   iouringCmdOp->initBase();
   struct io_uring_sqe& sqe = iouringCmdOp->getSqe();
   if (req.opType_ == OpType::READ) {
-    fdpNvmeVec_[kDefaultFdpIdx]->prepReadUringCmdSqe(sqe, op.data_, op.size_,
-                                                     op.offset_);
+    fdpNvmeDevs_.at(op.fd_)->prepReadUringCmdSqe(sqe, op.data_, op.size_,
+                                                 op.offset_);
   } else {
-    fdpNvmeVec_[kDefaultFdpIdx]->prepWriteUringCmdSqe(
+    fdpNvmeDevs_.at(op.fd_)->prepWriteUringCmdSqe(
         sqe, op.data_, op.size_, op.offset_, op.placeHandle_.value_or(-1));
   }
   io_uring_sqe_set_data(&sqe, iouringCmdOp.get());
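
The hunk above drops the single-device kDefaultFdpIdx lookup in favor of a lookup keyed by the op's file descriptor. How IOOp::fd_ gets populated is not shown in this diff; the sketch below is an assumption for illustration only: the RAID0 split selects a member device for each chunk, and that member's fd, taken from the same files held in fvec_, is what fdpNvmeDevs_.at(op.fd_) later resolves.

#include <folly/File.h>

#include <cstdint>
#include <vector>

// Illustrative sketch, not from this commit: derive the fd that a chunk of a
// RAID0 I/O should carry, given the member files and the stripe geometry.
int fdForChunk(const std::vector<folly::File>& fvec,
               uint64_t logicalOffset,
               uint32_t stripeSize) {
  const auto deviceIdx = (logicalOffset / stripeSize) % fvec.size();
  return fvec[deviceIdx].fd();  // key used for fdpNvmeDevs_.at(...)
}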
@@ -921,23 +920,24 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
 /*
  * FileDevice
  */
-FileDevice::FileDevice(std::vector<folly::File>&& fvec,
-                       std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
-                       uint64_t fileSize,
-                       uint32_t blockSize,
-                       uint32_t stripeSize,
-                       uint32_t maxIOSize,
-                       uint32_t maxDeviceWriteSize,
-                       IoEngine ioEngine,
-                       uint32_t qDepthPerContext,
-                       std::shared_ptr<DeviceEncryptor> encryptor)
+FileDevice::FileDevice(
+    std::vector<folly::File>&& fvec,
+    std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
+    uint64_t fileSize,
+    uint32_t blockSize,
+    uint32_t stripeSize,
+    uint32_t maxIOSize,
+    uint32_t maxDeviceWriteSize,
+    IoEngine ioEngine,
+    uint32_t qDepthPerContext,
+    std::shared_ptr<DeviceEncryptor> encryptor)
     : Device(fileSize * fvec.size(),
              std::move(encryptor),
              blockSize,
              maxIOSize,
              maxDeviceWriteSize),
       fvec_(std::move(fvec)),
-      fdpNvmeVec_(std::move(fdpNvmeVec)),
+      fdpNvmeDevs_(std::move(fdpNvmeDevs)),
       stripeSize_(stripeSize),
       ioEngine_(ioEngine),
       qDepthPerContext_(qDepthPerContext) {
@@ -974,7 +974,7 @@ FileDevice::FileDevice(std::vector<folly::File>&& fvec,
       "num_fdp_devices {}",
       fvec_.size(), getSize(), blockSize, stripeSize, maxDeviceWriteSize,
       maxIOSize, getIoEngineName(ioEngine_), qDepthPerContext_,
-      fdpNvmeVec_.size());
+      fdpNvmeDevs_.size());
 }
 
 bool FileDevice::readImpl(uint64_t offset, uint32_t size, void* value) {
@@ -1030,7 +1030,7 @@ IoContext* FileDevice::getIoContext() {
     std::unique_ptr<folly::AsyncBase> asyncBase;
     if (useIoUring) {
 #ifndef CACHELIB_IOURING_DISABLE
-      if (fdpNvmeVec_.size() > 0) {
+      if (fdpNvmeDevs_.size() > 0) {
         // Big sqe/cqe is mandatory for NVMe passthrough
         // https://elixir.bootlin.com/linux/v6.7/source/drivers/nvme/host/ioctl.c#L742
         folly::IoUringOp::Options options;
@@ -1051,7 +1051,7 @@ IoContext* FileDevice::getIoContext() {
     auto idx = incrementalIdx_++;
     tlContext_.reset(new AsyncIoContext(std::move(asyncBase), idx, evb,
                                         qDepthPerContext_, useIoUring,
-                                        fdpNvmeVec_));
+                                        fdpNvmeDevs_));
 
     {
       // Keep pointers in a vector to ease the gdb debugging
@@ -1067,10 +1067,20 @@ IoContext* FileDevice::getIoContext() {
 }
 
 int FileDevice::allocatePlacementHandle() {
-  static constexpr uint16_t kDefaultFdpIdx = 0u;
 #ifndef CACHELIB_IOURING_DISABLE
-  if (fdpNvmeVec_.size() > 0) {
-    return fdpNvmeVec_[kDefaultFdpIdx]->allocateFdpHandle();
+  if (fdpNvmeDevs_.size() > 0) {
+    auto fdpHandle = -1;
+    // Ensure that the same FDP placement handle is allocated for all FdpNvme
+    // devices in the RAID set; return the allocated handle if successful,
+    // or -1 if there is a conflict
+    for (auto& nvmeFdp : fdpNvmeDevs_) {
+      auto tempHandle = nvmeFdp.second->allocateFdpHandle();
+      if (fdpHandle != -1 && (tempHandle != fdpHandle)) {
+        return -1;
+      }
+      fdpHandle = tempHandle;
+    }
+    return fdpHandle;
   }
 #endif
   return -1;
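
Below is a self-contained sketch of the agreement rule enforced by the new allocatePlacementHandle() above, using a stub in place of the real FdpNvme (StubFdpNvme and allocateAcrossRaid are illustrative names, not cachelib API). The RAID0 device reports success only when every member returns the same placement handle, because a write tagged with that handle can land on any member depending on which stripe its offset maps to.

#include <cassert>
#include <memory>
#include <unordered_map>

// Stub standing in for FdpNvme; allocateFdpHandle() just returns a canned value.
struct StubFdpNvme {
  int nextHandle;
  int allocateFdpHandle() { return nextHandle; }
};

// Same logic as the loop added above: -1 unless all members agree.
int allocateAcrossRaid(
    const std::unordered_map<int, std::shared_ptr<StubFdpNvme>>& devs) {
  int handle = -1;
  for (const auto& kv : devs) {
    const int h = kv.second->allocateFdpHandle();
    if (handle != -1 && h != handle) {
      return -1;  // members disagree: no RAID-wide handle
    }
    handle = h;
  }
  return handle;
}

int main() {
  std::unordered_map<int, std::shared_ptr<StubFdpNvme>> agree{
      {3, std::make_shared<StubFdpNvme>(StubFdpNvme{7})},
      {4, std::make_shared<StubFdpNvme>(StubFdpNvme{7})}};
  assert(allocateAcrossRaid(agree) == 7);

  std::unordered_map<int, std::shared_ptr<StubFdpNvme>> conflict{
      {3, std::make_shared<StubFdpNvme>(StubFdpNvme{7})},
      {4, std::make_shared<StubFdpNvme>(StubFdpNvme{8})}};
  assert(allocateAcrossRaid(conflict) == -1);
  return 0;
}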
@@ -1186,31 +1196,26 @@ std::unique_ptr<Device> createDirectIoFileDevice(
   XDCHECK(folly::isPowTwo(blockSize));
 
   uint32_t maxIOSize = maxDeviceWriteSize;
-  std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec{};
+  std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs;
 #ifndef CACHELIB_IOURING_DISABLE
   if (isFDPEnabled) {
     try {
-      if (filePaths.size() > 1) {
-        throw std::invalid_argument(folly::sformat(
-            "{} input files; but FDP mode does not support RAID files yet",
-            filePaths.size()));
-      }
 
-      for (const auto& path : filePaths) {
-        auto fdpNvme = std::make_shared<FdpNvme>(path);
+      for (size_t i = 0; i < filePaths.size(); i++) {
+        auto fdpNvme = std::make_shared<FdpNvme>(filePaths[i]);
 
         auto maxDevIOSize = fdpNvme->getMaxIOSize();
         if (maxDevIOSize != 0u &&
             (maxIOSize == 0u || maxDevIOSize < maxIOSize)) {
           maxIOSize = maxDevIOSize;
         }
 
-        fdpNvmeVec.push_back(std::move(fdpNvme));
+        fdpNvmeDevs.insert({fVec[i].fd(), std::move(fdpNvme)});
       }
     } catch (const std::exception& e) {
       XLOGF(ERR, "NVMe FDP mode could not be enabled {}, Errno: {}", e.what(),
             errno);
-      fdpNvmeVec.clear();
+      fdpNvmeDevs.clear();
       maxIOSize = 0u;
     }
   }
@@ -1221,7 +1226,7 @@ std::unique_ptr<Device> createDirectIoFileDevice(
   }
 
   return std::make_unique<FileDevice>(std::move(fVec),
-                                      std::move(fdpNvmeVec),
+                                      std::move(fdpNvmeDevs),
                                       fileSize,
                                       blockSize,
                                       stripeSize,

0 commit comments