@@ -1582,6 +1582,7 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
1582
1582
evictionData[i].candidate ->toString ()));
1583
1583
}
1584
1584
}
1585
+
1585
1586
if (dmlBatchSize) {
1586
1587
handler = dml::submit<dml::hardware>(dml::batch, sequence);
1587
1588
if (!handler.valid ()) {
@@ -1613,6 +1614,7 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
1613
1614
util::LatencyTracker smallItemWait{stats ().evictDmlSmallItemWaitLatency_ , smallBatch};
1614
1615
result = handler.get ();
1615
1616
}
1617
+
1616
1618
if (result.status != dml::status_code::ok) {
1617
1619
/* Re-try using CPU memmove */
1618
1620
for (auto i = 0U ; i < dmlBatchSize; i++) {
@@ -1629,6 +1631,91 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
1629
1631
}
1630
1632
}
1631
1633
1634
+ template <typename CacheTrait>
1635
+ void CacheAllocator<CacheTrait>::promoteRegularItems(TierId tid, PoolId pid, ClassId cid,
1636
+ std::vector<Item*>& candidates,
1637
+ std::vector<WriteHandle>& newItemHdls,
1638
+ bool skipAddInMMContainer,
1639
+ bool fromBgThread,
1640
+ std::vector<bool >& moved) {
1641
+ /* Split batch for DSA-based move */
1642
+ const auto & pool = allocator_[tid]->getPool (pid);
1643
+ const auto & allocSizes = pool.getAllocSizes ();
1644
+ auto isLarge = allocSizes[cid] >= config_.largeItemMinSize ;
1645
+ auto dmlBatchRatio = isLarge ? config_.largeItemBatchPromoteDsaUsageFraction :
1646
+ config_.smallItemBatchPromoteDsaUsageFraction ;
1647
+ size_t dmlBatchSize =
1648
+ (config_.dsaEnabled && candidates.size () >= config_.minBatchSizeForDsaUsage ) ?
1649
+ static_cast <size_t >(candidates.size () * dmlBatchRatio) : 0 ;
1650
+ auto sequence = dml::sequence<allocator_t >(dmlBatchSize);
1651
+ batch_handler_t handler{};
1652
+
1653
+ /* Move a calculated portion of the batch using DSA (if enabled) */
1654
+ for (auto i = 0U ; i < dmlBatchSize; i++) {
1655
+ XDCHECK (!candidates[i]->isExpired ());
1656
+ XDCHECK_EQ (newItemHdls[i]->getSize (), candidates[i]->getSize ());
1657
+ if (candidates[i]->isNvmClean ()) {
1658
+ newItemHdls[i]->markNvmClean ();
1659
+ }
1660
+ dml::const_data_view srcView = dml::make_view (
1661
+ reinterpret_cast <uint8_t *>(candidates[i]->getMemory ()), candidates[i]->getSize ());
1662
+ dml::data_view dstView = dml::make_view (
1663
+ reinterpret_cast <uint8_t *>(newItemHdls[i]->getMemory ()), newItemHdls[i]->getSize ());
1664
+ if (sequence.add (dml::mem_copy, srcView, dstView) != dml::status_code::ok) {
1665
+ throw std::runtime_error (folly::sformat (
1666
+ " failed to add dml::mem_copy operation to the sequence for item: {}" ,
1667
+ candidates[i]->toString ()));
1668
+ }
1669
+ }
1670
+
1671
+ if (dmlBatchSize) {
1672
+ handler = dml::submit<dml::hardware>(dml::batch, sequence);
1673
+ if (!handler.valid ()) {
1674
+ auto status = handler.get ();
1675
+ XDCHECK (handler.valid ()) << dmlErrStr (status);
1676
+ throw std::runtime_error (folly::sformat (
1677
+ " Failed dml sequence hw submission: {}" , dmlErrStr (status)));
1678
+ }
1679
+ (*stats_.promoteDmlBatchSubmits )[tid][pid][cid].inc ();
1680
+ }
1681
+
1682
+ /* Move the remaining batch using CPU memmove */
1683
+ for (auto i = dmlBatchSize; i < candidates.size (); i++) {
1684
+ moved[i] = moveRegularItem (*candidates[i], newItemHdls[i],
1685
+ skipAddInMMContainer, fromBgThread);
1686
+ }
1687
+
1688
+ /* If DSA batch move not in use */
1689
+ if (!dmlBatchSize) {
1690
+ return ;
1691
+ }
1692
+
1693
+ /* Complete the DSA based batch move */
1694
+ dml::batch_result result{};
1695
+ {
1696
+ size_t largeBatch = isLarge ? dmlBatchSize : 0 ;
1697
+ size_t smallBatch = dmlBatchSize - largeBatch;
1698
+ util::LatencyTracker largeItemWait{stats ().promoteDmlLargeItemWaitLatency_ , largeBatch};
1699
+ util::LatencyTracker smallItemWait{stats ().promoteDmlSmallItemWaitLatency_ , smallBatch};
1700
+ result = handler.get ();
1701
+ }
1702
+
1703
+ if (result.status != dml::status_code::ok) {
1704
+ /* Re-try using CPU memmove */
1705
+ for (auto i = 0U ; i < dmlBatchSize; i++) {
1706
+ moved[i] = moveRegularItem (*candidates[i], newItemHdls[i],
1707
+ skipAddInMMContainer, fromBgThread);
1708
+ }
1709
+ (*stats_.promoteDmlBatchFails )[tid][pid][cid].inc ();
1710
+ return ;
1711
+ }
1712
+
1713
+ /* Complete book keeping for items moved successfully via DSA based batch move */
1714
+ for (auto i = 0U ; i < dmlBatchSize; i++) {
1715
+ moved[i] = moveRegularItemBookKeeper (*candidates[i], newItemHdls[i]);
1716
+ }
1717
+ }
1718
+
1632
1719
template <typename CacheTrait>
1633
1720
bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
1634
1721
WriteHandle& newItemHdl,
@@ -1921,7 +2008,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1921
2008
candidateHandles.push_back (std::move (candidateHandle_));
1922
2009
}
1923
2010
};
1924
-
2011
+
1925
2012
mmContainer.withPromotionIterator (iterateAndMark);
1926
2013
1927
2014
if (candidates.size () < batch) {
@@ -1934,7 +2021,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1934
2021
return candidates;
1935
2022
}
1936
2023
}
1937
-
2024
+
1938
2025
// 1. get an item handle from a new allocation
1939
2026
for (int i = 0 ; i < candidates.size (); i++) {
1940
2027
Item *candidate = candidates[i];
@@ -1954,6 +2041,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1954
2041
folly::sformat (" Was not to acquire new alloc, failed alloc {}" , blankAllocs[i]));
1955
2042
}
1956
2043
}
2044
+
1957
2045
// 2. add in batch to mmContainer
1958
2046
auto & newMMContainer = getMMContainer (tid-1 , pid, cid);
1959
2047
uint32_t added = newMMContainer.addBatch (newAllocs.begin (), newAllocs.end ());
@@ -1963,12 +2051,15 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1963
2051
folly::sformat (" Was not able to add all new items, failed item {} and handle {}" ,
1964
2052
newAllocs[added]->toString (),newHandles[added]->toString ()));
1965
2053
}
2054
+
1966
2055
// 3. copy item data - don't need to add in mmContainer
2056
+ std::vector<bool > moved (candidates.size ());
2057
+ promoteRegularItems (tid, pid, cid, candidates, newHandles, true , true , moved);
2058
+
1967
2059
for (int i = 0 ; i < candidates.size (); i++) {
1968
2060
Item *candidate = candidates[i];
1969
2061
WriteHandle newHandle = std::move (newHandles[i]);
1970
- bool moved = moveRegularItem (*candidate,newHandle, true , true );
1971
- if (moved) {
2062
+ if (moved[i]) {
1972
2063
XDCHECK (candidate->getKey () == newHandle->getKey ());
1973
2064
if (markMoving) {
1974
2065
auto ref = candidate->unmarkMoving ();
@@ -1980,7 +2071,6 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1980
2071
}
1981
2072
} else {
1982
2073
typename NvmCacheT::PutToken token{};
1983
-
1984
2074
removeFromMMContainer (*newAllocs[i]);
1985
2075
auto ret = handleFailedMove (candidate,token,false ,markMoving);
1986
2076
XDCHECK (ret);
@@ -1989,7 +2079,6 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
1989
2079
releaseBackToAllocator (*candidate, RemoveContext::kNormal , false );
1990
2080
XDCHECK (res == ReleaseRes::kReleased );
1991
2081
}
1992
-
1993
2082
}
1994
2083
}
1995
2084
return candidates;
0 commit comments