@@ -1582,6 +1582,7 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
15821582          evictionData[i].candidate ->toString ()));
15831583    }
15841584  }
1585+ 
15851586  if  (dmlBatchSize) {
15861587    handler = dml::submit<dml::hardware>(dml::batch, sequence);
15871588    if  (!handler.valid ()) {
@@ -1613,6 +1614,7 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
16131614    util::LatencyTracker smallItemWait{stats ().evictDmlSmallItemWaitLatency_ , smallBatch};
16141615    result = handler.get ();
16151616  }
1617+ 
16161618  if  (result.status  != dml::status_code::ok) {
16171619    /*  Re-try using CPU memmove */ 
16181620    for  (auto  i = 0U ; i < dmlBatchSize; i++) {
@@ -1625,7 +1627,92 @@ void CacheAllocator<CacheTrait>::evictRegularItems(TierId tid, PoolId pid, Class
16251627
16261628  /*  Complete book keeping for items moved successfully via DSA based batch move */ 
16271629  for  (auto  i = 0U ; i < dmlBatchSize; i++) {
1628-     moved[i] = moveRegularItemBookKeeper (*evictionData[i].candidate , newItemHdls[i]);
1630+     moved[i] = completeAccessContainerUpdate (*evictionData[i].candidate , newItemHdls[i]);
1631+   }
1632+ }
1633+ 
1634+ template  <typename  CacheTrait>
1635+ void  CacheAllocator<CacheTrait>::promoteRegularItems(TierId tid, PoolId pid, ClassId cid,
1636+                                                      std::vector<Item*>& candidates,
1637+                                                      std::vector<WriteHandle>& newItemHdls,
1638+                                                      bool  skipAddInMMContainer,
1639+                                                      bool  fromBgThread,
1640+                                                      std::vector<bool >& moved) {
1641+   /*  Split batch for DSA-based move */ 
1642+   const  auto & pool = allocator_[tid]->getPool (pid);
1643+   const  auto & allocSizes = pool.getAllocSizes ();
1644+   auto  isLarge = allocSizes[cid] >= config_.largeItemMinSize ;
1645+   auto  dmlBatchRatio = isLarge ? config_.largeItemBatchPromoteDsaUsageFraction  :
1646+                                  config_.smallItemBatchPromoteDsaUsageFraction ;
1647+   size_t  dmlBatchSize =
1648+       (config_.dsaEnabled  && candidates.size () >= config_.minBatchSizeForDsaUsage ) ?
1649+                     static_cast <size_t >(candidates.size () * dmlBatchRatio) : 0 ;
1650+   auto  sequence = dml::sequence<allocator_t >(dmlBatchSize);
1651+   batch_handler_t  handler{};
1652+ 
1653+   /*  Move a calculated portion of the batch using DSA (if enabled) */ 
1654+   for  (auto  i = 0U ; i < dmlBatchSize; i++) {
1655+     XDCHECK (!candidates[i]->isExpired ());
1656+     XDCHECK_EQ (newItemHdls[i]->getSize (), candidates[i]->getSize ());
1657+     if  (candidates[i]->isNvmClean ()) {
1658+       newItemHdls[i]->markNvmClean ();
1659+     }
1660+     dml::const_data_view srcView = dml::make_view (
1661+         reinterpret_cast <uint8_t *>(candidates[i]->getMemory ()), candidates[i]->getSize ());
1662+     dml::data_view dstView = dml::make_view (
1663+         reinterpret_cast <uint8_t *>(newItemHdls[i]->getMemory ()), newItemHdls[i]->getSize ());
1664+     if  (sequence.add (dml::mem_copy, srcView, dstView) != dml::status_code::ok) {
1665+       throw  std::runtime_error (folly::sformat (
1666+           " failed to add dml::mem_copy operation to the sequence for item: {}" 
1667+           candidates[i]->toString ()));
1668+     }
1669+   }
1670+ 
1671+   if  (dmlBatchSize) {
1672+     handler = dml::submit<dml::hardware>(dml::batch, sequence);
1673+     if  (!handler.valid ()) {
1674+       auto  status = handler.get ();
1675+       XDCHECK (handler.valid ()) << dmlErrStr (status);
1676+       throw  std::runtime_error (folly::sformat (
1677+           " Failed dml sequence hw submission: {}" dmlErrStr (status)));
1678+     }
1679+     (*stats_.promoteDmlBatchSubmits )[tid][pid][cid].inc ();
1680+   }
1681+ 
1682+   /*  Move the remaining batch using CPU memmove */ 
1683+   for  (auto  i = dmlBatchSize; i < candidates.size (); i++) {
1684+     moved[i] = moveRegularItem (*candidates[i], newItemHdls[i],
1685+                                skipAddInMMContainer, fromBgThread);
1686+   }
1687+ 
1688+   /*  If DSA batch move not in use */ 
1689+   if  (!dmlBatchSize) {
1690+     return ;
1691+   }
1692+ 
1693+   /*  Complete the DSA based batch move */ 
1694+   dml::batch_result result{};
1695+   {
1696+     size_t  largeBatch = isLarge ? dmlBatchSize : 0 ;
1697+     size_t  smallBatch = dmlBatchSize - largeBatch;
1698+     util::LatencyTracker largeItemWait{stats ().promoteDmlLargeItemWaitLatency_ , largeBatch};
1699+     util::LatencyTracker smallItemWait{stats ().promoteDmlSmallItemWaitLatency_ , smallBatch};
1700+     result = handler.get ();
1701+   }
1702+ 
1703+   if  (result.status  != dml::status_code::ok) {
1704+     /*  Re-try using CPU memmove */ 
1705+     for  (auto  i = 0U ; i < dmlBatchSize; i++) {
1706+       moved[i] = moveRegularItem (*candidates[i], newItemHdls[i],
1707+                                  skipAddInMMContainer, fromBgThread);
1708+     }
1709+     (*stats_.promoteDmlBatchFails )[tid][pid][cid].inc ();
1710+     return ;
1711+   }
1712+ 
1713+   /*  Complete book keeping for items moved successfully via DSA based batch move */ 
1714+   for  (auto  i = 0U ; i < dmlBatchSize; i++) {
1715+     moved[i] = completeAccessContainerUpdate (*candidates[i], newItemHdls[i]);
16291716  }
16301717}
16311718
@@ -1693,11 +1780,11 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
16931780    XDCHECK (!oldItem.hasChainedItem ());
16941781    XDCHECK (newItemHdl->hasChainedItem ());
16951782  }
1696-   return  moveRegularItemBookKeeper (oldItem, newItemHdl);
1783+   return  completeAccessContainerUpdate (oldItem, newItemHdl);
16971784}
16981785
16991786template  <typename  CacheTrait>
1700- bool  CacheAllocator<CacheTrait>::moveRegularItemBookKeeper (
1787+ bool  CacheAllocator<CacheTrait>::completeAccessContainerUpdate (
17011788                                Item& oldItem, WriteHandle& newItemHdl) {
17021789  auto  predicate = [&](const  Item& item){
17031790    //  we rely on moving flag being set (it should block all readers)
@@ -1921,7 +2008,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19212008      candidateHandles.push_back (std::move (candidateHandle_));
19222009    }
19232010  };
1924-    
2011+ 
19252012  mmContainer.withPromotionIterator (iterateAndMark);
19262013
19272014  if  (candidates.size () < batch) {
@@ -1934,7 +2021,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19342021      return  candidates;  
19352022    }
19362023  }
1937-    
2024+ 
19382025  // 1. get and item handle from a new allocation
19392026  for  (int  i = 0 ; i < candidates.size (); i++) {
19402027    Item *candidate = candidates[i];
@@ -1954,6 +2041,7 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19542041         folly::sformat (" Was not to acquire new alloc, failed alloc {}" 
19552042    }
19562043  }
2044+ 
19572045  // 2. add in batch to mmContainer
19582046  auto & newMMContainer = getMMContainer (tid-1 , pid, cid);
19592047  uint32_t  added = newMMContainer.addBatch (newAllocs.begin (), newAllocs.end ());
@@ -1963,12 +2051,15 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19632051      folly::sformat (" Was not able to add all new items, failed item {} and handle {}" 
19642052                      newAllocs[added]->toString (),newHandles[added]->toString ()));
19652053  }
2054+ 
19662055  // 3. copy item data - don't need to add in mmContainer
2056+   std::vector<bool > moved (candidates.size ());
2057+   promoteRegularItems (tid, pid, cid, candidates, newHandles, true , true , moved);
2058+ 
19672059  for  (int  i = 0 ; i < candidates.size (); i++) {
19682060    Item *candidate = candidates[i];
19692061    WriteHandle newHandle = std::move (newHandles[i]);
1970-     bool  moved = moveRegularItem (*candidate,newHandle, true , true );
1971-     if  (moved) {
2062+     if  (moved[i]) {
19722063      XDCHECK (candidate->getKey () == newHandle->getKey ());
19732064      if  (markMoving) {
19742065        auto  ref = candidate->unmarkMoving ();
@@ -1980,7 +2071,6 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19802071      }
19812072    } else  {
19822073      typename  NvmCacheT::PutToken token{};
1983-       
19842074      removeFromMMContainer (*newAllocs[i]);
19852075      auto  ret = handleFailedMove (candidate,token,false ,markMoving);
19862076      XDCHECK (ret);
@@ -1989,7 +2079,6 @@ CacheAllocator<CacheTrait>::getNextCandidatesPromotion(TierId tid,
19892079            releaseBackToAllocator (*candidate, RemoveContext::kNormal , false );
19902080        XDCHECK (res == ReleaseRes::kReleased );
19912081      }
1992- 
19932082    }
19942083  }
19952084  return  candidates;
0 commit comments