Skip to content

Commit d0ea211

Browse files
[NFC][SYCL][Graph] Avoid unnecessary shared_ptr in duplicateNodes (#19372)
Changes are mostly local to the routine, but I've updated `MIDCache` to use raw `node_impl *` while on it too because `duplicateNodes` accounts for 40% of the places that need updates after the change of the `MIDCache`.
1 parent c2eaf0f commit d0ea211

File tree

2 files changed

+51
-52
lines changed

2 files changed

+51
-52
lines changed

sycl/source/detail/graph/graph_impl.cpp

Lines changed: 50 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,40 +1225,38 @@ exec_graph_impl::enqueue(sycl::detail::queue_impl &Queue,
12251225

12261226
void exec_graph_impl::duplicateNodes() {
12271227
// Map of original modifiable nodes (keys) to new duplicated nodes (values)
1228-
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;
1228+
std::map<node_impl *, node_impl *> NodesMap;
12291229

1230-
const std::vector<std::shared_ptr<node_impl>> &ModifiableNodes =
1231-
MGraphImpl->MNodeStorage;
1230+
nodes_range ModifiableNodes{MGraphImpl->MNodeStorage};
12321231
std::deque<std::shared_ptr<node_impl>> NewNodes;
12331232

1234-
for (size_t i = 0; i < ModifiableNodes.size(); i++) {
1235-
auto OriginalNode = ModifiableNodes[i];
1236-
std::shared_ptr<node_impl> NodeCopy =
1237-
std::make_shared<node_impl>(*OriginalNode);
1233+
for (node_impl &OriginalNode : ModifiableNodes) {
1234+
NewNodes.push_back(std::make_shared<node_impl>(OriginalNode));
1235+
node_impl &NodeCopy = *NewNodes.back();
12381236

12391237
// Associate the ID of the original node with the node copy for later quick
12401238
// access
1241-
MIDCache.insert(std::make_pair(OriginalNode->MID, NodeCopy));
1239+
MIDCache.insert(std::make_pair(OriginalNode.MID, &NodeCopy));
12421240

12431241
// Clear edges between nodes so that we can replace with new ones
1244-
NodeCopy->MSuccessors.clear();
1245-
NodeCopy->MPredecessors.clear();
1242+
NodeCopy.MSuccessors.clear();
1243+
NodeCopy.MPredecessors.clear();
12461244
// Push the new node to the front of the stack
1247-
NewNodes.push_back(NodeCopy);
12481245
// Associate the new node with the old one for updating edges
1249-
NodesMap.insert({OriginalNode, NodeCopy});
1246+
NodesMap.insert({&OriginalNode, &NodeCopy});
12501247
}
12511248

12521249
// Now that all nodes have been copied rebuild edges on new nodes. This must
12531250
// be done as a separate step since successors may be out of order.
1254-
for (size_t i = 0; i < ModifiableNodes.size(); i++) {
1255-
auto OriginalNode = ModifiableNodes[i];
1256-
auto NodeCopy = NewNodes[i];
1251+
auto OrigIt = ModifiableNodes.begin(), OrigEnd = ModifiableNodes.end();
1252+
for (auto NewIt = NewNodes.begin(); OrigIt != OrigEnd; ++OrigIt, ++NewIt) {
1253+
node_impl &OriginalNode = *OrigIt;
1254+
node_impl &NodeCopy = **NewIt;
12571255
// Look through all the original node successors, find their copies and
12581256
// register those as successors with the current copied node
1259-
for (node_impl &NextNode : OriginalNode->successors()) {
1260-
node_impl &Successor = *NodesMap.at(NextNode.shared_from_this());
1261-
NodeCopy->registerSuccessor(Successor);
1257+
for (node_impl &NextNode : OriginalNode.successors()) {
1258+
node_impl &Successor = *NodesMap.at(&NextNode);
1259+
NodeCopy.registerSuccessor(Successor);
12621260
}
12631261
}
12641262

@@ -1271,49 +1269,47 @@ void exec_graph_impl::duplicateNodes() {
12711269
if (NewNode->MNodeType != node_type::subgraph) {
12721270
continue;
12731271
}
1274-
const std::vector<std::shared_ptr<node_impl>> &SubgraphNodes =
1275-
NewNode->MSubGraphImpl->MNodeStorage;
1272+
nodes_range SubgraphNodes{NewNode->MSubGraphImpl->MNodeStorage};
12761273
std::deque<std::shared_ptr<node_impl>> NewSubgraphNodes{};
12771274

12781275
// Map of original subgraph nodes (keys) to new duplicated nodes (values)
1279-
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>>
1280-
SubgraphNodesMap;
1276+
std::map<node_impl *, node_impl *> SubgraphNodesMap;
12811277

12821278
// Copy subgraph nodes
1283-
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
1284-
auto SubgraphNode = SubgraphNodes[i];
1285-
auto NodeCopy = std::make_shared<node_impl>(*SubgraphNode);
1279+
for (node_impl &SubgraphNode : SubgraphNodes) {
1280+
NewSubgraphNodes.push_back(std::make_shared<node_impl>(SubgraphNode));
1281+
node_impl &NodeCopy = *NewSubgraphNodes.back();
12861282
// Associate the ID of the original subgraph node with all extracted node
12871283
// copies for future quick access.
1288-
MIDCache.insert(std::make_pair(SubgraphNode->MID, NodeCopy));
1284+
MIDCache.insert(std::make_pair(SubgraphNode.MID, &NodeCopy));
12891285

1290-
NewSubgraphNodes.push_back(NodeCopy);
1291-
SubgraphNodesMap.insert({SubgraphNode, NodeCopy});
1292-
NodeCopy->MSuccessors.clear();
1293-
NodeCopy->MPredecessors.clear();
1286+
SubgraphNodesMap.insert({&SubgraphNode, &NodeCopy});
1287+
NodeCopy.MSuccessors.clear();
1288+
NodeCopy.MPredecessors.clear();
12941289
}
12951290

12961291
// Rebuild edges for new subgraph nodes
1297-
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
1298-
auto SubgraphNode = SubgraphNodes[i];
1299-
auto NodeCopy = NewSubgraphNodes[i];
1300-
1301-
for (node_impl &NextNode : SubgraphNode->successors()) {
1302-
node_impl &Successor =
1303-
*SubgraphNodesMap.at(NextNode.shared_from_this());
1304-
NodeCopy->registerSuccessor(Successor);
1292+
auto OrigIt = SubgraphNodes.begin(), OrigEnd = SubgraphNodes.end();
1293+
for (auto NewIt = NewSubgraphNodes.begin(); OrigIt != OrigEnd;
1294+
++OrigIt, ++NewIt) {
1295+
node_impl &SubgraphNode = *OrigIt;
1296+
node_impl &NodeCopy = **NewIt;
1297+
1298+
for (node_impl &NextNode : SubgraphNode.successors()) {
1299+
node_impl &Successor = *SubgraphNodesMap.at(&NextNode);
1300+
NodeCopy.registerSuccessor(Successor);
13051301
}
13061302
}
13071303

13081304
// Collect input and output nodes for the subgraph
1309-
std::vector<std::shared_ptr<node_impl>> Inputs;
1310-
std::vector<std::shared_ptr<node_impl>> Outputs;
1311-
for (auto &NodeImpl : NewSubgraphNodes) {
1305+
std::vector<node_impl *> Inputs;
1306+
std::vector<node_impl *> Outputs;
1307+
for (std::shared_ptr<node_impl> &NodeImpl : NewSubgraphNodes) {
13121308
if (NodeImpl->MPredecessors.size() == 0) {
1313-
Inputs.push_back(NodeImpl);
1309+
Inputs.push_back(&*NodeImpl);
13141310
}
13151311
if (NodeImpl->MSuccessors.size() == 0) {
1316-
Outputs.push_back(NodeImpl);
1312+
Outputs.push_back(&*NodeImpl);
13171313
}
13181314
}
13191315

@@ -1333,7 +1329,7 @@ void exec_graph_impl::duplicateNodes() {
13331329

13341330
// Add all input nodes from the subgraph as successors for this node
13351331
// instead
1336-
for (auto &Input : Inputs) {
1332+
for (node_impl *Input : Inputs) {
13371333
PredNode.registerSuccessor(*Input);
13381334
}
13391335
}
@@ -1352,7 +1348,7 @@ void exec_graph_impl::duplicateNodes() {
13521348

13531349
// Add all Output nodes from the subgraph as predecessors for this node
13541350
// instead
1355-
for (auto &Output : Outputs) {
1351+
for (node_impl *Output : Outputs) {
13561352
Output->registerSuccessor(SuccNode);
13571353
}
13581354
}
@@ -1363,15 +1359,18 @@ void exec_graph_impl::duplicateNodes() {
13631359
NewNodes.erase(std::find(NewNodes.begin(), NewNodes.end(), NewNode));
13641360
// Also set the iterator to the newly added nodes so we can continue
13651361
// iterating over all remaining nodes
1366-
auto InsertIt = NewNodes.insert(OldPositionIt, NewSubgraphNodes.begin(),
1367-
NewSubgraphNodes.end());
1362+
auto InsertIt = NewNodes.insert(
1363+
OldPositionIt, std::make_move_iterator(NewSubgraphNodes.begin()),
1364+
std::make_move_iterator(NewSubgraphNodes.end()));
13681365
// Since the new reverse_iterator will be at i - 1 we need to advance it
13691366
// when constructing
13701367
NewNodeIt = std::make_reverse_iterator(std::next(InsertIt));
13711368
}
13721369

13731370
// Store all the new nodes locally
1374-
MNodeStorage.insert(MNodeStorage.begin(), NewNodes.begin(), NewNodes.end());
1371+
MNodeStorage.insert(MNodeStorage.begin(),
1372+
std::make_move_iterator(NewNodes.begin()),
1373+
std::make_move_iterator(NewNodes.end()));
13751374
}
13761375

13771376
void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
@@ -1436,7 +1435,7 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
14361435

14371436
for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
14381437
MIDCache.insert(
1439-
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i]));
1438+
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i].get()));
14401439
}
14411440

14421441
update(GraphImpl->MNodeStorage);
@@ -1709,7 +1708,7 @@ void exec_graph_impl::populateURKernelUpdateStructs(
17091708
auto ExecNode = MIDCache.find(Node.MID);
17101709
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");
17111710

1712-
auto Command = MCommandMap.find(ExecNode->second.get());
1711+
auto Command = MCommandMap.find(ExecNode->second);
17131712
assert(Command != MCommandMap.end());
17141713
UpdateDesc.hCommand = Command->second;
17151714

@@ -1738,7 +1737,7 @@ exec_graph_impl::getURUpdatableNodes(nodes_range Nodes) const {
17381737

17391738
auto ExecNode = MIDCache.find(Node.MID);
17401739
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");
1741-
auto PartitionIndex = MPartitionNodes.find(ExecNode->second.get());
1740+
auto PartitionIndex = MPartitionNodes.find(ExecNode->second);
17421741
assert(PartitionIndex != MPartitionNodes.end());
17431742
PartitionedNodes[PartitionIndex->second].push_back(&Node);
17441743
}

sycl/source/detail/graph/graph_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,7 @@ class exec_graph_impl {
917917

918918
// Stores a cache of node ids from modifiable graph nodes to the companion
919919
// node(s) in this graph. Used for quick access when updating this graph.
920-
std::multimap<node_impl::id_type, std::shared_ptr<node_impl>> MIDCache;
920+
std::multimap<node_impl::id_type, node_impl *> MIDCache;
921921

922922
unsigned long long MID;
923923
// Used for std::hash in order to create a unique hash for the instance.

0 commit comments

Comments
 (0)