Skip to content

Commit 0131e66

Browse files
Allow Device creating multiple CSRs [6/n]
- Introduce default Engine query
- Improve Deferred Deleter usage
- Remove Tag Allocation from Device

Change-Id: Iaa88d8dc0166325acf9a157dcd2217ea408ee285
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent 3e800d5 commit 0131e66

28 files changed

+112
-101
lines changed

runtime/command_queue/command_queue.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@ CommandQueue::CommandQueue(Context *context,
7777
commandQueueProperties = getCmdQueueProperties<cl_command_queue_properties>(properties);
7878
flushStamp.reset(new FlushStampTracker(true));
7979

80-
processProperties();
81-
8280
if (device) {
83-
engine = &device->getEngine(engineId);
81+
engine = &device->getDefaultEngine();
8482
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
8583
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
8684
}
8785
}
86+
87+
processProperties();
8888
}
8989

9090
CommandQueue::~CommandQueue() {

runtime/command_queue/command_queue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
428428

429429
QueuePriority priority;
430430
QueueThrottle throttle;
431-
size_t engineId = 0;
431+
uint32_t engineId = 0;
432432

433433
bool perfCountersEnabled;
434434
cl_uint perfCountersConfig;

runtime/device/device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
129129
pDevice->osTime = OSTime::create(commandStreamReceiver->getOSInterface());
130130
}
131131
pDevice->driverInfo.reset(DriverInfo::create(commandStreamReceiver->getOSInterface()));
132-
pDevice->tagAddress = reinterpret_cast<uint32_t *>(commandStreamReceiver->getTagAllocation()->getUnderlyingBuffer());
133132

134133
pDevice->initializeCaps();
135134

@@ -150,6 +149,7 @@ bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
150149
}
151150

152151
outDevice.executionEnvironment->memoryManager->setForce32BitAllocations(pDevice->getDeviceInfo().force32BitAddressess);
152+
outDevice.executionEnvironment->memoryManager->setDefaultEngineIndex(deviceCsrIndex);
153153

154154
if (pDevice->preemptionMode == PreemptionMode::MidThread || pDevice->isSourceLevelDebuggerActive()) {
155155
size_t requiredSize = pHwInfo->capabilityTable.requiredPreemptionSurfaceSize;

runtime/device/device.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ class Device : public BaseObject<_cl_device_id> {
7070
deviceInfo.force32BitAddressess = value;
7171
}
7272

73-
EngineControl &getEngine(size_t engineId);
73+
EngineControl &getEngine(uint32_t engineId);
74+
EngineControl &getDefaultEngine();
7475

7576
volatile uint32_t *getTagAddress() const;
7677

@@ -146,7 +147,6 @@ class Device : public BaseObject<_cl_device_id> {
146147
HardwareCapabilities hardwareCapabilities = {};
147148
DeviceInfo deviceInfo;
148149

149-
volatile uint32_t *tagAddress = nullptr;
150150
GraphicsAllocation *preemptionAllocation = nullptr;
151151
std::unique_ptr<OSTime> osTime;
152152
std::unique_ptr<DriverInfo> driverInfo;
@@ -161,6 +161,7 @@ class Device : public BaseObject<_cl_device_id> {
161161
PreemptionMode preemptionMode;
162162
ExecutionEnvironment *executionEnvironment = nullptr;
163163
uint32_t deviceIndex = 0u;
164+
uint32_t defaultEngineIndex = 0;
164165
};
165166

166167
template <cl_device_info Param>
@@ -171,12 +172,12 @@ inline void Device::getCap(const void *&src,
171172
retSize = size = DeviceInfoTable::Map<Param>::size;
172173
}
173174

174-
inline EngineControl &Device::getEngine(size_t engineId) {
175+
// Returns a reference to the EngineControl stored at position engineId of the
// device's `engines` container. No range check on engineId is visible in this
// accessor, so callers are expected to pass a valid index.
inline EngineControl &Device::getEngine(uint32_t engineId) {
175176
return engines[engineId];
176177
}
177178

178-
inline volatile uint32_t *Device::getTagAddress() const {
179-
return tagAddress;
179+
// Returns a reference to the device's default engine, i.e. the entry of
// `engines` selected by the defaultEngineIndex member.
inline EngineControl &Device::getDefaultEngine() {
180+
return engines[defaultEngineIndex];
180181
}
181182

182183
inline MemoryManager *Device::getMemoryManager() const {

runtime/device_queue/device_queue_hw.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
230230

231231
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, criticalSectionAddress, ExecutionModelCriticalSection::Free);
232232

233-
uint64_t tagAddress = (uint64_t)device->getTagAddress();
233+
uint64_t tagAddress = reinterpret_cast<uint64_t>(device->getDefaultEngine().commandStreamReceiver->getTagAddress());
234234

235235
addPipeControlCmdWa();
236236

runtime/execution_environment/execution_environment.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class BuiltIns;
2424
struct HardwareInfo;
2525
class OSInterface;
2626

27-
using CsrContainer = std::array<std::unique_ptr<CommandStreamReceiver>, EngineInstanceT::numGpgpuEngineInstances>;
27+
using CsrContainer = std::vector<std::array<std::unique_ptr<CommandStreamReceiver>, EngineInstanceT::numGpgpuEngineInstances>>;
2828

2929
class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment> {
3030
private:
@@ -51,7 +51,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
5151
std::unique_ptr<OSInterface> osInterface;
5252
std::unique_ptr<MemoryManager> memoryManager;
5353
std::unique_ptr<AubCenter> aubCenter;
54-
std::vector<CsrContainer> commandStreamReceivers;
54+
CsrContainer commandStreamReceivers;
5555
std::unique_ptr<BuiltIns> builtins;
5656
std::unique_ptr<CompilerInterface> compilerInterface;
5757
std::unique_ptr<SourceLevelDebugger> sourceLevelDebugger;

runtime/mem_obj/mem_obj.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ void MemObj::releaseAllocatedMapPtr() {
288288
}
289289

290290
// Waits, with TimeoutControls::maxTimeout, until a command stream receiver
// obtained from the memory manager has completed the task count that
// graphicsAllocation records for context 0.
// This hunk shows both lookups: the removed getCommandStreamReceiver(0) and
// the added getDefaultCommandStreamReceiver(0), both called with index 0.
void MemObj::waitForCsrCompletion() {
291-
memoryManager->getCommandStreamReceiver(0)->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, graphicsAllocation->getTaskCount(0u));
291+
memoryManager->getDefaultCommandStreamReceiver(0)->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, graphicsAllocation->getTaskCount(0u));
292292
}
293293

294294
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {

runtime/memory_manager/deferrable_allocation_deletion.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "runtime/command_stream/command_stream_receiver.h"
99
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
1010
#include "runtime/memory_manager/memory_manager.h"
11+
#include "runtime/os_interface/os_context.h"
1112

1213
namespace OCLRT {
1314

@@ -16,11 +17,16 @@ DeferrableAllocationDeletion::DeferrableAllocationDeletion(MemoryManager &memory
1617
void DeferrableAllocationDeletion::apply() {
1718
while (graphicsAllocation.isUsed()) {
1819

19-
for (auto contextId = 0u; contextId < memoryManager.getOsContextCount(); contextId++) {
20-
if (graphicsAllocation.isUsedByContext(contextId)) {
21-
auto currentContextTaskCount = *memoryManager.getCommandStreamReceiver(contextId)->getTagAddress();
22-
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
23-
graphicsAllocation.resetTaskCount(contextId);
20+
for (auto &deviceCsrs : memoryManager.getCommandStreamReceivers()) {
21+
for (auto &csr : deviceCsrs) {
22+
if (csr) {
23+
auto contextId = csr->getOsContext().getContextId();
24+
if (graphicsAllocation.isUsedByContext(contextId)) {
25+
auto currentContextTaskCount = *csr->getTagAddress();
26+
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
27+
graphicsAllocation.resetTaskCount(contextId);
28+
}
29+
}
2430
}
2531
}
2632
}

runtime/memory_manager/host_ptr_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ RequirementsStatus HostPtrManager::checkAllocationsForOverlapping(MemoryManager
282282
if (checkedFragments->status[i] == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
283283
// clean temporary allocations
284284

285-
auto commandStreamReceiver = memoryManager.getCommandStreamReceiver(0);
285+
auto commandStreamReceiver = memoryManager.getDefaultCommandStreamReceiver(0);
286286
auto allocationStorage = commandStreamReceiver->getInternalAllocationStorage();
287287
uint32_t taskCount = *commandStreamReceiver->getTagAddress();
288288
allocationStorage->cleanAllocationList(taskCount, TEMPORARY_ALLOCATION);

runtime/memory_manager/memory_manager.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,10 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
137137
//if not in use destroy in place
138138
//if in use pass to temporary allocation list that is cleaned on blocking calls
139139
// Frees gfxAllocation right away when it is unused, or when its task count for
// context 0 is not greater than the value currently read from the command
// stream receiver's tag address. Otherwise ownership of the allocation is
// handed to that receiver's internal allocation storage as a
// TEMPORARY_ALLOCATION.
// The removed (-) lines looked the receiver up with getCommandStreamReceiver(0);
// the added (+) lines index getCommandStreamReceivers()[0][defaultEngineIndex].
// Both variants only consult the first entry (index 0) and context 0.
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
140-
if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceiver(0)->getTagAddress()) {
140+
if (!gfxAllocation->isUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceivers()[0][defaultEngineIndex]->getTagAddress()) {
141141
freeGraphicsMemory(gfxAllocation);
142142
} else {
143-
getCommandStreamReceiver(0)->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
143+
getCommandStreamReceivers()[0][defaultEngineIndex]->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
144144
}
145145
}
146146

@@ -274,9 +274,13 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData &
274274
}
275275
return allocateGraphicsMemory(allocationData.size, MemoryConstants::pageSize, allocationData.flags.forcePin, allocationData.flags.uncacheable);
276276
}
277-
CommandStreamReceiver *MemoryManager::getCommandStreamReceiver(uint32_t contextId) {
278-
UNRECOVERABLE_IF(executionEnvironment.commandStreamReceivers.size() < 1);
279-
return executionEnvironment.commandStreamReceivers[contextId][0].get();
277+
278+
// Read-only access to the execution environment's command stream receiver
// container (executionEnvironment.commandStreamReceivers). The container is
// returned by const reference; nothing is copied.
const CsrContainer &MemoryManager::getCommandStreamReceivers() const {
279+
return executionEnvironment.commandStreamReceivers;
280+
}
281+
282+
// Returns the raw pointer (via .get()) to the command stream receiver stored at
// [deviceId][defaultEngineIndex] of executionEnvironment.commandStreamReceivers.
// No check on deviceId or on the container size is performed here.
// NOTE(review): the result is presumably null when that slot has not been
// populated; confirm against the code that fills the container.
CommandStreamReceiver *MemoryManager::getDefaultCommandStreamReceiver(uint32_t deviceId) const {
283+
return executionEnvironment.commandStreamReceivers[deviceId][defaultEngineIndex].get();
280284
}
281285

282286
} // namespace OCLRT

0 commit comments

Comments
 (0)