Skip to content

Commit 15b7ba2

Browse files
committed
Update mempool example
1 parent 2ceaca6 commit 15b7ba2

File tree

4 files changed

+18
-86
lines changed

4 files changed

+18
-86
lines changed

docs/04-memory.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,11 @@ for (int i = 0; i < 100; i++) {
279279
hipMalloc(&ptr, size);
280280
// Run GPU kernel
281281
kernel<<<..., stream>>>(ptr);
282-
// Synchronize stream, does not influence memory allocations
283-
hipStreamSynchronize(stream);
284282
// Deallocate memory here
285283
hipFree(ptr);
286284
}
285+
// Synchronize stream, does not influence memory allocations
286+
hipStreamSynchronize(stream);
287287
```
288288
* Allocating and deallocating memory in a loop is slow and can have a significant impact on performance
289289
</div>

memory/02-mempools/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
# The stream-ordered memory allocator and memory pools
22

3-
The purpose of this exercise is to compare different memory allocation
4-
strategies within a loop and to understand the performance impact of using or not using a memory pool. The following timed functions are called at the end of the source file by the `main()` function:
3+
The purpose of this exercise is to compare different memory allocation strategies within a loop and to understand the performance impact of using or not using a memory pool. The following timed functions are called at the end of the source file by the `main()` function:
54

65
* The function `noRecurringAlloc()` allocates memory only once, outside the loop
76
* The function `recurringAllocNoMemPools()` repeatedly allocates memory within a loop
8-
* The function `recurringAllocMemPoolNoSync()` obtains memory from a pool within a loop recurringly (no synchronization within loop)
9-
* The function `recurringAllocMemPoolSync()` obtains memory from a pool within a loop recurringly and synchronizes (synchronization within loop)
7+
* The function `recurringAllocMemPool()` repeatedly obtains memory from a pool within a loop
108

119
The task is to fill in the missing function calls in the code, which are indicated by lines beginning with `#error` followed by a descriptive instruction.
1210

memory/02-mempools/mempools.cpp

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ void noRecurringAlloc(int nSteps, int size)
6161
{
6262
// Launch GPU kernel
6363
hipKernel<<<gridsize, blocksize, 0, 0>>>(d_A, size);
64-
// Synchronization
65-
#error synchronize the default stream here
6664
}
65+
// Synchronization
66+
#error synchronize the default stream here
6767
// Check results and print timings
6868
checkTiming("noRecurringAlloc", (double)(clock() - tStart) / CLOCKS_PER_SEC);
6969

@@ -87,17 +87,17 @@ void recurringAllocNoMemPools(int nSteps, int size)
8787
#error allocate memory with hipMalloc for d_A of size
8888
// Launch GPU kernel
8989
hipKernel<<<gridsize, blocksize, 0, 0>>>(d_A, size);
90-
// Synchronization
91-
#error synchronize the default stream here
9290
// Free allocation
9391
#error free d_A allocation using hipFree
9492
}
93+
// Synchronization
94+
#error synchronize the default stream here
9595
// Check results and print timings
9696
checkTiming("recurringAllocNoMemPools", (double)(clock() - tStart) / CLOCKS_PER_SEC);
9797
}
9898

99-
/* Run using memory pooling but no recurring syncs */
100-
void recurringAllocMemPoolNoSync(int nSteps, int size)
99+
/* Do recurring allocation with memory pooling */
100+
void recurringAllocMemPool(int nSteps, int size)
101101
{
102102
// Create HIP stream
103103
hipStream_t stream;
@@ -128,38 +128,6 @@ void recurringAllocMemPoolNoSync(int nSteps, int size)
128128
hipStreamDestroy(stream);
129129
}
130130

131-
/* Run using memory pooling and recurring syncs */
132-
void recurringAllocMemPoolSync(int nSteps, int size)
133-
{
134-
// Create HIP stream
135-
hipStream_t stream;
136-
hipStreamCreate(&stream);
137-
138-
// Determine grid and block size
139-
const int blocksize = BLOCKSIZE;
140-
const int gridsize = (size - 1 + blocksize) / blocksize;
141-
142-
// Start timer and begin stepping loop
143-
clock_t tStart = clock();
144-
for(unsigned int i = 0; i < nSteps; i++)
145-
{
146-
int *d_A;
147-
// Allocate pinned device memory
148-
#error allocate memory with cudaMallocAsync for d_A of size in stream
149-
// Launch GPU kernel
150-
hipKernel<<<gridsize, blocksize, 0, stream>>>(d_A, size);
151-
// Free allocation
152-
#error free d_A allocation using cudaFreeAsync in stream
153-
// Synchronization
154-
#error synchronize the stream here
155-
}
156-
// Check results and print timings
157-
checkTiming("recurringAllocMemPoolSync", (double)(clock() - tStart) / CLOCKS_PER_SEC);
158-
159-
// Destroy the stream
160-
hipStreamDestroy(stream);
161-
}
162-
163131
/* The main function */
164132
int main(int argc, char* argv[])
165133
{
@@ -172,6 +140,5 @@ int main(int argc, char* argv[])
172140
// Run with different memory allocation strategies
173141
noRecurringAlloc(nSteps, size);
174142
recurringAllocNoMemPools(nSteps, size);
175-
recurringAllocMemPoolNoSync(nSteps, size);
176-
recurringAllocMemPoolSync(nSteps, size);
143+
recurringAllocMemPool(nSteps, size);
177144
}

memory/02-mempools/solution/mempools.cpp

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ void noRecurringAlloc(int nSteps, int size)
6161
{
6262
// Launch GPU kernel
6363
hipKernel<<<gridsize, blocksize, 0, 0>>>(d_A, size);
64-
// Synchronization
65-
hipStreamSynchronize(0);
6664
}
65+
// Synchronization
66+
hipStreamSynchronize(0);
6767
// Check results and print timings
6868
checkTiming("noRecurringAlloc", (double)(clock() - tStart) / CLOCKS_PER_SEC);
6969

@@ -87,17 +87,17 @@ void recurringAllocNoMemPools(int nSteps, int size)
8787
hipMalloc((void**)&d_A, size);
8888
// Launch GPU kernel
8989
hipKernel<<<gridsize, blocksize, 0, 0>>>(d_A, size);
90-
// Synchronization
91-
hipStreamSynchronize(0);
9290
// Free allocation
9391
hipFree(d_A);
9492
}
93+
// Synchronization
94+
hipStreamSynchronize(0);
9595
// Check results and print timings
9696
checkTiming("recurringAllocNoMemPools", (double)(clock() - tStart) / CLOCKS_PER_SEC);
9797
}
9898

99-
/* Run using memory pooling but no recurring syncs */
100-
void recurringAllocMemPoolNoSync(int nSteps, int size)
99+
/* Do recurring allocation with memory pooling */
100+
void recurringAllocMemPool(int nSteps, int size)
101101
{
102102
// Create HIP stream
103103
hipStream_t stream;
@@ -128,38 +128,6 @@ void recurringAllocMemPoolNoSync(int nSteps, int size)
128128
hipStreamDestroy(stream);
129129
}
130130

131-
/* Run using memory pooling and recurring syncs */
132-
void recurringAllocMemPoolSync(int nSteps, int size)
133-
{
134-
// Create HIP stream
135-
hipStream_t stream;
136-
hipStreamCreate(&stream);
137-
138-
// Determine grid and block size
139-
const int blocksize = BLOCKSIZE;
140-
const int gridsize = (size - 1 + blocksize) / blocksize;
141-
142-
// Start timer and begin stepping loop
143-
clock_t tStart = clock();
144-
for(unsigned int i = 0; i < nSteps; i++)
145-
{
146-
int *d_A;
147-
// Allocate pinned device memory
148-
cudaMallocAsync((void**)&d_A, size, stream);
149-
// Launch GPU kernel
150-
hipKernel<<<gridsize, blocksize, 0, stream>>>(d_A, size);
151-
// Free allocation
152-
cudaFreeAsync(d_A, stream);
153-
// Synchronization
154-
hipStreamSynchronize(stream);
155-
}
156-
// Check results and print timings
157-
checkTiming("recurringAllocMemPoolSync", (double)(clock() - tStart) / CLOCKS_PER_SEC);
158-
159-
// Destroy the stream
160-
hipStreamDestroy(stream);
161-
}
162-
163131
/* The main function */
164132
int main(int argc, char* argv[])
165133
{
@@ -172,6 +140,5 @@ int main(int argc, char* argv[])
172140
// Run with different memory allocation strategies
173141
noRecurringAlloc(nSteps, size);
174142
recurringAllocNoMemPools(nSteps, size);
175-
recurringAllocMemPoolNoSync(nSteps, size);
176-
recurringAllocMemPoolSync(nSteps, size);
143+
recurringAllocMemPool(nSteps, size);
177144
}

0 commit comments

Comments
 (0)