-
Notifications
You must be signed in to change notification settings - Fork 3
/
neuron_mempool.c
411 lines (357 loc) · 10.2 KB
/
neuron_mempool.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2020, Amazon.com, Inc. or its affiliates. All Rights Reserved
*/
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
#include <asm/io.h>
#include <linux/errno.h>
#include <linux/genalloc.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
#include <linux/fault-inject.h>
#include "neuron_mempool.h"
#include "neuron_device.h"
/*
 * Smallest allocation granule of a device mempool. mp_init() passes
 * ilog2() of this value to gen_pool_create() as the minimum allocation
 * order, and skips this many bytes when a pool would start at address 0.
 * The parameter is readable and writable by owner and group.
 */
int mempool_min_alloc_size = 256;
module_param(mempool_min_alloc_size, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
MODULE_PARM_DESC(mempool_min_alloc_size, "Minimum size for device memory allocation");
#ifdef CONFIG_FAULT_INJECTION
/* Fault attribute checked by mc_alloc() to force an -ENOMEM failure. */
DECLARE_FAULT_ATTR(neuron_fail_mc_alloc);
#endif
// Limit for using kmalloc: host allocations larger than this many bytes use
// dma_alloc_coherent(), smaller or equal ones use kmalloc().
#define MEMPOOL_KMALLOC_MAX_SIZE (256 * 1024)
/**
 * mc_insert_node() - Link a memory chunk into the address-ordered tree
 *
 * @root: root of the red-black tree that holds the chunks
 * @mc: chunk to add; its pa field is used as the sort key
 *
 * A chunk whose pa is not smaller than that of a visited node is placed in
 * that node's right subtree, otherwise in its left subtree.
 */
static void mc_insert_node(struct rb_root *root, struct mem_chunk *mc)
{
	struct rb_node **slot = &root->rb_node;
	struct rb_node *above = NULL;
	const phys_addr_t key = mc->pa;

	/* Walk down until an empty child slot is reached */
	while (*slot != NULL) {
		struct mem_chunk *cur;

		above = *slot;
		cur = rb_entry(above, struct mem_chunk, node);
		slot = (cur->pa > key) ? &above->rb_left : &above->rb_right;
	}

	/* Hook the new node into that slot and rebalance the tree */
	rb_link_node(&mc->node, above, slot);
	rb_insert_color(&mc->node, root);
}
/**
 * mc_remove_node() - Unlink a memory chunk from the tree
 *
 * @root: root of the red-black tree the chunk is currently linked into
 * @mc: chunk to unlink
 *
 * Only the tree linkage is removed; the chunk itself is not freed.
 */
void mc_remove_node(struct rb_root *root, struct mem_chunk *mc)
{
	struct rb_node *victim = &mc->node;

	rb_erase(victim, root);
}
/**
 * mp_init() - Initialize a mempool and create its backing gen_pool.
 *
 * @mp: pointer to mempool that needs to be initialized (zeroed first)
 * @start_addr: starting address of the pool
 * @pool_size: size of the pool in bytes
 * @mem_location: location of the backing memory
 * @dram_channel: device dram channel backing this pool
 * @dram_region: device dram region backing this pool
 *
 * The pool granule is taken from the mempool_min_alloc_size module
 * parameter. Because gen_pool_alloc() reports failure as address 0, a pool
 * that would start at 0 gives up its first granule so that 0 is never a
 * valid allocation.
 *
 * On failure @mp is left zeroed apart from the descriptive fields, with
 * gen_pool == NULL and initialized == 0.
 *
 * Return: 0 if the pool is created, a negative error code otherwise
 * (-EINVAL for an unusable granule or a pool too small to sacrifice its
 * first granule, -ENOMEM if the gen_pool cannot be created, or the error
 * returned by gen_pool_add_virt()).
 */
static int mp_init(struct mempool *mp, u64 start_addr, size_t pool_size,
		   enum mem_location mem_location, u32 dram_channel, u32 dram_region)
{
	/* Snapshot once: the module parameter is writable at runtime. */
	const int min_alloc = mempool_min_alloc_size;
	int ret;

	memset(mp, 0, sizeof(*mp));

	/* ilog2() is meaningless for zero or negative values. */
	if (min_alloc <= 0)
		return -EINVAL;

	mp->mem_location = mem_location;
	mp->dram_channel = dram_channel;
	mp->dram_region = dram_region;
	INIT_LIST_HEAD(&mp->device_allocated_head);

	mp->gen_pool = gen_pool_create(ilog2(min_alloc), -1);
	if (mp->gen_pool == NULL)
		return -ENOMEM;

	// 0 is special since we cant differentiate failure(NULL) in gen_pool_alloc().
	// so avoid starting at 0 by sacrificing first chunk.
	if (start_addr == 0) {
		/* Refuse pools that would be empty or underflow after the cut. */
		if (pool_size <= (size_t)min_alloc) {
			ret = -EINVAL;
			goto fail;
		}
		start_addr = min_alloc;
		pool_size -= min_alloc;
	}

	ret = gen_pool_add_virt(mp->gen_pool, start_addr, start_addr, pool_size, -1);
	if (ret)
		goto fail;

	snprintf(mp->name, sizeof(mp->name), "device mempool [%u:%u]", dram_channel, dram_region);
	mp->region_size = pool_size;
	mp->initialized = 1;
	return 0;

fail:
	gen_pool_destroy(mp->gen_pool);
	/* Do not leave a dangling pointer behind for later NULL checks. */
	mp->gen_pool = NULL;
	return ret;
}
/**
* Frees all the chunks associated with the mempool.
*/
static void mp_free_device_mem(struct mempool *mp)
{
BUG_ON(mp == NULL);
if (!mp->initialized)
return;
if (mp->gen_pool != NULL) {
// Free all entries
struct list_head *this, *next;
list_for_each_safe (this, next, &mp->device_allocated_head) {
struct mem_chunk *mc =
list_entry(this, struct mem_chunk, device_allocated_list);
if (mc->va) {
gen_pool_free(mp->gen_pool, (unsigned long)mc->va, mc->size);
mc->va = NULL;
}
list_del(&mc->device_allocated_list);
kfree(mc);
}
mp->allocated_size = 0;
}
}
/**
 * mp_destroy() - Release all chunks of a mempool and destroy its gen_pool.
 *
 * @mp: mempool to tear down; must not be NULL
 *
 * Does nothing for a pool that is not initialized. After a successful
 * teardown the pool is marked uninitialized and its gen_pool pointer is
 * cleared, so calling this function again, or checking mp->gen_pool for
 * NULL, is safe.
 */
static void mp_destroy(struct mempool *mp)
{
	BUG_ON(mp == NULL);

	if (!mp->initialized)
		return;

	if (mp->gen_pool != NULL) {
		// Free all entries
		mp_free_device_mem(mp);
		gen_pool_destroy(mp->gen_pool);
		/* The pool is gone; never leave a dangling pointer behind. */
		mp->gen_pool = NULL;
	}
	mp->initialized = 0;
}
/**
 * mpset_host_init() - Prepare the host-memory bookkeeping of a mempool set.
 *
 * @mpset: mempool set to prepare
 *
 * Resets the chunk lookup tree, the list of host allocations and the
 * mutex that guards the set.
 *
 * NOTE(review): mpset->rblock is taken with write_lock() elsewhere in this
 * file but is not initialized here - confirm it is set up by the caller.
 *
 * Return: always 0.
 */
int mpset_host_init(struct mempool_set *mpset)
{
	mpset->root = RB_ROOT;
	INIT_LIST_HEAD(&mpset->host_allocated_head);
	mutex_init(&mpset->lock);

	return 0;
}
int mpset_device_init(struct mempool_set *mpset, int num_channels, int num_regions,
const phys_addr_t device_dram_addr[], const u64 device_dram_size[])
{
int ret;
u32 channel, region;
u64 region_sz;
if (num_regions <= 0 || num_regions > 4)
num_regions = 1;
mpset->num_regions = num_regions;
for (channel = 0; channel < num_channels; channel++) {
region_sz = device_dram_size[channel] / mpset->num_regions;
for (region = 0; region < mpset->num_regions; region++) {
dma_addr_t addr = device_dram_addr[channel] + (region * region_sz);
ret = mp_init(&mpset->mp_device[channel][region], addr, region_sz,
MEM_LOC_DEVICE, channel, region);
if (ret) {
pr_err("neuron: mpset device init failed %d\n", ret);
goto fail;
}
}
}
return 0;
fail:
for (; channel >= 0; channel--) {
for (; region >= 0; region--) {
mp_destroy(&mpset->mp_device[channel][region]);
}
}
memset(mpset, 0, sizeof(struct mempool_set));
return ret;
}
static void mpset_free_host_memory(struct mempool_set *mpset)
{
struct list_head *this, *next;
list_for_each_safe (this, next, &mpset->host_allocated_head) {
struct mem_chunk *mc = list_entry(this, struct mem_chunk, host_allocated_list);
if (mc->va) {
write_lock(&mpset->rblock);
mc_remove_node(&mpset->root, mc);
write_unlock(&mpset->rblock);
if (mc->size > MEMPOOL_KMALLOC_MAX_SIZE) {
dma_free_coherent(mpset->pdev, mc->size, mc->va, mc->pa);
} else {
kfree(mc->va);
}
mc->va = NULL;
}
list_del(&mc->host_allocated_list);
kfree(mc);
}
mpset->host_mem_size = 0;
}
/**
 * mpset_free_all() - Release every device and host chunk of a mempool set.
 *
 * @mpset: mempool set to empty
 *
 * The device pools themselves are kept and can be allocated from again;
 * only the outstanding chunks are released. Both memory usage counters of
 * the set are 0 afterwards. The whole operation runs under mpset->lock.
 */
void mpset_free_all(struct mempool_set *mpset)
{
	u32 channel, region;

	mutex_lock(&mpset->lock);
	for (channel = 0; channel < V1_MAX_DRAM_CHANNELS; channel++) {
		for (region = 0; region < mpset->num_regions; region++) {
			mp_free_device_mem(&mpset->mp_device[channel][region]);
		}
	}
	/*
	 * Every device chunk is gone now, so the set-wide counter must be
	 * reset just like the per-pool allocated_size and host_mem_size are.
	 */
	mpset->device_mem_size = 0;
	mpset_free_host_memory(mpset);
	mutex_unlock(&mpset->lock);
}
/**
 * mpset_destroy() - Tear down a mempool set completely.
 *
 * @mpset: mempool set to destroy
 *
 * Under mpset->lock, destroys every device pool and releases all host
 * chunks. After the lock is dropped the whole structure is zeroed.
 */
void mpset_destroy(struct mempool_set *mpset)
{
	u32 ch, rg;

	mutex_lock(&mpset->lock);
	for (ch = 0; ch < V1_MAX_DRAM_CHANNELS; ch++) {
		for (rg = 0; rg < mpset->num_regions; rg++) {
			struct mempool *pool = &mpset->mp_device[ch][rg];

			mp_destroy(pool);
		}
	}
	mpset_free_host_memory(mpset);
	mutex_unlock(&mpset->lock);

	/* Wipe the set only once the lock is no longer held */
	memset(mpset, 0, sizeof(*mpset));
}
/**
 * mpset_search_mc() - Find the host chunk that contains a physical address.
 *
 * @mp: mempool set whose lookup tree is searched
 * @pa: physical address to look up
 *
 * A chunk covers the half-open range [mc->pa, mc->pa + mc->size). The
 * address one past the end of a chunk does not belong to it.
 *
 * NOTE(review): no lock is taken here; presumably the caller holds
 * mpset->rblock - confirm against the callers.
 *
 * Return: the chunk containing @pa, or NULL if no chunk covers it.
 */
struct mem_chunk *mpset_search_mc(struct mempool_set *mp, phys_addr_t pa)
{
	struct rb_node *node = mp->root.rb_node; /* top of the tree */

	while (node) {
		struct mem_chunk *mc = rb_entry(node, struct mem_chunk, node);

		if (pa < mc->pa) {
			node = node->rb_left;
		} else if (pa - mc->pa < mc->size) {
			/*
			 * pa >= mc->pa here, so the subtraction cannot wrap
			 * the way mc->pa + mc->size could.
			 */
			return mc;
		} else {
			node = node->rb_right;
		}
	}
	return NULL;
}
/**
 * mc_alloc() - Allocate a memory chunk from host memory or a device pool.
 *
 * @mpset: mempool set to allocate from
 * @result: receives the new chunk on success, NULL on failure
 * @size: number of bytes to allocate
 * @location: MEM_LOC_HOST for host memory, anything else selects a device pool
 * @channel: device DRAM channel (validated for every location)
 * @region: device DRAM region; forced to 0 when the set has a single region
 * @nc_id: id stored in the chunk
 *
 * Host allocations of up to MEMPOOL_KMALLOC_MAX_SIZE bytes come from
 * kzalloc(), larger ones from dma_alloc_coherent(). Host chunks are also
 * entered into the address lookup tree. Device allocations come from the
 * gen_pool of the selected channel/region.
 *
 * The chunk is fully described (size, location, ...) before it is linked
 * into any list or tree, and usage counters are only updated when the
 * allocation succeeded.
 *
 * Return: 0 on success, -EINVAL for an out-of-range channel or device
 * region, -ENOMEM if memory is exhausted or the device pool does not exist.
 */
int mc_alloc(struct mempool_set *mpset, struct mem_chunk **result, u32 size,
	     enum mem_location location, u32 channel, u32 region, u32 nc_id)
{
	struct mem_chunk *mc;
	int ret = 0;

	*result = NULL;

	if (channel >= V1_MAX_DRAM_CHANNELS)
		return -EINVAL;

#ifdef CONFIG_FAULT_INJECTION
	if (should_fail(&neuron_fail_mc_alloc, 1))
		return -ENOMEM;
#endif

	if (mpset->num_regions == 1) // shared DRAM mode, always use region 0
		region = 0;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (mc == NULL)
		return -ENOMEM;

	/* Describe the chunk before it becomes reachable through list/tree. */
	mc->mpset = mpset;
	mc->size = size;
	mc->mem_location = location;
	mc->dram_channel = channel;
	mc->dram_region = region;
	mc->nc_id = nc_id;

	mutex_lock(&mpset->lock);
	if (location == MEM_LOC_HOST) {
		if (size > MEMPOOL_KMALLOC_MAX_SIZE) {
			dma_addr_t addr;

			mc->va = dma_alloc_coherent(mpset->pdev, size, &addr,
						    GFP_KERNEL | GFP_DMA32);
			/* addr is only meaningful when the allocation succeeded */
			if (mc->va)
				mc->pa = (phys_addr_t)addr;
		} else {
			mc->va = kzalloc(size, GFP_KERNEL);
			if (mc->va)
				mc->pa = virt_to_phys(mc->va);
		}
		if (mc->va == NULL) {
			pr_info("host mem occupied %lld\n", mpset->host_mem_size);
			ret = -ENOMEM;
			goto exit;
		}

		INIT_LIST_HEAD(&mc->host_allocated_list);
		list_add(&mc->host_allocated_list, &mpset->host_allocated_head);
		write_lock(&mpset->rblock);
		mc_insert_node(&mpset->root, mc);
		write_unlock(&mpset->rblock);
		mpset->host_mem_size += size;
	} else {
		struct mempool *mp;

		/* Never index mp_device[][] with an unchecked region. */
		if (region >= mpset->num_regions) {
			pr_err("neuron: invalid dram region %u\n", region);
			ret = -EINVAL;
			goto exit;
		}
		mp = &mpset->mp_device[channel][region];
		if (!mp->gen_pool) {
			pr_err("neuron: mempool not initialized\n");
			ret = -ENOMEM;
			goto exit;
		}

		mc->va = gen_pool_dma_alloc(mp->gen_pool, size, &mc->pa);
		if (mc->va == NULL) {
			pr_info("%s total %ld occupied %ld needed %d available %ld\n", mp->name,
				mp->region_size, mp->allocated_size, size,
				gen_pool_avail(mp->gen_pool));
			pr_info("device regions %d occupied %lld\n", mpset->num_regions,
				mpset->device_mem_size);
			ret = -ENOMEM;
			goto exit;
		}

		INIT_LIST_HEAD(&mc->device_allocated_list);
		list_add(&mc->device_allocated_list, &mp->device_allocated_head);
		/* Account only for memory that was really handed out. */
		mp->allocated_size += size;
		mpset->device_mem_size += size;
	}
	*result = mc;

exit:
	mutex_unlock(&mpset->lock);
	if (ret)
		kfree(mc);
	return ret;
}
void mc_free(struct mem_chunk **mcp)
{
struct mempool_set *mpset;
struct mem_chunk *mc = *mcp;
if (mc == NULL)
return;
mpset = mc->mpset;
mutex_lock(&mpset->lock);
if (mc->mem_location == MEM_LOC_HOST) {
list_del(&mc->host_allocated_list);
write_lock(&mpset->rblock);
mc_remove_node(&mpset->root, mc);
write_unlock(&mpset->rblock);
if (mc->size > MEMPOOL_KMALLOC_MAX_SIZE) {
dma_free_coherent(mpset->pdev, mc->size, mc->va, mc->pa);
} else {
kfree(mc->va);
mc->va = NULL;
}
mpset->host_mem_size -= mc->size;
} else if (mc->mem_location == MEM_LOC_DEVICE) {
struct mempool *mp;
mp = &mpset->mp_device[mc->dram_channel][mc->dram_region];
list_del(&mc->device_allocated_list);
gen_pool_free(mp->gen_pool, (u64)mc->va, mc->size);
mc->va = NULL;
mp->allocated_size -= mc->size;
mpset->device_mem_size -= mc->size;
} else {
BUG();
}
*mcp = NULL;
mutex_unlock(&mpset->lock);
kfree(mc);
}