From 9884fd8df195fe48d4e1be2279b419be96127cae Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 1 Feb 2006 03:05:30 -0800 Subject: [PATCH] Use 32 bit division in slab_put_obj() Improve the performance of slab_put_obj(). Without the cast, gcc considers ptrdiff_t a 64 bit signed integer and ends up emitting code to use a full signed 128 bit divide on EM64T, which is substantially slower than a 32 bit unsigned divide. I noticed this when looking at the profile of a case where the slab balance is just on edge and thrashes back and forth freeing a block. Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 6f8495e2185b36..88082ae1573657 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1398,7 +1398,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) struct slab *slabp = page_get_slab(virt_to_page(objp)); int objnr; - objnr = (objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; if (objnr) { objp = slabp->s_mem + (objnr - 1) * cachep->objsize; realobj = (char *)objp + obj_dbghead(cachep); @@ -2341,7 +2341,7 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = caller; - objnr = (objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; BUG_ON(objnr >= cachep->num); BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize); @@ -2699,7 +2699,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, slabp = page_get_slab(virt_to_page(objp)); l3 = cachep->nodelists[node]; list_del(&slabp->list); - objnr = (objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); -- cgit 
1.2.3-korg From 3dafccf22751429e69b6266636cf3acf45b48075 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 1 Feb 2006 03:05:42 -0800 Subject: [PATCH] slab: distinguish between object and buffer size An object cache has two different object lengths: - the amount of memory available for the user (object size) - the amount of memory allocated internally (buffer size) This patch does some renames to make the code reflect that better. Signed-off-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 154 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 80 insertions(+), 74 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 88082ae1573657..1a014aaf449146 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -375,7 +375,7 @@ struct kmem_cache { unsigned int batchcount; unsigned int limit; unsigned int shared; - unsigned int objsize; + unsigned int buffer_size; /* 2) touched by every alloc & free from the backend */ struct kmem_list3 *nodelists[MAX_NUMNODES]; unsigned int flags; /* constant flags */ @@ -423,8 +423,14 @@ struct kmem_cache { atomic_t freemiss; #endif #if DEBUG - int dbghead; - int reallen; + /* + * If debugging is enabled, then the allocator can add additional + * fields and/or padding to every object. buffer_size contains the total + * object size including these internal fields, the following two + * variables contain the offset to the user object and its size. + */ + int obj_offset; + int obj_size; #endif }; @@ -495,50 +501,50 @@ struct kmem_cache { /* memory layout of objects: * 0 : objp - * 0 .. cachep->dbghead - BYTES_PER_WORD - 1: padding. This ensures that + * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that * the end of an object is aligned with the end of the real * allocation. Catches writes behind the end of the allocation. - * cachep->dbghead - BYTES_PER_WORD .. 
cachep->dbghead - 1: + * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: * redzone word. - * cachep->dbghead: The real object. - * cachep->objsize - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] - * cachep->objsize - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] + * cachep->obj_offset: The real object. + * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] + * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] */ -static int obj_dbghead(kmem_cache_t *cachep) +static int obj_offset(kmem_cache_t *cachep) { - return cachep->dbghead; + return cachep->obj_offset; } -static int obj_reallen(kmem_cache_t *cachep) +static int obj_size(kmem_cache_t *cachep) { - return cachep->reallen; + return cachep->obj_size; } static unsigned long *dbg_redzone1(kmem_cache_t *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); - return (unsigned long*) (objp+obj_dbghead(cachep)-BYTES_PER_WORD); + return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD); } static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); if (cachep->flags & SLAB_STORE_USER) - return (unsigned long *)(objp + cachep->objsize - + return (unsigned long *)(objp + cachep->buffer_size - 2 * BYTES_PER_WORD); - return (unsigned long *)(objp + cachep->objsize - BYTES_PER_WORD); + return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD); } static void **dbg_userword(kmem_cache_t *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_STORE_USER)); - return (void **)(objp + cachep->objsize - BYTES_PER_WORD); + return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); } #else -#define obj_dbghead(x) 0 -#define obj_reallen(cachep) (cachep->objsize) +#define obj_offset(x) 0 +#define obj_size(cachep) (cachep->buffer_size) #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long *)NULL;}) #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long 
*)NULL;}) #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) @@ -623,12 +629,12 @@ static kmem_cache_t cache_cache = { .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, - .objsize = sizeof(kmem_cache_t), + .buffer_size = sizeof(kmem_cache_t), .flags = SLAB_NO_REAP, .spinlock = SPIN_LOCK_UNLOCKED, .name = "kmem_cache", #if DEBUG - .reallen = sizeof(kmem_cache_t), + .obj_size = sizeof(kmem_cache_t), #endif }; @@ -1057,9 +1063,9 @@ void __init kmem_cache_init(void) cache_cache.array[smp_processor_id()] = &initarray_cache.cache; cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; - cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size()); + cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); - cache_estimate(0, cache_cache.objsize, cache_line_size(), 0, + cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0, &left_over, &cache_cache.num); if (!cache_cache.num) BUG(); @@ -1274,9 +1280,9 @@ static void kmem_rcu_free(struct rcu_head *head) static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller) { - int size = obj_reallen(cachep); + int size = obj_size(cachep); - addr = (unsigned long *)&((char *)addr)[obj_dbghead(cachep)]; + addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; if (size < 5 * sizeof(unsigned long)) return; @@ -1306,8 +1312,8 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) { - int size = obj_reallen(cachep); - addr = &((char *)addr)[obj_dbghead(cachep)]; + int size = obj_size(cachep); + addr = &((char *)addr)[obj_offset(cachep)]; memset(addr, val, size); *(unsigned char *)(addr + size - 1) = POISON_END; @@ -1344,8 +1350,8 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines) (unsigned long)*dbg_userword(cachep, objp)); printk("\n"); } - realobj = (char *)objp + obj_dbghead(cachep); - size = 
obj_reallen(cachep); + realobj = (char *)objp + obj_offset(cachep); + size = obj_size(cachep); for (i = 0; i < size && lines; i += 16, lines--) { int limit; limit = 16; @@ -1361,8 +1367,8 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) int size, i; int lines = 0; - realobj = (char *)objp + obj_dbghead(cachep); - size = obj_reallen(cachep); + realobj = (char *)objp + obj_offset(cachep); + size = obj_size(cachep); for (i = 0; i < size; i++) { char exp = POISON_FREE; @@ -1398,17 +1404,17 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) struct slab *slabp = page_get_slab(virt_to_page(objp)); int objnr; - objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; if (objnr) { - objp = slabp->s_mem + (objnr - 1) * cachep->objsize; - realobj = (char *)objp + obj_dbghead(cachep); + objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; + realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Prev obj: start=%p, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { - objp = slabp->s_mem + (objnr + 1) * cachep->objsize; - realobj = (char *)objp + obj_dbghead(cachep); + objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; + realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Next obj: start=%p, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); @@ -1428,14 +1434,14 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) #if DEBUG int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem + cachep->objsize * i; + void *objp = slabp->s_mem + cachep->buffer_size * i; if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->objsize % PAGE_SIZE) == 0 + if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->objsize / PAGE_SIZE, + cachep->buffer_size / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); @@ 
-1452,13 +1458,13 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) "was overwritten"); } if (cachep->dtor && !(cachep->flags & SLAB_POISON)) - (cachep->dtor) (objp + obj_dbghead(cachep), cachep, 0); + (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); } #else if (cachep->dtor) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem + cachep->objsize * i; + void *objp = slabp->s_mem + cachep->buffer_size * i; (cachep->dtor) (objp, cachep, 0); } } @@ -1478,7 +1484,7 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) } } -/* For setting up all the kmem_list3s for cache whose objsize is same +/* For setting up all the kmem_list3s for cache whose buffer_size is same as size of kmem_list3. */ static inline void set_up_list3s(kmem_cache_t *cachep, int index) { @@ -1611,7 +1617,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, set_fs(old_fs); if (res) { printk("SLAB: cache with size %d has lost its name\n", - pc->objsize); + pc->buffer_size); continue; } @@ -1702,14 +1708,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, memset(cachep, 0, sizeof(kmem_cache_t)); #if DEBUG - cachep->reallen = size; + cachep->obj_size = size; if (flags & SLAB_RED_ZONE) { /* redzoning only works with word aligned caches */ align = BYTES_PER_WORD; /* add space for red zone words */ - cachep->dbghead += BYTES_PER_WORD; + cachep->obj_offset += BYTES_PER_WORD; size += 2 * BYTES_PER_WORD; } if (flags & SLAB_STORE_USER) { @@ -1722,8 +1728,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { - cachep->dbghead += PAGE_SIZE - size; + && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { + cachep->obj_offset += PAGE_SIZE - size; size = PAGE_SIZE; } #endif @@ -1786,7 +1792,7 @@ kmem_cache_create (const char *name, 
size_t size, size_t align, if (flags & SLAB_CACHE_DMA) cachep->gfpflags |= GFP_DMA; spin_lock_init(&cachep->spinlock); - cachep->objsize = size; + cachep->buffer_size = size; if (flags & CFLGS_OFF_SLAB) cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); @@ -2118,7 +2124,7 @@ static void cache_init_objs(kmem_cache_t *cachep, int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem + cachep->objsize * i; + void *objp = slabp->s_mem + cachep->buffer_size * i; #if DEBUG /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) @@ -2136,7 +2142,7 @@ static void cache_init_objs(kmem_cache_t *cachep, * Otherwise, deadlock. They must also be threaded. */ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(objp + obj_dbghead(cachep), cachep, + cachep->ctor(objp + obj_offset(cachep), cachep, ctor_flags); if (cachep->flags & SLAB_RED_ZONE) { @@ -2147,10 +2153,10 @@ static void cache_init_objs(kmem_cache_t *cachep, slab_error(cachep, "constructor overwrote the" " start of an object"); } - if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) + if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) kernel_map_pages(virt_to_page(objp), - cachep->objsize / PAGE_SIZE, 0); + cachep->buffer_size / PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp, cachep, ctor_flags); @@ -2309,7 +2315,7 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, unsigned int objnr; struct slab *slabp; - objp -= obj_dbghead(cachep); + objp -= obj_offset(cachep); kfree_debugcheck(objp); page = virt_to_page(objp); @@ -2341,31 +2347,31 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = caller; - objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; BUG_ON(objnr >= cachep->num); - BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize); + 
BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); if (cachep->flags & SLAB_DEBUG_INITIAL) { /* Need to call the slab's constructor so the * caller can perform a verify of its state (debugging). * Called without the cache-lock held. */ - cachep->ctor(objp + obj_dbghead(cachep), + cachep->ctor(objp + obj_offset(cachep), cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback */ - cachep->dtor(objp + obj_dbghead(cachep), cachep, 0); + cachep->dtor(objp + obj_offset(cachep), cachep, 0); } if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { + if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { store_stackinfo(cachep, objp, (unsigned long)caller); kernel_map_pages(virt_to_page(objp), - cachep->objsize / PAGE_SIZE, 0); + cachep->buffer_size / PAGE_SIZE, 0); } else { poison_obj(cachep, objp, POISON_FREE); } @@ -2468,7 +2474,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) /* get obj pointer */ ac->entry[ac->avail++] = slabp->s_mem + - slabp->free * cachep->objsize; + slabp->free * cachep->buffer_size; slabp->inuse++; next = slab_bufctl(slabp)[slabp->free]; @@ -2526,9 +2532,9 @@ static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, return objp; if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) + if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->objsize / PAGE_SIZE, 1); + cachep->buffer_size / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); #else @@ -2553,7 +2559,7 @@ static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } - objp += obj_dbghead(cachep); + objp += 
obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) { unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; @@ -2648,7 +2654,7 @@ static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) BUG_ON(slabp->inuse == cachep->num); /* get obj pointer */ - obj = slabp->s_mem + slabp->free * cachep->objsize; + obj = slabp->s_mem + slabp->free * cachep->buffer_size; slabp->inuse++; next = slab_bufctl(slabp)[slabp->free]; #if DEBUG @@ -2699,7 +2705,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, slabp = page_get_slab(virt_to_page(objp)); l3 = cachep->nodelists[node]; list_del(&slabp->list); - objnr = (unsigned)(objp - slabp->s_mem) / cachep->objsize; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -2881,7 +2887,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) unsigned long addr = (unsigned long)ptr; unsigned long min_addr = PAGE_OFFSET; unsigned long align_mask = BYTES_PER_WORD - 1; - unsigned long size = cachep->objsize; + unsigned long size = cachep->buffer_size; struct page *page; if (unlikely(addr < min_addr)) @@ -3083,7 +3089,7 @@ void kfree(const void *objp) local_irq_save(flags); kfree_debugcheck(objp); c = page_get_cache(virt_to_page(objp)); - mutex_debug_check_no_locks_freed(objp, obj_reallen(c)); + mutex_debug_check_no_locks_freed(objp, obj_size(c)); __cache_free(c, (void *)objp); local_irq_restore(flags); } @@ -3114,7 +3120,7 @@ EXPORT_SYMBOL(free_percpu); unsigned int kmem_cache_size(kmem_cache_t *cachep) { - return obj_reallen(cachep); + return obj_size(cachep); } EXPORT_SYMBOL(kmem_cache_size); @@ -3258,13 +3264,13 @@ static void enable_cpucache(kmem_cache_t *cachep) * The numbers are guessed, we should auto-tune as described by * Bonwick. 
*/ - if (cachep->objsize > 131072) + if (cachep->buffer_size > 131072) limit = 1; - else if (cachep->objsize > PAGE_SIZE) + else if (cachep->buffer_size > PAGE_SIZE) limit = 8; - else if (cachep->objsize > 1024) + else if (cachep->buffer_size > 1024) limit = 24; - else if (cachep->objsize > 256) + else if (cachep->buffer_size > 256) limit = 54; else limit = 120; @@ -3279,7 +3285,7 @@ static void enable_cpucache(kmem_cache_t *cachep) */ shared = 0; #ifdef CONFIG_SMP - if (cachep->objsize <= PAGE_SIZE) + if (cachep->buffer_size <= PAGE_SIZE) shared = 8; #endif @@ -3528,7 +3534,7 @@ static int s_show(struct seq_file *m, void *p) printk(KERN_ERR "slab: cache %s error: %s\n", name, error); seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", - name, active_objs, num_objs, cachep->objsize, + name, active_objs, num_objs, cachep->buffer_size, cachep->num, (1 << cachep->gfporder)); seq_printf(m, " : tunables %4u %4u %4u", cachep->limit, cachep->batchcount, cachep->shared); @@ -3656,5 +3662,5 @@ unsigned int ksize(const void *objp) if (unlikely(objp == NULL)) return 0; - return obj_reallen(page_get_cache(virt_to_page(objp))); + return obj_size(page_get_cache(virt_to_page(objp))); } -- cgit 1.2.3-korg From 18f820f655ce93b1e4d9b48fc6fcafc64157c6bc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 1 Feb 2006 03:05:43 -0800 Subject: [PATCH] slab: minor cleanup to kmem_cache_alloc_node Clean up kmem_cache_alloc_node a bit. 
Signed-off-by: Christoph Lameter Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 1a014aaf449146..bb7a9837b9491e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2928,27 +2928,18 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) unsigned long save_flags; void *ptr; - if (nodeid == -1) - return __cache_alloc(cachep, flags); - - if (unlikely(!cachep->nodelists[nodeid])) { - /* Fall back to __cache_alloc if we run into trouble */ - printk(KERN_WARNING - "slab: not allocating in inactive node %d for cache %s\n", - nodeid, cachep->name); - return __cache_alloc(cachep, flags); - } - cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); - if (nodeid == numa_node_id()) + + if (nodeid == -1 || nodeid == numa_node_id() || + !cachep->nodelists[nodeid]) ptr = ____cache_alloc(cachep, flags); else ptr = __cache_alloc_node(cachep, flags, nodeid); local_irq_restore(save_flags); - ptr = - cache_alloc_debugcheck_after(cachep, flags, ptr, - __builtin_return_address(0)); + + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, + __builtin_return_address(0)); return ptr; } -- cgit 1.2.3-korg From 5ec8a847bb8ae2ba6395cfb7cb4bfdc78ada82ed Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Feb 2006 03:05:44 -0800 Subject: [PATCH] slab: have index_of bug at compile time I noticed the code for index_of is a creative way of finding the cache index using the compiler to optimize to a single hard coded number. But I couldn't help noticing that it uses two methods to let you know that someone used it wrong. One is at compile time (the correct way), and the other is at run time (not good). 
Signed-off-by: Steven Rostedt Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index bb7a9837b9491e..613d385519fe8f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -316,6 +316,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; */ static __always_inline int index_of(const size_t size) { + extern void __bad_size(void); + if (__builtin_constant_p(size)) { int i = 0; @@ -326,12 +328,9 @@ static __always_inline int index_of(const size_t size) i++; #include "linux/kmalloc_sizes.h" #undef CACHE - { - extern void __bad_size(void); - __bad_size(); - } + __bad_size(); } else - BUG(); + __bad_size(); return 0; } -- cgit 1.2.3-korg From fbaccacff1f17c65ae0972085368a7ec75be6062 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Feb 2006 03:05:45 -0800 Subject: [PATCH] slab: cache_estimate cleanup Clean up cache_estimate() in mm/slab.c and improve the algorithm from O(n) to O(1). We first calculate the maximum number of objects a slab can hold after struct slab and kmem_bufctl_t for each object has been given enough space. After that, to respect alignment rules, we decrease the number of objects if necessary. As required padding is at most align-1 and memory of obj_size is at least align, it is always enough to decrease the number of objects by one. The optimization was originally made by Balbir Singh with more improvements from Steven Rostedt. Manfred Spraul provided further modifications: no loop at all for the off-slab case and added comments to explain the background.
Acked-by: Balbir Singh Signed-off-by: Manfred Spraul Signed-off-by: Steven Rostedt Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 22 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 613d385519fe8f..e869400ea731b9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -702,32 +702,69 @@ kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags) } EXPORT_SYMBOL(kmem_find_general_cachep); -/* Cal the num objs, wastage, and bytes left over for a given slab size. */ -static void cache_estimate(unsigned long gfporder, size_t size, size_t align, - int flags, size_t *left_over, unsigned int *num) +static size_t slab_mgmt_size(size_t nr_objs, size_t align) { - int i; - size_t wastage = PAGE_SIZE << gfporder; - size_t extra = 0; - size_t base = 0; + return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); +} - if (!(flags & CFLGS_OFF_SLAB)) { - base = sizeof(struct slab); - extra = sizeof(kmem_bufctl_t); - } - i = 0; - while (i * size + ALIGN(base + i * extra, align) <= wastage) - i++; - if (i > 0) - i--; +/* Calculate the number of objects and left-over bytes for a given + buffer size. */ +static void cache_estimate(unsigned long gfporder, size_t buffer_size, + size_t align, int flags, size_t *left_over, + unsigned int *num) +{ + int nr_objs; + size_t mgmt_size; + size_t slab_size = PAGE_SIZE << gfporder; - if (i > SLAB_LIMIT) - i = SLAB_LIMIT; + /* + * The slab management structure can be either off the slab or + * on it. For the latter case, the memory allocated for a + * slab is used for: + * + * - The struct slab + * - One kmem_bufctl_t for each object + * - Padding to respect alignment of @align + * - @buffer_size bytes for each object + * + * If the slab management structure is off the slab, then the + * alignment will already be calculated into the size. 
Because + * the slabs are all pages aligned, the objects will be at the + * correct alignment when allocated. + */ + if (flags & CFLGS_OFF_SLAB) { + mgmt_size = 0; + nr_objs = slab_size / buffer_size; - *num = i; - wastage -= i * size; - wastage -= ALIGN(base + i * extra, align); - *left_over = wastage; + if (nr_objs > SLAB_LIMIT) + nr_objs = SLAB_LIMIT; + } else { + /* + * Ignore padding for the initial guess. The padding + * is at most @align-1 bytes, and @buffer_size is at + * least @align. In the worst case, this result will + * be one greater than the number of objects that fit + * into the memory allocation when taking the padding + * into account. + */ + nr_objs = (slab_size - sizeof(struct slab)) / + (buffer_size + sizeof(kmem_bufctl_t)); + + /* + * This calculated number will be either the right + * amount, or one greater than what we want. + */ + if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size + > slab_size) + nr_objs--; + + if (nr_objs > SLAB_LIMIT) + nr_objs = SLAB_LIMIT; + + mgmt_size = slab_mgmt_size(nr_objs, align); + } + *num = nr_objs; + *left_over = slab_size - nr_objs*buffer_size - mgmt_size; } #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) -- cgit 1.2.3-korg From 12dd36faec5d3bd96da84fa8f76efecc632930ab Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Wed, 1 Feb 2006 03:05:46 -0800 Subject: [PATCH] slab: extract slab_destroy_objs() Create a helper function, slab_destroy_objs(), which is called from slab_destroy(). This makes slab_destroy() smaller and more readable, and moves ifdefs outside the function body.
Signed-off-by: Matthew Dobson Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index e869400ea731b9..85adf0992011aa 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1459,15 +1459,13 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) } #endif -/* Destroy all the objs in a slab, and release the mem back to the system. - * Before calling the slab must have been unlinked from the cache. - * The cache-lock is not held/needed. +#if DEBUG +/** + * slab_destroy_objs - call the registered destructor for each object in + * a slab that is to be destroyed. */ -static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) +static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) { - void *addr = slabp->s_mem - slabp->colouroff; - -#if DEBUG int i; for (i = 0; i < cachep->num; i++) { void *objp = slabp->s_mem + cachep->buffer_size * i; @@ -1496,7 +1494,10 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) if (cachep->dtor && !(cachep->flags & SLAB_POISON)) (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); } +} #else +static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) +{ if (cachep->dtor) { int i; for (i = 0; i < cachep->num; i++) { @@ -1504,8 +1505,19 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) (cachep->dtor) (objp, cachep, 0); } } +} #endif +/** + * Destroy all the objs in a slab, and release the mem back to the system. + * Before calling the slab must have been unlinked from the cache. + * The cache-lock is not held/needed. 
+ */ +static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) +{ + void *addr = slabp->s_mem - slabp->colouroff; + + slab_destroy_objs(cachep, slabp); if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { struct slab_rcu *slab_rcu; -- cgit 1.2.3-korg From 78d382d77c84229d031431931bf6490d5da6ab86 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Wed, 1 Feb 2006 03:05:47 -0800 Subject: [PATCH] slab: extract slab_{put|get}_obj Create two helper functions slab_get_obj() and slab_put_obj() to replace duplicated code in mm/slab.c Signed-off-by: Matthew Dobson Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 77 +++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 85adf0992011aa..594a9155c7d83e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2226,6 +2226,42 @@ static void kmem_flagcheck(kmem_cache_t *cachep, gfp_t flags) } } +static void *slab_get_obj(kmem_cache_t *cachep, struct slab *slabp, int nodeid) +{ + void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); + kmem_bufctl_t next; + + slabp->inuse++; + next = slab_bufctl(slabp)[slabp->free]; +#if DEBUG + slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; + WARN_ON(slabp->nodeid != nodeid); +#endif + slabp->free = next; + + return objp; +} + +static void slab_put_obj(kmem_cache_t *cachep, struct slab *slabp, void *objp, + int nodeid) +{ + unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; + +#if DEBUG + /* Verify that the slab belongs to the intended node */ + WARN_ON(slabp->nodeid != nodeid); + + if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { + printk(KERN_ERR "slab: double free detected in cache " + "'%s', objp %p\n", cachep->name, objp); + BUG(); + } +#endif + slab_bufctl(slabp)[objnr] = slabp->free; + slabp->free = objnr; + slabp->inuse--; +} + static void 
set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) { int i; @@ -2515,22 +2551,12 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) check_slabp(cachep, slabp); check_spinlock_acquired(cachep); while (slabp->inuse < cachep->num && batchcount--) { - kmem_bufctl_t next; STATS_INC_ALLOCED(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - /* get obj pointer */ - ac->entry[ac->avail++] = slabp->s_mem + - slabp->free * cachep->buffer_size; - - slabp->inuse++; - next = slab_bufctl(slabp)[slabp->free]; -#if DEBUG - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; - WARN_ON(numa_node_id() != slabp->nodeid); -#endif - slabp->free = next; + ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, + numa_node_id()); } check_slabp(cachep, slabp); @@ -2675,7 +2701,6 @@ static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) struct slab *slabp; struct kmem_list3 *l3; void *obj; - kmem_bufctl_t next; int x; l3 = cachep->nodelists[nodeid]; @@ -2701,14 +2726,7 @@ static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) BUG_ON(slabp->inuse == cachep->num); - /* get obj pointer */ - obj = slabp->s_mem + slabp->free * cachep->buffer_size; - slabp->inuse++; - next = slab_bufctl(slabp)[slabp->free]; -#if DEBUG - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; -#endif - slabp->free = next; + obj = slab_get_obj(cachep, slabp, nodeid); check_slabp(cachep, slabp); l3->free_objects--; /* move slabp to correct slabp list: */ @@ -2748,29 +2766,14 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, for (i = 0; i < nr_objects; i++) { void *objp = objpp[i]; struct slab *slabp; - unsigned int objnr; slabp = page_get_slab(virt_to_page(objp)); l3 = cachep->nodelists[node]; list_del(&slabp->list); - objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); - -#if DEBUG - /* Verify that the slab belongs to the intended 
node */ - WARN_ON(slabp->nodeid != node); - - if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { - printk(KERN_ERR "slab: double free detected in cache " - "'%s', objp %p\n", cachep->name, objp); - BUG(); - } -#endif - slab_bufctl(slabp)[objnr] = slabp->free; - slabp->free = objnr; + slab_put_obj(cachep, slabp, objp, node); STATS_DEC_ACTIVE(cachep); - slabp->inuse--; l3->free_objects++; check_slabp(cachep, slabp); -- cgit 1.2.3-korg From 5295a74cc0bcf1291686eb734ccb06baa3d55c1a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Feb 2006 03:05:48 -0800 Subject: [PATCH] slab: reduce inlining From: Manfred Spraul Reduce the amount of inline functions in slab to the functions that are used in the hot path: - no inline for debug functions - no __always_inline, inline is already __always_inline - remove inline from a few numa support functions. Before: text data bss dec hex filename 13588 752 48 14388 3834 mm/slab.o (defconfig) 16671 2492 48 19211 4b0b mm/slab.o (numa) After: text data bss dec hex filename 13366 752 48 14166 3756 mm/slab.o (defconfig) 16230 2492 48 18770 4952 mm/slab.o (numa) Signed-off-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 594a9155c7d83e..ba288b3877d1c2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -337,7 +337,7 @@ static __always_inline int index_of(const size_t size) #define INDEX_AC index_of(sizeof(struct arraycache_init)) #define INDEX_L3 index_of(sizeof(struct kmem_list3)) -static inline void kmem_list3_init(struct kmem_list3 *parent) +static void kmem_list3_init(struct kmem_list3 *parent) { INIT_LIST_HEAD(&parent->slabs_full); INIT_LIST_HEAD(&parent->slabs_partial); @@ -818,7 +818,7 @@ static struct array_cache *alloc_arraycache(int node, int entries, #ifdef CONFIG_NUMA static void *__cache_alloc_node(kmem_cache_t *, gfp_t, int); 
-static inline struct array_cache **alloc_alien_cache(int node, int limit) +static struct array_cache **alloc_alien_cache(int node, int limit) { struct array_cache **ac_ptr; int memsize = sizeof(void *) * MAX_NUMNODES; @@ -845,7 +845,7 @@ static inline struct array_cache **alloc_alien_cache(int node, int limit) return ac_ptr; } -static inline void free_alien_cache(struct array_cache **ac_ptr) +static void free_alien_cache(struct array_cache **ac_ptr) { int i; @@ -858,8 +858,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) kfree(ac_ptr); } -static inline void __drain_alien_cache(kmem_cache_t *cachep, - struct array_cache *ac, int node) +static void __drain_alien_cache(kmem_cache_t *cachep, + struct array_cache *ac, int node) { struct kmem_list3 *rl3 = cachep->nodelists[node]; @@ -1534,7 +1534,7 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) /* For setting up all the kmem_list3s for cache whose buffer_size is same as size of kmem_list3. */ -static inline void set_up_list3s(kmem_cache_t *cachep, int index) +static void set_up_list3s(kmem_cache_t *cachep, int index) { int node; @@ -1937,7 +1937,7 @@ static void check_spinlock_acquired(kmem_cache_t *cachep) #endif } -static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node) +static void check_spinlock_acquired_node(kmem_cache_t *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); -- cgit 1.2.3-korg From 6ed5eb2211204224799b2821656bbbfde26ef200 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Feb 2006 03:05:49 -0800 Subject: [PATCH] slab: extract virt_to_{cache|slab} Introduce virt_to_cache() and virt_to_slab() functions to reduce duplicate code and introduce a proper abstraction should we want to support other kind of mapping for address to slab and cache (eg. for vmalloc() or I/O memory). 
Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index ba288b3877d1c2..c2f9e0a330ffb2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -596,6 +596,18 @@ static inline struct slab *page_get_slab(struct page *page) return (struct slab *)page->lru.prev; } +static inline struct kmem_cache *virt_to_cache(const void *obj) +{ + struct page *page = virt_to_page(obj); + return page_get_cache(page); +} + +static inline struct slab *virt_to_slab(const void *obj) +{ + struct page *page = virt_to_page(obj); + return page_get_slab(page); +} + /* These are the default caches for kmalloc. Custom caches can have other sizes. */ struct cache_sizes malloc_sizes[] = { #define CACHE(x) { .cs_size = (x) }, @@ -1437,7 +1449,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) /* Print some data about the neighboring objects, if they * exist: */ - struct slab *slabp = page_get_slab(virt_to_page(objp)); + struct slab *slabp = virt_to_slab(objp); int objnr; objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; @@ -2767,7 +2779,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, void *objp = objpp[i]; struct slab *slabp; - slabp = page_get_slab(virt_to_page(objp)); + slabp = virt_to_slab(objp); l3 = cachep->nodelists[node]; list_del(&slabp->list); check_spinlock_acquired_node(cachep, node); @@ -2867,7 +2879,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) #ifdef CONFIG_NUMA { struct slab *slabp; - slabp = page_get_slab(virt_to_page(objp)); + slabp = virt_to_slab(objp); if (unlikely(slabp->nodeid != numa_node_id())) { struct array_cache *alien = NULL; int nodeid = slabp->nodeid; @@ -3130,7 +3142,7 @@ void kfree(const void *objp) return; local_irq_save(flags); kfree_debugcheck(objp); - c = 
page_get_cache(virt_to_page(objp)); + c = virt_to_cache(objp); mutex_debug_check_no_locks_freed(objp, obj_size(c)); __cache_free(c, (void *)objp); local_irq_restore(flags); @@ -3704,5 +3716,5 @@ unsigned int ksize(const void *objp) if (unlikely(objp == NULL)) return 0; - return obj_size(page_get_cache(virt_to_page(objp))); + return obj_size(virt_to_cache(objp)); } -- cgit 1.2.3-korg From 9a2dba4b4912b493070cbc170629fdbf440b01d7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Feb 2006 03:05:49 -0800 Subject: [PATCH] slab: rename ac_data to cpu_cache_get Rename the ac_data() function to more descriptive cpu_cache_get(). Acked-by: Manfred Spraul Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index c2f9e0a330ffb2..b1909386499846 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -679,7 +679,7 @@ static void enable_cpucache(kmem_cache_t *cachep); static void cache_reap(void *unused); static int __node_shrink(kmem_cache_t *cachep, int node); -static inline struct array_cache *ac_data(kmem_cache_t *cachep) +static inline struct array_cache *cpu_cache_get(kmem_cache_t *cachep) { return cachep->array[smp_processor_id()]; } @@ -1186,8 +1186,8 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); local_irq_disable(); - BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache); - memcpy(ptr, ac_data(&cache_cache), + BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); + memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); cache_cache.array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1195,9 +1195,9 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); local_irq_disable(); - BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep) + 
BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) != &initarray_generic.cache); - memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep), + memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), sizeof(struct arraycache_init)); malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; @@ -1235,7 +1235,7 @@ void __init kmem_cache_init(void) g_cpucache_up = FULL; /* Register a cpu startup notifier callback - * that initializes ac_data for all new cpus + * that initializes cpu_cache_get for all new cpus */ register_cpu_notifier(&cpucache_notifier); @@ -1909,11 +1909,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - BUG_ON(!ac_data(cachep)); - ac_data(cachep)->avail = 0; - ac_data(cachep)->limit = BOOT_CPUCACHE_ENTRIES; - ac_data(cachep)->batchcount = 1; - ac_data(cachep)->touched = 0; + BUG_ON(!cpu_cache_get(cachep)); + cpu_cache_get(cachep)->avail = 0; + cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; + cpu_cache_get(cachep)->batchcount = 1; + cpu_cache_get(cachep)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; } @@ -1992,7 +1992,7 @@ static void do_drain(void *arg) int node = numa_node_id(); check_irq_off(); - ac = ac_data(cachep); + ac = cpu_cache_get(cachep); spin_lock(&cachep->nodelists[node]->list_lock); free_block(cachep, ac->entry, ac->avail, node); spin_unlock(&cachep->nodelists[node]->list_lock); @@ -2518,7 +2518,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) struct array_cache *ac; check_irq_off(); - ac = ac_data(cachep); + ac = cpu_cache_get(cachep); retry: batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { @@ -2590,7 +2590,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) x = cache_grow(cachep, flags, numa_node_id()); // cache_grow can reenable interrupts, then ac could change. 
- ac = ac_data(cachep); + ac = cpu_cache_get(cachep); if (!x && ac->avail == 0) // no objects in sight? abort return NULL; @@ -2675,7 +2675,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) #endif check_irq_off(); - ac = ac_data(cachep); + ac = cpu_cache_get(cachep); if (likely(ac->avail)) { STATS_INC_ALLOCHIT(cachep); ac->touched = 1; @@ -2868,7 +2868,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) */ static inline void __cache_free(kmem_cache_t *cachep, void *objp) { - struct array_cache *ac = ac_data(cachep); + struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); @@ -3253,7 +3253,7 @@ static void do_ccupdate_local(void *info) struct array_cache *old; check_irq_off(); - old = ac_data(new->cachep); + old = cpu_cache_get(new->cachep); new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; new->new[smp_processor_id()] = old; @@ -3419,7 +3419,7 @@ static void cache_reap(void *unused) drain_alien_cache(searchp, l3); spin_lock_irq(&l3->list_lock); - drain_array_locked(searchp, ac_data(searchp), 0, + drain_array_locked(searchp, cpu_cache_get(searchp), 0, numa_node_id()); if (time_after(l3->next_reap, jiffies)) -- cgit 1.2.3-korg From 343e0d7a93951e35065fdb5e3dd61aece0ec6b3c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Feb 2006 03:05:50 -0800 Subject: [PATCH] slab: replace kmem_cache_t with struct kmem_cache Replace uses of kmem_cache_t with proper struct kmem_cache in mm/slab.c. 
Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 195 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 98 insertions(+), 97 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index b1909386499846..6fbd6a1cdeb4a3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -55,7 +55,7 @@ * * SMP synchronization: * constructors and destructors are called without any locking. - * Several members in kmem_cache_t and struct slab never change, they + * Several members in struct kmem_cache and struct slab never change, they * are accessed without any locking. * The per-cpu arrays are never accessed from the wrong cpu, no locking, * and local interrupts are disabled so slab code is preempt-safe. @@ -244,7 +244,7 @@ struct slab { */ struct slab_rcu { struct rcu_head head; - kmem_cache_t *cachep; + struct kmem_cache *cachep; void *addr; }; @@ -363,7 +363,7 @@ static void kmem_list3_init(struct kmem_list3 *parent) } while (0) /* - * kmem_cache_t + * struct kmem_cache * * manages a cache. 
*/ @@ -391,15 +391,15 @@ struct kmem_cache { size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ unsigned int colour_next; /* cache colouring */ - kmem_cache_t *slabp_cache; + struct kmem_cache *slabp_cache; unsigned int slab_size; unsigned int dflags; /* dynamic flags */ /* constructor func */ - void (*ctor) (void *, kmem_cache_t *, unsigned long); + void (*ctor) (void *, struct kmem_cache *, unsigned long); /* de-constructor func */ - void (*dtor) (void *, kmem_cache_t *, unsigned long); + void (*dtor) (void *, struct kmem_cache *, unsigned long); /* 4) cache creation/removal */ const char *name; @@ -509,23 +509,23 @@ struct kmem_cache { * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] */ -static int obj_offset(kmem_cache_t *cachep) +static int obj_offset(struct kmem_cache *cachep) { return cachep->obj_offset; } -static int obj_size(kmem_cache_t *cachep) +static int obj_size(struct kmem_cache *cachep) { return cachep->obj_size; } -static unsigned long *dbg_redzone1(kmem_cache_t *cachep, void *objp) +static unsigned long *dbg_redzone1(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD); } -static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp) +static unsigned long *dbg_redzone2(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); if (cachep->flags & SLAB_STORE_USER) @@ -534,7 +534,7 @@ static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp) return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD); } -static void **dbg_userword(kmem_cache_t *cachep, void *objp) +static void **dbg_userword(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_STORE_USER)); return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); @@ -636,16 +636,16 
@@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static kmem_cache_t cache_cache = { +static struct kmem_cache cache_cache = { .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, - .buffer_size = sizeof(kmem_cache_t), + .buffer_size = sizeof(struct kmem_cache), .flags = SLAB_NO_REAP, .spinlock = SPIN_LOCK_UNLOCKED, .name = "kmem_cache", #if DEBUG - .obj_size = sizeof(kmem_cache_t), + .obj_size = sizeof(struct kmem_cache), #endif }; @@ -674,17 +674,17 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(kmem_cache_t *cachep, void **objpp, int len, int node); -static void enable_cpucache(kmem_cache_t *cachep); +static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); +static void enable_cpucache(struct kmem_cache *cachep); static void cache_reap(void *unused); -static int __node_shrink(kmem_cache_t *cachep, int node); +static int __node_shrink(struct kmem_cache *cachep, int node); -static inline struct array_cache *cpu_cache_get(kmem_cache_t *cachep) +static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) +static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -708,7 +708,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags) +struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -781,7 +781,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) -static void __slab_error(const char *function, 
kmem_cache_t *cachep, char *msg) +static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) { printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", function, cachep->name, msg); @@ -828,7 +828,7 @@ static struct array_cache *alloc_arraycache(int node, int entries, } #ifdef CONFIG_NUMA -static void *__cache_alloc_node(kmem_cache_t *, gfp_t, int); +static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); static struct array_cache **alloc_alien_cache(int node, int limit) { @@ -870,7 +870,7 @@ static void free_alien_cache(struct array_cache **ac_ptr) kfree(ac_ptr); } -static void __drain_alien_cache(kmem_cache_t *cachep, +static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { struct kmem_list3 *rl3 = cachep->nodelists[node]; @@ -883,7 +883,7 @@ static void __drain_alien_cache(kmem_cache_t *cachep, } } -static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) +static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3) { int i = 0; struct array_cache *ac; @@ -908,7 +908,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { long cpu = (long)hcpu; - kmem_cache_t *cachep; + struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); int memsize = sizeof(struct kmem_list3); @@ -1046,7 +1046,7 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; /* * swap the static kmem_list3 with kmalloced memory */ -static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, int nodeid) +static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) { struct kmem_list3 *ptr; @@ -1086,14 +1086,14 @@ void __init kmem_cache_init(void) /* Bootstrap is tricky, because several objects are allocated * from caches that do not exist yet: - * 1) initialize the cache_cache cache: it contains the kmem_cache_t + * 1) initialize the cache_cache cache: 
it contains the struct kmem_cache * structures of all caches, except cache_cache itself: cache_cache * is statically allocated. * Initially an __init data area is used for the head array and the * kmem_list3 structures, it's replaced with a kmalloc allocated * array at the end of the bootstrap. * 2) Create the first kmalloc cache. - * The kmem_cache_t for the new cache is allocated normally. + * The struct kmem_cache for the new cache is allocated normally. * An __init data area is used for the head array. * 3) Create the remaining kmalloc caches, with minimally sized * head arrays. @@ -1224,7 +1224,7 @@ void __init kmem_cache_init(void) /* 6) resize the head arrays to their final sizes */ { - kmem_cache_t *cachep; + struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) enable_cpucache(cachep); @@ -1267,7 +1267,7 @@ __initcall(cpucache_init); * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) +static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct page *page; void *addr; @@ -1293,7 +1293,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) /* * Interface to system's page release. 
*/ -static void kmem_freepages(kmem_cache_t *cachep, void *addr) +static void kmem_freepages(struct kmem_cache *cachep, void *addr) { unsigned long i = (1 << cachep->gfporder); struct page *page = virt_to_page(addr); @@ -1315,7 +1315,7 @@ static void kmem_freepages(kmem_cache_t *cachep, void *addr) static void kmem_rcu_free(struct rcu_head *head) { struct slab_rcu *slab_rcu = (struct slab_rcu *)head; - kmem_cache_t *cachep = slab_rcu->cachep; + struct kmem_cache *cachep = slab_rcu->cachep; kmem_freepages(cachep, slab_rcu->addr); if (OFF_SLAB(cachep)) @@ -1325,7 +1325,7 @@ static void kmem_rcu_free(struct rcu_head *head) #if DEBUG #ifdef CONFIG_DEBUG_PAGEALLOC -static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, +static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, unsigned long caller) { int size = obj_size(cachep); @@ -1358,7 +1358,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, } #endif -static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) +static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) { int size = obj_size(cachep); addr = &((char *)addr)[obj_offset(cachep)]; @@ -1380,7 +1380,7 @@ static void dump_line(char *data, int offset, int limit) #if DEBUG -static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines) +static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) { int i, size; char *realobj; @@ -1409,7 +1409,7 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines) } } -static void check_poison_obj(kmem_cache_t *cachep, void *objp) +static void check_poison_obj(struct kmem_cache *cachep, void *objp) { char *realobj; int size, i; @@ -1476,7 +1476,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) * slab_destroy_objs - call the registered destructor for each object in * a slab that is to be destroyed. 
*/ -static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) +static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) { int i; for (i = 0; i < cachep->num; i++) { @@ -1508,7 +1508,7 @@ static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) } } #else -static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) +static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) { if (cachep->dtor) { int i; @@ -1525,7 +1525,7 @@ static void slab_destroy_objs(kmem_cache_t *cachep, struct slab *slabp) * Before calling the slab must have been unlinked from the cache. * The cache-lock is not held/needed. */ -static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) +static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) { void *addr = slabp->s_mem - slabp->colouroff; @@ -1546,7 +1546,7 @@ static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) /* For setting up all the kmem_list3s for cache whose buffer_size is same as size of kmem_list3. */ -static void set_up_list3s(kmem_cache_t *cachep, int index) +static void set_up_list3s(struct kmem_cache *cachep, int index) { int node; @@ -1566,7 +1566,7 @@ static void set_up_list3s(kmem_cache_t *cachep, int index) * high order pages for slabs. When the gfp() functions are more friendly * towards high-order requests, this should be changed. */ -static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size, +static inline size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, size_t align, gfp_t flags) { size_t left_over = 0; @@ -1638,13 +1638,13 @@ static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size, * cacheline. This can be beneficial if you're counting cycles as closely * as davem. 
*/ -kmem_cache_t * +struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), - void (*dtor)(void*, kmem_cache_t *, unsigned long)) + unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), + void (*dtor)(void*, struct kmem_cache *, unsigned long)) { size_t left_over, slab_size, ralign; - kmem_cache_t *cachep = NULL; + struct kmem_cache *cachep = NULL; struct list_head *p; /* @@ -1662,7 +1662,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, mutex_lock(&cache_chain_mutex); list_for_each(p, &cache_chain) { - kmem_cache_t *pc = list_entry(p, kmem_cache_t, next); + struct kmem_cache *pc = list_entry(p, struct kmem_cache, next); mm_segment_t old_fs = get_fs(); char tmp; int res; @@ -1762,10 +1762,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, align = ralign; /* Get cache's description obj. */ - cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL); + cachep = kmem_cache_alloc(&cache_cache, SLAB_KERNEL); if (!cachep) goto oops; - memset(cachep, 0, sizeof(kmem_cache_t)); + memset(cachep, 0, sizeof(struct kmem_cache)); #if DEBUG cachep->obj_size = size; @@ -1941,7 +1941,7 @@ static void check_irq_on(void) BUG_ON(irqs_disabled()); } -static void check_spinlock_acquired(kmem_cache_t *cachep) +static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); @@ -1949,7 +1949,7 @@ static void check_spinlock_acquired(kmem_cache_t *cachep) #endif } -static void check_spinlock_acquired_node(kmem_cache_t *cachep, int node) +static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); @@ -1982,12 +1982,12 @@ static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) preempt_enable(); } -static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, +static void drain_array_locked(struct 
kmem_cache *cachep, struct array_cache *ac, int force, int node); static void do_drain(void *arg) { - kmem_cache_t *cachep = (kmem_cache_t *) arg; + struct kmem_cache *cachep = (struct kmem_cache *) arg; struct array_cache *ac; int node = numa_node_id(); @@ -1999,7 +1999,7 @@ static void do_drain(void *arg) ac->avail = 0; } -static void drain_cpu_caches(kmem_cache_t *cachep) +static void drain_cpu_caches(struct kmem_cache *cachep) { struct kmem_list3 *l3; int node; @@ -2020,7 +2020,7 @@ static void drain_cpu_caches(kmem_cache_t *cachep) spin_unlock_irq(&cachep->spinlock); } -static int __node_shrink(kmem_cache_t *cachep, int node) +static int __node_shrink(struct kmem_cache *cachep, int node) { struct slab *slabp; struct kmem_list3 *l3 = cachep->nodelists[node]; @@ -2049,7 +2049,7 @@ static int __node_shrink(kmem_cache_t *cachep, int node) return ret; } -static int __cache_shrink(kmem_cache_t *cachep) +static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; struct kmem_list3 *l3; @@ -2075,7 +2075,7 @@ static int __cache_shrink(kmem_cache_t *cachep) * Releases as many slabs as possible for a cache. * To help debugging, a zero exit status indicates all slabs were released. */ -int kmem_cache_shrink(kmem_cache_t *cachep) +int kmem_cache_shrink(struct kmem_cache *cachep) { if (!cachep || in_interrupt()) BUG(); @@ -2088,7 +2088,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * kmem_cache_destroy - delete a cache * @cachep: the cache to destroy * - * Remove a kmem_cache_t object from the slab cache. + * Remove a struct kmem_cache object from the slab cache. * Returns 0 on success. * * It is expected this function will be called by a module when it is @@ -2101,7 +2101,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * The caller must guarantee that noone will allocate memory from the cache * during the kmem_cache_destroy(). 
*/ -int kmem_cache_destroy(kmem_cache_t *cachep) +int kmem_cache_destroy(struct kmem_cache *cachep) { int i; struct kmem_list3 *l3; @@ -2152,7 +2152,7 @@ int kmem_cache_destroy(kmem_cache_t *cachep) EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. */ -static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp, +static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, int colour_off, gfp_t local_flags) { struct slab *slabp; @@ -2178,7 +2178,7 @@ static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) return (kmem_bufctl_t *) (slabp + 1); } -static void cache_init_objs(kmem_cache_t *cachep, +static void cache_init_objs(struct kmem_cache *cachep, struct slab *slabp, unsigned long ctor_flags) { int i; @@ -2227,7 +2227,7 @@ static void cache_init_objs(kmem_cache_t *cachep, slabp->free = 0; } -static void kmem_flagcheck(kmem_cache_t *cachep, gfp_t flags) +static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) { if (flags & SLAB_DMA) { if (!(cachep->gfpflags & GFP_DMA)) @@ -2238,7 +2238,7 @@ static void kmem_flagcheck(kmem_cache_t *cachep, gfp_t flags) } } -static void *slab_get_obj(kmem_cache_t *cachep, struct slab *slabp, int nodeid) +static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) { void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); kmem_bufctl_t next; @@ -2254,7 +2254,7 @@ static void *slab_get_obj(kmem_cache_t *cachep, struct slab *slabp, int nodeid) return objp; } -static void slab_put_obj(kmem_cache_t *cachep, struct slab *slabp, void *objp, +static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, int nodeid) { unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; @@ -2274,7 +2274,7 @@ static void slab_put_obj(kmem_cache_t *cachep, struct slab *slabp, void *objp, slabp->inuse--; } -static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) +static void set_slab_attr(struct 
kmem_cache *cachep, struct slab *slabp, void *objp) { int i; struct page *page; @@ -2293,7 +2293,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) +static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct slab *slabp; void *objp; @@ -2404,7 +2404,7 @@ static void kfree_debugcheck(const void *objp) } } -static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, +static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, void *caller) { struct page *page; @@ -2478,7 +2478,7 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, return objp; } -static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) +static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) { kmem_bufctl_t i; int entries = 0; @@ -2511,7 +2511,7 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) +static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) { int batchcount; struct kmem_list3 *l3; @@ -2602,7 +2602,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) } static inline void -cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) +cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) { might_sleep_if(flags & __GFP_WAIT); #if DEBUG @@ -2611,7 +2611,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) } #if DEBUG -static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, +static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, void *objp, void *caller) { if (!objp) @@ -2660,7 +2660,7 @@ static void 
*cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) +static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; struct array_cache *ac; @@ -2687,7 +2687,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) return objp; } -static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) +static inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags) { unsigned long save_flags; void *objp; @@ -2707,7 +2707,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) /* * A interface to enable slab creation on nodeid */ -static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) +static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; struct slab *slabp; @@ -2769,7 +2769,7 @@ static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) /* * Caller needs to acquire correct kmem_list's list_lock */ -static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, +static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, int node) { int i; @@ -2807,7 +2807,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, } } -static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) +static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) { int batchcount; struct kmem_list3 *l3; @@ -2866,7 +2866,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) * * Called with disabled ints. 
*/ -static inline void __cache_free(kmem_cache_t *cachep, void *objp) +static inline void __cache_free(struct kmem_cache *cachep, void *objp) { struct array_cache *ac = cpu_cache_get(cachep); @@ -2925,7 +2925,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) * Allocate an object from this cache. The flags are only relevant * if the cache has no available objects. */ -void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags) +void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { return __cache_alloc(cachep, flags); } @@ -2945,7 +2945,7 @@ EXPORT_SYMBOL(kmem_cache_alloc); * * Currently only used for dentry validation. */ -int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) +int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) { unsigned long addr = (unsigned long)ptr; unsigned long min_addr = PAGE_OFFSET; @@ -2986,7 +2986,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) * New and improved: it will now make sure that the object gets * put on the correct node list so that there is no false sharing. */ -void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) +void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { unsigned long save_flags; void *ptr; @@ -3010,7 +3010,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); void *kmalloc_node(size_t size, gfp_t flags, int node) { - kmem_cache_t *cachep; + struct kmem_cache *cachep; cachep = kmem_find_general_cachep(size, flags); if (unlikely(cachep == NULL)) @@ -3043,7 +3043,7 @@ EXPORT_SYMBOL(kmalloc_node); */ void *__kmalloc(size_t size, gfp_t flags) { - kmem_cache_t *cachep; + struct kmem_cache *cachep; /* If you want to save a few bytes .text space: replace * __ with kmem_. @@ -3114,7 +3114,7 @@ EXPORT_SYMBOL(__alloc_percpu); * Free an object which was previously allocated from this * cache. 
*/ -void kmem_cache_free(kmem_cache_t *cachep, void *objp) +void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; @@ -3135,7 +3135,7 @@ EXPORT_SYMBOL(kmem_cache_free); */ void kfree(const void *objp) { - kmem_cache_t *c; + struct kmem_cache *c; unsigned long flags; if (unlikely(!objp)) @@ -3172,13 +3172,13 @@ void free_percpu(const void *objp) EXPORT_SYMBOL(free_percpu); #endif -unsigned int kmem_cache_size(kmem_cache_t *cachep) +unsigned int kmem_cache_size(struct kmem_cache *cachep) { return obj_size(cachep); } EXPORT_SYMBOL(kmem_cache_size); -const char *kmem_cache_name(kmem_cache_t *cachep) +const char *kmem_cache_name(struct kmem_cache *cachep) { return cachep->name; } @@ -3187,7 +3187,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); /* * This initializes kmem_list3 for all nodes. */ -static int alloc_kmemlist(kmem_cache_t *cachep) +static int alloc_kmemlist(struct kmem_cache *cachep) { int node; struct kmem_list3 *l3; @@ -3243,7 +3243,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep) } struct ccupdate_struct { - kmem_cache_t *cachep; + struct kmem_cache *cachep; struct array_cache *new[NR_CPUS]; }; @@ -3259,7 +3259,7 @@ static void do_ccupdate_local(void *info) new->new[smp_processor_id()] = old; } -static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, +static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared) { struct ccupdate_struct new; @@ -3305,7 +3305,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, return 0; } -static void enable_cpucache(kmem_cache_t *cachep) +static void enable_cpucache(struct kmem_cache *cachep) { int err; int limit, shared; @@ -3357,7 +3357,7 @@ static void enable_cpucache(kmem_cache_t *cachep) cachep->name, -err); } -static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, +static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, int force, int node) { int tofree; @@ 
-3402,12 +3402,12 @@ static void cache_reap(void *unused) } list_for_each(walk, &cache_chain) { - kmem_cache_t *searchp; + struct kmem_cache *searchp; struct list_head *p; int tofree; struct slab *slabp; - searchp = list_entry(walk, kmem_cache_t, next); + searchp = list_entry(walk, struct kmem_cache, next); if (searchp->flags & SLAB_NO_REAP) goto next; @@ -3510,15 +3510,15 @@ static void *s_start(struct seq_file *m, loff_t *pos) if (p == &cache_chain) return NULL; } - return list_entry(p, kmem_cache_t, next); + return list_entry(p, struct kmem_cache, next); } static void *s_next(struct seq_file *m, void *p, loff_t *pos) { - kmem_cache_t *cachep = p; + struct kmem_cache *cachep = p; ++*pos; return cachep->next.next == &cache_chain ? NULL - : list_entry(cachep->next.next, kmem_cache_t, next); + : list_entry(cachep->next.next, struct kmem_cache, next); } static void s_stop(struct seq_file *m, void *p) @@ -3528,7 +3528,7 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { - kmem_cache_t *cachep = p; + struct kmem_cache *cachep = p; struct list_head *q; struct slab *slabp; unsigned long active_objs; @@ -3678,7 +3678,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, mutex_lock(&cache_chain_mutex); res = -EINVAL; list_for_each(p, &cache_chain) { - kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, + next); if (!strcmp(cachep->name, kbuf)) { if (limit < 1 || -- cgit 1.2.3-korg From 7fd6b1413082c303613fc137aca9a004740cacf0 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Feb 2006 03:05:52 -0800 Subject: [PATCH] slab: fix kzalloc and kstrdup caller report for CONFIG_DEBUG_SLAB Fix kzalloc() and kstrdup() caller report for CONFIG_DEBUG_SLAB. 
We must pass the caller to __cache_alloc() instead of directly doing __builtin_return_address(0) there; otherwise kzalloc() and kstrdup() are reported as the allocation site instead of the real one. Thanks to Valdis Kletnieks for reporting the problem and Steven Rostedt for the original idea. Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 7 +++++++ mm/slab.c | 29 ++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'mm/slab.c') diff --git a/include/linux/slab.h b/include/linux/slab.h index 1fb77a9cc148d2..8cf52939d0ab67 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -76,7 +76,14 @@ struct cache_sizes { kmem_cache_t *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; + +#ifndef CONFIG_DEBUG_SLAB extern void *__kmalloc(size_t, gfp_t); +#else +extern void *__kmalloc_track_caller(size_t, gfp_t, void*); +#define __kmalloc(size, flags) \ + __kmalloc_track_caller(size, flags, __builtin_return_address(0)) +#endif static inline void *kmalloc(size_t size, gfp_t flags) { diff --git a/mm/slab.c b/mm/slab.c index 6fbd6a1cdeb4a3..67527268b01c08 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2687,7 +2687,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) return objp; } -static inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static __always_inline void * +__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) { unsigned long save_flags; void *objp; @@ -2698,7 +2699,7 @@ static inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags) objp = ____cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, - __builtin_return_address(0)); + caller); prefetchw(objp); return objp; } @@ -2927,7 +2928,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t 
flags) { - return __cache_alloc(cachep, flags); + return __cache_alloc(cachep, flags, __builtin_return_address(0)); } EXPORT_SYMBOL(kmem_cache_alloc); @@ -3041,7 +3042,8 @@ EXPORT_SYMBOL(kmalloc_node); * platforms. For example, on i386, it means that the memory must come * from the first 16MB. */ -void *__kmalloc(size_t size, gfp_t flags) +static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, + void *caller) { struct kmem_cache *cachep; @@ -3053,10 +3055,27 @@ void *__kmalloc(size_t size, gfp_t flags) cachep = __find_general_cachep(size, flags); if (unlikely(cachep == NULL)) return NULL; - return __cache_alloc(cachep, flags); + return __cache_alloc(cachep, flags, caller); +} + +#ifndef CONFIG_DEBUG_SLAB + +void *__kmalloc(size_t size, gfp_t flags) +{ + return __do_kmalloc(size, flags, NULL); } EXPORT_SYMBOL(__kmalloc); +#else + +void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) +{ + return __do_kmalloc(size, flags, caller); +} +EXPORT_SYMBOL(__kmalloc_track_caller); + +#endif + #ifdef CONFIG_SMP /** * __alloc_percpu - allocate one copy of the object for every present -- cgit 1.2.3-korg From a70773ddb96b74c7afe5a5bc859ba45e3d02899e Mon Sep 17 00:00:00 2001 From: "Randy.Dunlap" Date: Wed, 1 Feb 2006 03:05:52 -0800 Subject: [PATCH] mm/slab: add kernel-doc for one function Fix kernel-doc for calculate_slab_order(). Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 67527268b01c08..afe9c5f8c57a63 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1559,8 +1559,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) } /** - * calculate_slab_order - calculate size (page order) of slabs and the number - * of objects per slab. 
+ * calculate_slab_order - calculate size (page order) of slabs + * @cachep: pointer to the cache that is being created + * @size: size of objects to be created in this cache. + * @align: required alignment for the objects. + * @flags: slab allocation flags + * + * Also calculates the number of objects per slab. * * This could be made much more intelligent. For now, try to avoid using * high order pages for slabs. When the gfp() functions are more friendly -- cgit 1.2.3-korg From ee13d785eac1fbe7e79ecca77bf7e902734a0b30 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Feb 2006 03:05:53 -0800 Subject: [PATCH] slab: fix sparse warning mm/slab.c:1522:13: error: incompatible types for operation (&) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index afe9c5f8c57a63..71370256a7eb11 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1571,8 +1571,8 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) * high order pages for slabs. When the gfp() functions are more friendly * towards high-order requests, this should be changed. */ -static inline size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, - size_t align, gfp_t flags) +static inline size_t calculate_slab_order(struct kmem_cache *cachep, + size_t size, size_t align, unsigned long flags) { size_t left_over = 0; -- cgit 1.2.3-korg From 2e1217cf96b54d3b2d0162930608159e73507fbf Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Sat, 4 Feb 2006 23:27:56 -0800 Subject: [PATCH] NUMA slab locking fixes: move color_next to l3 colour_next is used as an index to add a colouring offset to a new slab in the cache (colour_off * colour_next). Now with the NUMA aware slab allocator, it makes sense to colour slabs added on the same node sequentially with colour_next. 
This patch moves the colouring index "colour_next" per-node by placing it on kmem_list3 rather than kmem_cache. This also helps simplify locking for CPU up and down paths. Signed-off-by: Alok N Kataria Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 71370256a7eb11..2317096166dde3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -294,6 +294,7 @@ struct kmem_list3 { unsigned long next_reap; int free_touched; unsigned int free_limit; + unsigned int colour_next; /* Per-node cache coloring */ spinlock_t list_lock; struct array_cache *shared; /* shared per node */ struct array_cache **alien; /* on other nodes */ @@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent) INIT_LIST_HEAD(&parent->slabs_free); parent->shared = NULL; parent->alien = NULL; + parent->colour_next = 0; spin_lock_init(&parent->list_lock); parent->free_objects = 0; parent->free_touched = 0; @@ -390,7 +392,6 @@ struct kmem_cache { size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ - unsigned int colour_next; /* cache colouring */ struct kmem_cache *slabp_cache; unsigned int slab_size; unsigned int dflags; /* dynamic flags */ @@ -1119,7 +1120,6 @@ void __init kmem_cache_init(void) BUG(); cache_cache.colour = left_over / cache_cache.colour_off; - cache_cache.colour_next = 0; cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size()); @@ -2324,18 +2324,19 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) */ ctor_flags |= SLAB_CTOR_ATOMIC; - /* About to mess with non-constant members - lock. 
*/ + /* Take the l3 list lock to change the colour_next on this node */ check_irq_off(); - spin_lock(&cachep->spinlock); + l3 = cachep->nodelists[nodeid]; + spin_lock(&l3->list_lock); /* Get colour for the slab, and cal the next value. */ - offset = cachep->colour_next; - cachep->colour_next++; - if (cachep->colour_next >= cachep->colour) - cachep->colour_next = 0; - offset *= cachep->colour_off; + offset = l3->colour_next; + l3->colour_next++; + if (l3->colour_next >= cachep->colour) + l3->colour_next = 0; + spin_unlock(&l3->list_lock); - spin_unlock(&cachep->spinlock); + offset *= cachep->colour_off; check_irq_off(); if (local_flags & __GFP_WAIT) @@ -2367,7 +2368,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) if (local_flags & __GFP_WAIT) local_irq_disable(); check_irq_off(); - l3 = cachep->nodelists[nodeid]; spin_lock(&l3->list_lock); /* Make slab active. */ -- cgit 1.2.3-korg From ca3b9b91735316f0ec7f01976f85842e0bfe5c6e Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Sat, 4 Feb 2006 23:27:58 -0800 Subject: [PATCH] NUMA slab locking fixes: irq disabling from cachep->spinlock to l3 lock Earlier, we had to disable on chip interrupts while taking the cachep->spinlock because, at cache_grow, on every addition of a slab to a slab cache, we incremented colour_next which was protected by the cachep->spinlock, and cache_grow could occur at interrupt context. Since we now protect the per-node colour_next with the node's list_lock, we do not need to disable on chip interrupts while taking the per-cache spinlock, but we just need to disable interrupts when taking the per-node kmem_list3 list_lock. 
Signed-off-by: Alok N Kataria Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 2317096166dde3..d3f68543f9f4e3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -987,7 +987,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, cpumask_t mask; mask = node_to_cpumask(node); - spin_lock_irq(&cachep->spinlock); + spin_lock(&cachep->spinlock); /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; @@ -996,7 +996,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (!l3) goto unlock_cache; - spin_lock(&l3->list_lock); + spin_lock_irq(&l3->list_lock); /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; @@ -1004,7 +1004,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, free_block(cachep, nc->entry, nc->avail, node); if (!cpus_empty(mask)) { - spin_unlock(&l3->list_lock); + spin_unlock_irq(&l3->list_lock); goto unlock_cache; } @@ -1023,13 +1023,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, /* free slabs belonging to this node */ if (__node_shrink(cachep, node)) { cachep->nodelists[node] = NULL; - spin_unlock(&l3->list_lock); + spin_unlock_irq(&l3->list_lock); kfree(l3); } else { - spin_unlock(&l3->list_lock); + spin_unlock_irq(&l3->list_lock); } unlock_cache: - spin_unlock_irq(&cachep->spinlock); + spin_unlock(&cachep->spinlock); kfree(nc); } mutex_unlock(&cache_chain_mutex); @@ -2011,18 +2011,18 @@ static void drain_cpu_caches(struct kmem_cache *cachep) smp_call_function_all_cpus(do_drain, cachep); check_irq_on(); - spin_lock_irq(&cachep->spinlock); + spin_lock(&cachep->spinlock); for_each_online_node(node) { l3 = cachep->nodelists[node]; if (l3) { - spin_lock(&l3->list_lock); + 
spin_lock_irq(&l3->list_lock); drain_array_locked(cachep, l3->shared, 1, node); - spin_unlock(&l3->list_lock); + spin_unlock_irq(&l3->list_lock); if (l3->alien) drain_alien_cache(cachep, l3); } } - spin_unlock_irq(&cachep->spinlock); + spin_unlock(&cachep->spinlock); } static int __node_shrink(struct kmem_cache *cachep, int node) @@ -2338,7 +2338,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) offset *= cachep->colour_off; - check_irq_off(); if (local_flags & __GFP_WAIT) local_irq_enable(); @@ -2725,6 +2724,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node BUG_ON(!l3); retry: + check_irq_off(); spin_lock(&l3->list_lock); entry = l3->slabs_partial.next; if (entry == &l3->slabs_partial) { @@ -3304,11 +3304,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); check_irq_on(); - spin_lock_irq(&cachep->spinlock); + spin_lock(&cachep->spinlock); cachep->batchcount = batchcount; cachep->limit = limit; cachep->shared = shared; - spin_unlock_irq(&cachep->spinlock); + spin_unlock(&cachep->spinlock); for_each_online_cpu(i) { struct array_cache *ccold = new.new[i]; @@ -3564,8 +3564,7 @@ static int s_show(struct seq_file *m, void *p) int node; struct kmem_list3 *l3; - check_irq_on(); - spin_lock_irq(&cachep->spinlock); + spin_lock(&cachep->spinlock); active_objs = 0; num_slabs = 0; for_each_online_node(node) { @@ -3573,7 +3572,8 @@ static int s_show(struct seq_file *m, void *p) if (!l3) continue; - spin_lock(&l3->list_lock); + check_irq_on(); + spin_lock_irq(&l3->list_lock); list_for_each(q, &l3->slabs_full) { slabp = list_entry(q, struct slab, list); @@ -3600,7 +3600,7 @@ static int s_show(struct seq_file *m, void *p) free_objects += l3->free_objects; shared_avail += l3->shared->avail; - spin_unlock(&l3->list_lock); + spin_unlock_irq(&l3->list_lock); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ 
-3644,7 +3644,7 @@ static int s_show(struct seq_file *m, void *p) } #endif seq_putc(m, '\n'); - spin_unlock_irq(&cachep->spinlock); + spin_unlock(&cachep->spinlock); return 0; } -- cgit 1.2.3-korg From 4484ebf12bdb0ebcdc6e8951243cbab3d7f6f4c1 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Sat, 4 Feb 2006 23:27:59 -0800 Subject: [PATCH] NUMA slab locking fixes: fix cpu down and up locking This fixes locking and bugs in cpu_down and cpu_up paths of the NUMA slab allocator. Sonny Rao reported problems some time back on POWER5 boxes, when the last cpu on the nodes were being offlined. We could not reproduce the same on x86_64 because the cpumask (node_to_cpumask) was not being updated on cpu down. Since that issue is now fixed, we can reproduce Sonny's problems on x86_64 NUMA, and here is the fix. The problem earlier was on CPU_DOWN, if it was the last cpu on the node to go down, the array_caches (shared, alien) and the kmem_list3 of the node were being freed (kfree) with the kmem_list3 lock held. If the l3 or the array_caches were to come from the same cache being cleared, we hit on badness. This patch cleans up the locking in cpu_up and cpu_down path. We cannot really free l3 on cpu down because, there is no node offlining yet and even though a cpu is not yet up, node local memory can be allocated for it. So l3s are usually allocated at kmem_cache_create and destroyed at kmem_cache_destroy. Hence, we don't need cachep->spinlock protection to get to the cachep->nodelists[nodeid] either. Patch survived onlining and offlining on a 4 core 2 node Tyan box with a 4 dbench process running all the time. 
Signed-off-by: Alok N Kataria Signed-off-by: Ravikiran Thirumalai Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 123 +++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 38 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index d3f68543f9f4e3..9cc049a942c6b6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -884,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep, } } -static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3) +static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) { int i = 0; struct array_cache *ac; unsigned long flags; for_each_online_node(i) { - ac = l3->alien[i]; + ac = alien[i]; if (ac) { spin_lock_irqsave(&ac->lock, flags); __drain_alien_cache(cachep, ac, i); @@ -901,8 +901,11 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3) } #else #define alloc_alien_cache(node, limit) do { } while (0) -#define free_alien_cache(ac_ptr) do { } while (0) -#define drain_alien_cache(cachep, l3) do { } while (0) +#define drain_alien_cache(cachep, alien) do { } while (0) + +static inline void free_alien_cache(struct array_cache **ac_ptr) +{ +} #endif static int __devinit cpuup_callback(struct notifier_block *nfb, @@ -936,6 +939,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + /* + * The l3s don't come and go as CPUs come and + * go. cache_chain_mutex is sufficient + * protection here. 
+ */ cachep->nodelists[node] = l3; } @@ -950,26 +958,47 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, & array cache's */ list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; + struct array_cache *shared; + struct array_cache **alien; nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); + cachep->batchcount); if (!nc) goto bad; + shared = alloc_arraycache(node, + cachep->shared * cachep->batchcount, + 0xbaadf00d); + if (!shared) + goto bad; +#ifdef CONFIG_NUMA + alien = alloc_alien_cache(node, cachep->limit); + if (!alien) + goto bad; +#endif cachep->array[cpu] = nc; l3 = cachep->nodelists[node]; BUG_ON(!l3); - if (!l3->shared) { - if (!(nc = alloc_arraycache(node, - cachep->shared * - cachep->batchcount, - 0xbaadf00d))) - goto bad; - /* we are serialised from CPU_DEAD or - CPU_UP_CANCELLED by the cpucontrol lock */ - l3->shared = nc; + spin_lock_irq(&l3->list_lock); + if (!l3->shared) { + /* + * We are serialised from CPU_DEAD or + * CPU_UP_CANCELLED by the cpucontrol lock + */ + l3->shared = shared; + shared = NULL; } +#ifdef CONFIG_NUMA + if (!l3->alien) { + l3->alien = alien; + alien = NULL; + } +#endif + spin_unlock_irq(&l3->list_lock); + + kfree(shared); + free_alien_cache(alien); } mutex_unlock(&cache_chain_mutex); break; @@ -978,23 +1007,32 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + /* + * Even if all the cpus of a node are down, we don't free the + * kmem_list3 of any cache. This to avoid a race between + * cpu_down, and a kmalloc allocation from another cpu for + * memory from the node of the cpu going down. The list3 + * structure is usually allocated from kmem_cache_create() and + * gets destroyed at kmem_cache_destroy(). 
+ */ /* fall thru */ case CPU_UP_CANCELED: mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; + struct array_cache *shared; + struct array_cache **alien; cpumask_t mask; mask = node_to_cpumask(node); - spin_lock(&cachep->spinlock); /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; l3 = cachep->nodelists[node]; if (!l3) - goto unlock_cache; + goto free_array_cache; spin_lock_irq(&l3->list_lock); @@ -1005,33 +1043,43 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (!cpus_empty(mask)) { spin_unlock_irq(&l3->list_lock); - goto unlock_cache; + goto free_array_cache; } - if (l3->shared) { + shared = l3->shared; + if (shared) { free_block(cachep, l3->shared->entry, l3->shared->avail, node); - kfree(l3->shared); l3->shared = NULL; } - if (l3->alien) { - drain_alien_cache(cachep, l3); - free_alien_cache(l3->alien); - l3->alien = NULL; - } - /* free slabs belonging to this node */ - if (__node_shrink(cachep, node)) { - cachep->nodelists[node] = NULL; - spin_unlock_irq(&l3->list_lock); - kfree(l3); - } else { - spin_unlock_irq(&l3->list_lock); + alien = l3->alien; + l3->alien = NULL; + + spin_unlock_irq(&l3->list_lock); + + kfree(shared); + if (alien) { + drain_alien_cache(cachep, alien); + free_alien_cache(alien); } - unlock_cache: - spin_unlock(&cachep->spinlock); +free_array_cache: kfree(nc); } + /* + * In the previous loop, all the objects were freed to + * the respective cache's slabs, now we can go ahead and + * shrink each nodelist to its limit. 
+ */ + list_for_each_entry(cachep, &cache_chain, next) { + l3 = cachep->nodelists[node]; + if (!l3) + continue; + spin_lock_irq(&l3->list_lock); + /* free slabs belonging to this node */ + __node_shrink(cachep, node); + spin_unlock_irq(&l3->list_lock); + } mutex_unlock(&cache_chain_mutex); break; #endif @@ -2011,7 +2059,6 @@ static void drain_cpu_caches(struct kmem_cache *cachep) smp_call_function_all_cpus(do_drain, cachep); check_irq_on(); - spin_lock(&cachep->spinlock); for_each_online_node(node) { l3 = cachep->nodelists[node]; if (l3) { @@ -2019,10 +2066,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep) drain_array_locked(cachep, l3->shared, 1, node); spin_unlock_irq(&l3->list_lock); if (l3->alien) - drain_alien_cache(cachep, l3); + drain_alien_cache(cachep, l3->alien); } } - spin_unlock(&cachep->spinlock); } static int __node_shrink(struct kmem_cache *cachep, int node) @@ -3440,7 +3486,7 @@ static void cache_reap(void *unused) l3 = searchp->nodelists[numa_node_id()]; if (l3->alien) - drain_alien_cache(searchp, l3); + drain_alien_cache(searchp, l3->alien); spin_lock_irq(&l3->list_lock); drain_array_locked(searchp, cpu_cache_get(searchp), 0, @@ -3598,7 +3644,8 @@ static int s_show(struct seq_file *m, void *p) num_slabs++; } free_objects += l3->free_objects; - shared_avail += l3->shared->avail; + if (l3->shared) + shared_avail += l3->shared->avail; spin_unlock_irq(&l3->list_lock); } -- cgit 1.2.3-korg From 7a21ef6fe902ac0ad53b45af6851ae5ec3a64299 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Feb 2006 11:26:38 -0800 Subject: mm/slab.c (non-NUMA): Fix compile warning and clean up code The non-NUMA case would do an unmatched "free_alien_cache()" on an alien pointer that had never been allocated. It might not matter from a code generation standpoint (since in the non-NUMA case, the code doesn't actually _do_ anything), but it not only results in a compiler warning, it's really really ugly too. 
Fix the compiler warning by just having a matching dummy allocation. That also avoids an unnecessary #ifdef in the code. Signed-off-by: Linus Torvalds --- mm/slab.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 9cc049a942c6b6..d66c2b0d9715ab 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -900,12 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al } } #else -#define alloc_alien_cache(node, limit) do { } while (0) + #define drain_alien_cache(cachep, alien) do { } while (0) +static inline struct array_cache **alloc_alien_cache(int node, int limit) +{ + return (struct array_cache **) 0x01020304ul; +} + static inline void free_alien_cache(struct array_cache **ac_ptr) { } + #endif static int __devinit cpuup_callback(struct notifier_block *nfb, @@ -970,11 +976,10 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, 0xbaadf00d); if (!shared) goto bad; -#ifdef CONFIG_NUMA + alien = alloc_alien_cache(node, cachep->limit); if (!alien) goto bad; -#endif cachep->array[cpu] = nc; l3 = cachep->nodelists[node]; -- cgit 1.2.3-korg