Linux Core Kernel Commentary

mm/slab.c
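Before the listing itself, here is a short illustrative sketch of how a kernel subsystem of this era would typically use the interface that mm/slab.c exports: kmem_cache_create(), kmem_cache_alloc(), kmem_cache_free(), and the general-purpose kmalloc()/kfree() caches. This fragment is not part of mm/slab.c; struct my_object, my_cachep, my_ctor and the my_subsystem_*() functions are invented for the example, and the flag usage follows the 2.2-series conventions visible in the code below.

    /* Hypothetical client code for the slab allocator listed below. */
    #include <linux/slab.h>
    #include <linux/errno.h>

    struct my_object {
            int  id;
            char name[32];
    };

    static kmem_cache_t *my_cachep;

    /* Constructor: called (without the cache lock held) for every new
     * object when a slab is grown - see kmem_cache_init_objs().  It must
     * not allocate from the cache it constructs for. */
    static void my_ctor(void *objp, kmem_cache_t *cachep,
                        unsigned long flags)
    {
            struct my_object *obj = objp;
            obj->id = 0;
    }

    int my_subsystem_init(void)
    {
            /* kmem_cache_create() may sleep; it must not be called from
             * interrupt context (see the SLAB_MGMT_CHECKS checks). */
            my_cachep = kmem_cache_create("my_object",
                                          sizeof(struct my_object),
                                          0, SLAB_HWCACHE_ALIGN,
                                          my_ctor, NULL);
            if (!my_cachep)
                    return -ENOMEM;
            return 0;
    }

    void my_subsystem_use(void)
    {
            struct my_object *obj;
            void *buf;

            /* Per-cache allocation: SLAB_KERNEL may sleep while a new
             * slab is grown; SLAB_ATOMIC must be used from interrupts. */
            obj = kmem_cache_alloc(my_cachep, SLAB_KERNEL);
            if (obj)
                    kmem_cache_free(my_cachep, obj);

            /* General caches: kmalloc() picks the smallest "size-N"
             * cache that fits the request. */
            buf = kmalloc(128, GFP_KERNEL);
            if (buf)
                    kfree(buf);
    }

Note that a cache is never destroyed in this implementation: a module that is done with my_cachep can only kmem_cache_shrink() it, leaving the empty cache ready for re-use on a later module load, exactly as the opening comment of the file explains.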


35266 /* 35267 * linux/mm/slab.c 35268 * Written by Mark Hemment, 1996/97. 35269 * (markhe@nextd.demon.co.uk) 35270 * 35271 * 11 April '97. Started multi-threading - markhe 35272 * The global cache-chain is protected by the 35273 * semaphore 'cache_chain_sem'. The sem is only 35274 * needed when accessing/extending the cache-chain, 35275 * which can never happen inside an interrupt 35276 * (kmem_cache_create(), kmem_cache_shrink() and 35277 * kmem_cache_reap()). This is a medium-term 35278 * exclusion lock. 35279 * 35280 * Each cache has its own lock; 'c_spinlock'. This 35281 * lock is needed only when accessing non-constant 35282 * members of a cache-struct. Note: 'constant 35283 * members' are assigned a value in 35284 * kmem_cache_create() before the cache is linked 35285 * into the cache-chain. The values never change, 35286 * so not even a multi-reader lock is needed for 35287 * these members. The c_spinlock is only ever held 35288 * for a few cycles. 35289 * 35290 * To prevent kmem_cache_shrink() trying to shrink a 35291 * 'growing' cache (which maybe be sleeping and 35292 * therefore not holding the semaphore/lock), the 35293 * c_growing field is used. This also prevents 35294 * reaping from a cache. 35295 * 35296 * Note, caches can _never_ be destroyed. When a 35297 * sub-system (eg module) has finished with a cache, 35298 * it can only be shrunk. This leaves the cache 35299 * empty, but already enabled for re-use, eg. during 35300 * a module re-load. 35301 * 35302 * Notes: 35303 * o Constructors/deconstructors are called 35304 * while the cache-lock is _not_ held. 35305 * Therefore they _must_ be threaded. 35306 * o Constructors must not attempt to 35307 * allocate memory from the same cache that 35308 * they are a constructor for - infinite 35309 * loop! (There is no easy way to trap 35310 * this.) 35311 * o The per-cache locks must be obtained 35312 * with local-interrupts disabled. o When 35313 * compiled with debug support, and an 35314 * object-verify (upon release) is request 35315 * for a cache, the verify-function is 35316 * called with the cache lock held. This 35317 * helps debugging. 35318 * o The functions called from 35319 * try_to_free_page() must not attempt to 35320 * allocate memory from a cache which is 35321 * being grown. The buffer sub-system might 35322 * try to allocate memory, via 35323 * buffer_cachep. As this pri is passed to 35324 * the SLAB, and then (if necessary) onto 35325 * the gfp() funcs (which avoid calling 35326 * try_to_free_page()), no deadlock should 35327 * happen. 35328 * 35329 * The positioning of the per-cache lock is tricky. 35330 * If the lock is placed on the same h/w cache line 35331 * as commonly accessed members the number of L1 35332 * cache-line faults is reduced. However, this can 35333 * lead to the cache-line ping-ponging between 35334 * processors when the lock is in contention (and 35335 * the common members are being accessed). Decided 35336 * to keep it away from common members. 35337 * 35338 * More fine-graining is possible, with per-slab 35339 * locks...but this might be taking fine graining 35340 * too far, but would have the advantage; 35341 * 35342 * During most allocs/frees no writes occur 35343 * to the cache-struct. Therefore a 35344 * multi-reader/one writer lock could be 35345 * used (the writer needed when the slab 35346 * chain is being link/unlinked). 
As we 35347 * would not have an exclusion lock for the 35348 * cache-structure, one would be needed 35349 * per-slab (for updating s_free ptr, and/or 35350 * the contents of s_index). 35351 * 35352 * The above locking would allow parallel operations 35353 * to different slabs within the same cache with 35354 * reduced spinning. 35355 * 35356 * Per-engine slab caches, backed by a global cache 35357 * (as in Mach's Zone allocator), would allow most 35358 * allocations from the same cache to execute in 35359 * parallel. 35360 * 35361 * At present, each engine can be growing a cache. 35362 * This should be blocked. 35363 * 35364 * It is not currently 100% safe to examine the 35365 * page_struct outside of a kernel or global cli 35366 * lock. The risk is v. small, and non-fatal. 35367 * 35368 * Calls to printk() are not 100% safe (the function 35369 * is not threaded). However, printk() is only used 35370 * under an error condition, and the risk is 35371 * v. small (not sure if the console write functions 35372 * 'enjoy' executing multiple contexts in parallel. 35373 * I guess they don't...). Note, for most calls to 35374 * printk() any held cache-lock is dropped. This is 35375 * not always done for text size reasons - having 35376 * *_unlock() everywhere is bloat. */ 35377 35378 /* An implementation of the Slab Allocator as described 35379 * in outline in; 35380 * UNIX Internals: The New Frontiers by Uresh Vahalia 35381 * Pub: Prentice Hall ISBN 0-13-101908-2 35382 * or with a little more detail in; 35383 * The Slab Allocator: An Object-Caching Kernel 35384 * Memory Allocator 35385 * Jeff Bonwick (Sun Microsystems). 35386 * Presented at: USENIX Summer 1994 Technical 35387 * Conference */ 35388 35389 /* This implementation deviates from Bonwick's paper as 35390 * it does not use a hash-table for large objects, but 35391 * rather a per slab index to hold the bufctls. This 35392 * allows the bufctl structure to be small (one word), 35393 * but limits the number of objects a slab (not a cache) 35394 * can contain when off-slab bufctls are used. The limit 35395 * is the size of the largest general cache that does not 35396 * use off-slab bufctls, divided by the size of a bufctl. 35397 * For 32bit archs, is this 256/4 = 64. This is not 35398 * serious, as it is only for large objects, when it is 35399 * unwise to have too many per slab. 35400 * 35401 * Note: This limit can be raised by introducing a 35402 * general cache whose size is less than 512 35403 * (PAGE_SIZE<<3), but greater than 256. */ 35404 35405 #include <linux/config.h> 35406 #include <linux/slab.h> 35407 #include <linux/interrupt.h> 35408 #include <linux/init.h> 35409 35410 /* If there is a different PAGE_SIZE around, and it works 35411 * with this allocator, then change the following. */ 35412 #if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096) 35413 #error Your page size is probably not correctly \ 35414 supported - please check 35415 #endif 35416 35417 /* SLAB_MGMT_CHECKS - 1 to enable extra checks in 35418 * kmem_cache_create(). 35419 * 0 if you wish to reduce memory 35420 * usage. 35421 * 35422 * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to 35423 * honour; SLAB_DEBUG_FREE, 35424 * SLAB_DEBUG_INITIAL, 35425 * SLAB_RED_ZONE & SLAB_POISON. 35426 * 0 for faster, smaller, code 35427 * (especially in the critical 35428 * paths). 35429 * 35430 * SLAB_STATS - 1 to collect stats for 35431 * /proc/slabinfo. 35432 * 0 for faster, smaller, code 35433 * (especially in the critical 35434 * paths). 
35435 * 35436 * SLAB_SELFTEST - 1 to perform a few tests, mainly 35437 * for development. */ 35438 #define SLAB_MGMT_CHECKS 1 35439 #define SLAB_DEBUG_SUPPORT 0 35440 #define SLAB_STATS 0 35441 #define SLAB_SELFTEST 0 35442 35443 /* Shouldn't this be in a header file somewhere? */ 35444 #define BYTES_PER_WORD sizeof(void *) 35445 35446 /* Legal flag mask for kmem_cache_create(). */ 35447 #if SLAB_DEBUG_SUPPORT 35448 #if 0 35449 #define SLAB_C_MASK \ 35450 (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \ 35451 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \ 35452 SLAB_HIGH_PACK) \ 35453 #endif 35454 #define SLAB_C_MASK \ 35455 (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \ 35456 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP) 35457 #else 35458 #if 0 35459 #define SLAB_C_MASK \ 35460 (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK) 35461 #endif 35462 #define SLAB_C_MASK \ 35463 (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP) 35464 #endif /* SLAB_DEBUG_SUPPORT */ 35465 35466 /* Slab management struct. Manages the objs in a slab. 35467 * Placed either at the end of mem allocated for a slab, 35468 * or from an internal obj cache (cache_slabp). Slabs 35469 * are chained into a partially ordered list; fully used 35470 * first, partial next, and then fully free slabs. The 35471 * first 4 members are referenced during an alloc/free 35472 * operation, and should always appear on the same cache 35473 * line. Note: The offset between some members _must_ 35474 * match offsets within the kmem_cache_t - see 35475 * kmem_cache_init() for the checks. */ 35476 35477 /* could make this larger for 64bit archs */ 35478 #define SLAB_OFFSET_BITS 16 35479 35480 typedef struct kmem_slab_s { 35481 /* ptr to first inactive obj in slab */ 35482 struct kmem_bufctl_s *s_freep; 35483 struct kmem_bufctl_s *s_index; 35484 unsigned long s_magic; 35485 /* num of objs active in slab */ 35486 unsigned long s_inuse; 35487 35488 struct kmem_slab_s *s_nextp; 35489 struct kmem_slab_s *s_prevp; 35490 /* addr of first obj in slab */ 35491 void *s_mem; 35492 unsigned long s_offset:SLAB_OFFSET_BITS, 35493 s_dma:1; 35494 } kmem_slab_t; 35495 35496 /* When the slab management is on-slab, this gives the 35497 * size to use. */ 35498 #define slab_align_size \ 35499 (L1_CACHE_ALIGN(sizeof(kmem_slab_t))) 35500 35501 /* Test for end of slab chain. */ 35502 #define kmem_slab_end(x) \ 35503 ((kmem_slab_t*)&((x)->c_offset)) 35504 35505 /* s_magic */ 35506 #define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* alive */ 35507 #define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* destroyed */ 35508 35509 /* Bufctl's are used for linking objs within a slab, 35510 * identifying what slab an obj is in, and the address of 35511 * the associated obj (for sanity checking with off-slab 35512 * bufctls). What a bufctl contains depends upon the 35513 * state of the obj and the organisation of the cache. */ 35514 typedef struct kmem_bufctl_s { 35515 union { 35516 struct kmem_bufctl_s *buf_nextp; 35517 kmem_slab_t *buf_slabp; /* slab for obj */ 35518 void * buf_objp; 35519 } u; 35520 } kmem_bufctl_t; 35521 35522 /* ...shorthand... */ 35523 #define buf_nextp u.buf_nextp 35524 #define buf_slabp u.buf_slabp 35525 #define buf_objp u.buf_objp 35526 35527 #if SLAB_DEBUG_SUPPORT 35528 /* Magic nums for obj red zoning. Placed in the first 35529 * word before and the first word after an obj. 
*/ 35530 #define SLAB_RED_MAGIC1 0x5A2CF071UL /* obj active */ 35531 #define SLAB_RED_MAGIC2 0x170FC2A5UL /* obj inactive */ 35532 35533 /* ...and for poisoning */ 35534 #define SLAB_POISON_BYTE 0x5a /* byte val for poisoning*/ 35535 #define SLAB_POISON_END 0xa5 /* end-byte of poisoning */ 35536 35537 #endif /* SLAB_DEBUG_SUPPORT */ 35538 35539 /* Cache struct - manages a cache. First four members 35540 * are commonly referenced during an alloc/free 35541 * operation. */ 35542 struct kmem_cache_s { 35543 kmem_slab_t *c_freep; /* first w/ free objs */ 35544 unsigned long c_flags; /* constant flags */ 35545 unsigned long c_offset; 35546 unsigned long c_num; /* # of objs per slab */ 35547 35548 unsigned long c_magic; 35549 unsigned long c_inuse; /* kept at zero */ 35550 kmem_slab_t *c_firstp; /* first slab in chain */ 35551 kmem_slab_t *c_lastp; /* last slab in chain */ 35552 35553 spinlock_t c_spinlock; 35554 unsigned long c_growing; 35555 unsigned long c_dflags; /* dynamic flags */ 35556 size_t c_org_size; 35557 unsigned long c_gfporder; /* ord pgs per slab (2^n) */ 35558 /* constructor func */ 35559 void (*c_ctor)(void *, kmem_cache_t *, unsigned long); 35560 /* de-constructor func */ 35561 void (*c_dtor)(void *, kmem_cache_t *, unsigned long); 35562 unsigned long c_align; /* alignment of objs */ 35563 size_t c_colour; /* cache coloring range*/ 35564 size_t c_colour_next; /* cache coloring */ 35565 unsigned long c_failures; 35566 const char *c_name; 35567 struct kmem_cache_s *c_nextp; 35568 kmem_cache_t *c_index_cachep; 35569 #if SLAB_STATS 35570 unsigned long c_num_active; 35571 unsigned long c_num_allocations; 35572 unsigned long c_high_mark; 35573 unsigned long c_grown; 35574 unsigned long c_reaped; 35575 atomic_t c_errors; 35576 #endif /* SLAB_STATS */ 35577 }; 35578 35579 /* internal c_flags */ 35580 /* slab management in own cache */ 35581 #define SLAB_CFLGS_OFF_SLAB 0x010000UL 35582 /* bufctls in own cache */ 35583 #define SLAB_CFLGS_BUFCTL 0x020000UL 35584 /* a general cache */ 35585 #define SLAB_CFLGS_GENERAL 0x080000UL 35586 35587 /* c_dflags (dynamic flags). 
Need to hold the spinlock 35588 * to access this member */ 35589 /* don't reap a recently grown */ 35590 #define SLAB_CFLGS_GROWN 0x000002UL 35591 35592 #define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB) 35593 #define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL) 35594 #define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN) 35595 35596 #if SLAB_STATS 35597 #define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++) 35598 #define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--) 35599 #define SLAB_STATS_INC_ALLOCED(x) \ 35600 ((x)->c_num_allocations++) 35601 #define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++) 35602 #define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++) 35603 #define SLAB_STATS_SET_HIGH(x) \ 35604 do { \ 35605 if ((x)->c_num_active > (x)->c_high_mark) \ 35606 (x)->c_high_mark = (x)->c_num_active; \ 35607 } while (0) 35608 #define SLAB_STATS_INC_ERR(x) \ 35609 (atomic_inc(&(x)->c_errors)) 35610 #else 35611 #define SLAB_STATS_INC_ACTIVE(x) 35612 #define SLAB_STATS_DEC_ACTIVE(x) 35613 #define SLAB_STATS_INC_ALLOCED(x) 35614 #define SLAB_STATS_INC_GROWN(x) 35615 #define SLAB_STATS_INC_REAPED(x) 35616 #define SLAB_STATS_SET_HIGH(x) 35617 #define SLAB_STATS_INC_ERR(x) 35618 #endif /* SLAB_STATS */ 35619 35620 #if SLAB_SELFTEST 35621 #if !SLAB_DEBUG_SUPPORT 35622 #error Debug support needed for self-test 35623 #endif 35624 static void kmem_self_test(void); 35625 #endif /* SLAB_SELFTEST */ 35626 35627 /* c_magic - used to detect 'out of slabs' in 35628 * __kmem_cache_alloc() */ 35629 #define SLAB_C_MAGIC 0x4F17A36DUL 35630 35631 /* maximum size of an obj (in 2^order pages) */ 35632 #define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */ 35633 35634 /* maximum num of pages for a slab (prevents large 35635 * requests to the VM layer) */ 35636 #define SLAB_MAX_GFP_ORDER 5 /* 32 pages */ 35637 35638 /* the 'preferred' minimum num of objs per slab - maybe 35639 * less for large objs */ 35640 #define SLAB_MIN_OBJS_PER_SLAB 4 35641 35642 /* If the num of objs per slab is <= 35643 * SLAB_MIN_OBJS_PER_SLAB, then the page order must be 35644 * less than this before trying the next order. */ 35645 #define SLAB_BREAK_GFP_ORDER_HI 2 35646 #define SLAB_BREAK_GFP_ORDER_LO 1 35647 static int slab_break_gfp_order = 35648 SLAB_BREAK_GFP_ORDER_LO; 35649 35650 /* Macros for storing/retrieving the cachep and or slab 35651 * from the global 'mem_map'. With off-slab bufctls, 35652 * these are used to find the slab an obj belongs to. 35653 * With kmalloc(), and kfree(), these are used to find 35654 * the cache which an obj belongs to. */ 35655 #define SLAB_SET_PAGE_CACHE(pg, x) \ 35656 ((pg)->next = (struct page *)(x)) 35657 #define SLAB_GET_PAGE_CACHE(pg) \ 35658 ((kmem_cache_t *)(pg)->next) 35659 #define SLAB_SET_PAGE_SLAB(pg, x) \ 35660 ((pg)->prev = (struct page *)(x)) 35661 #define SLAB_GET_PAGE_SLAB(pg) \ 35662 ((kmem_slab_t *)(pg)->prev) 35663 35664 /* Size description struct for general caches. */ 35665 typedef struct cache_sizes { 35666 size_t cs_size; 35667 kmem_cache_t *cs_cachep; 35668 } cache_sizes_t; 35669 35670 static cache_sizes_t cache_sizes[] = { 35671 #if PAGE_SIZE == 4096 35672 { 32, NULL}, 35673 #endif 35674 { 64, NULL}, 35675 { 128, NULL}, 35676 { 256, NULL}, 35677 { 512, NULL}, 35678 {1024, NULL}, 35679 {2048, NULL}, 35680 {4096, NULL}, 35681 {8192, NULL}, 35682 {16384, NULL}, 35683 {32768, NULL}, 35684 {65536, NULL}, 35685 {131072, NULL}, 35686 {0, NULL} 35687 }; 35688 35689 /* Names for the general caches. 
Not placed into the 35690 * sizes struct for a good reason; the string ptr is not 35691 * needed while searching in kmalloc(), and would 35692 * 'get-in-the-way' in the h/w cache. */ 35693 static char *cache_sizes_name[] = { 35694 #if PAGE_SIZE == 4096 35695 "size-32", 35696 #endif 35697 "size-64", 35698 "size-128", 35699 "size-256", 35700 "size-512", 35701 "size-1024", 35702 "size-2048", 35703 "size-4096", 35704 "size-8192", 35705 "size-16384", 35706 "size-32768", 35707 "size-65536", 35708 "size-131072" 35709 }; 35710 35711 /* internal cache of cache description objs */ 35712 static kmem_cache_t cache_cache = { 35713 /* freep, flags */ kmem_slab_end(&cache_cache), 35714 SLAB_NO_REAP, 35715 /* offset, num */ sizeof(kmem_cache_t), 0, 35716 /* c_magic, c_inuse */ SLAB_C_MAGIC, 0, 35717 /* firstp, lastp */ kmem_slab_end(&cache_cache), 35718 kmem_slab_end(&cache_cache), 35719 /* spinlock */ SPIN_LOCK_UNLOCKED, 35720 /* growing */ 0, 35721 /* dflags */ 0, 35722 /* org_size, gfp */ 0, 0, 35723 /* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES, 35724 /* colour, colour_next */ 0, 0, 35725 /* failures */ 0, 35726 /* name */ "kmem_cache", 35727 /* nextp */ &cache_cache, 35728 /* index */ NULL, 35729 }; 35730 35731 /* Guard access to the cache-chain. */ 35732 static struct semaphore cache_chain_sem; 35733 35734 /* Place maintainer for reaping. */ 35735 static kmem_cache_t *clock_searchp = &cache_cache; 35736 35737 /* Internal slab management cache, for when slab 35738 * management is off-slab. */ 35739 static kmem_cache_t *cache_slabp = NULL; 35740 35741 /* Max number of objs-per-slab for caches which use 35742 * bufctl's. Needed to avoid a possible looping 35743 * condition in kmem_cache_grow(). */ 35744 static unsigned long bufctl_limit = 0; 35745 35746 /* Initialisation - setup the `cache' cache. */ 35747 long __init kmem_cache_init(long start, long end) 35748 { 35749 size_t size, i; 35750 35751 #define kmem_slab_offset(x) \ 35752 ((unsigned long)&((kmem_slab_t *)0)->x) 35753 #define kmem_slab_diff(a,b) \ 35754 (kmem_slab_offset(a) - kmem_slab_offset(b)) 35755 #define kmem_cache_offset(x) \ 35756 ((unsigned long)&((kmem_cache_t *)0)->x) 35757 #define kmem_cache_diff(a,b) \ 35758 (kmem_cache_offset(a) - kmem_cache_offset(b)) 35759 35760 /* Sanity checks... */ 35761 if (kmem_cache_diff(c_firstp, c_magic) != 35762 kmem_slab_diff(s_nextp, s_magic) 35763 kmem_cache_diff(c_firstp, c_inuse) != 35764 kmem_slab_diff(s_nextp, s_inuse) 35765 ((kmem_cache_offset(c_lastp) - 35766 ((unsigned long) 35767 kmem_slab_end((kmem_cache_t*)NULL))) != 35768 kmem_slab_offset(s_prevp)) 35769 kmem_cache_diff(c_lastp, c_firstp) != 35770 kmem_slab_diff(s_prevp, s_nextp)) { 35771 /* Offsets to the magic are incorrect, either the 35772 * structures have been incorrectly changed, or 35773 * adjustments are needed for your architecture. */ 35774 panic("kmem_cache_init(): Offsets are wrong - " 35775 "I've been messed with!"); 35776 /* NOTREACHED */ 35777 } 35778 #undef kmem_cache_offset 35779 #undef kmem_cache_diff 35780 #undef kmem_slab_offset 35781 #undef kmem_slab_diff 35782 35783 cache_chain_sem = MUTEX; 35784 35785 size = cache_cache.c_offset + sizeof(kmem_bufctl_t); 35786 size += (L1_CACHE_BYTES-1); 35787 size &= ~(L1_CACHE_BYTES-1); 35788 cache_cache.c_offset = size-sizeof(kmem_bufctl_t); 35789 35790 i = (PAGE_SIZE << cache_cache.c_gfporder) - 35791 slab_align_size; 35792 cache_cache.c_num = i / size; /* objs / slab */ 35793 35794 /* Cache colouring. 
*/ 35795 cache_cache.c_colour = 35796 (i-(cache_cache.c_num*size))/L1_CACHE_BYTES; 35797 cache_cache.c_colour_next = cache_cache.c_colour; 35798 35799 /* Fragmentation resistance on low memory - only use 35800 * bigger page orders on machines with more than 32MB 35801 * of memory. */ 35802 if (num_physpages > (32 << 20) >> PAGE_SHIFT) 35803 slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI; 35804 return start; 35805 } 35806 35807 /* Initialisation - setup remaining internal and general 35808 * caches. Called after the gfp() functions have been 35809 * enabled, and before smp_init(). */ 35810 void __init kmem_cache_sizes_init(void) 35811 { 35812 unsigned int found = 0; 35813 35814 cache_slabp = kmem_cache_create("slab_cache", 35815 sizeof(kmem_slab_t), 0, SLAB_HWCACHE_ALIGN, 35816 NULL, NULL); 35817 if (cache_slabp) { 35818 char **names = cache_sizes_name; 35819 cache_sizes_t *sizes = cache_sizes; 35820 do { 35821 /* For performance, all the general caches are L1 35822 * aligned. This should be particularly beneficial 35823 * on SMP boxes, as it eliminates "false sharing". 35824 * Note for systems short on memory removing the 35825 * alignment will allow tighter packing of the 35826 * smaller caches. */ 35827 if (!(sizes->cs_cachep = 35828 kmem_cache_create(*names++, sizes->cs_size, 35829 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) 35830 goto panic_time; 35831 if (!found) { 35832 /* Inc off-slab bufctl limit until the ceiling is 35833 * hit. */ 35834 if (SLAB_BUFCTL(sizes->cs_cachep->c_flags)) 35835 found++; 35836 else 35837 bufctl_limit = 35838 (sizes->cs_size/sizeof(kmem_bufctl_t)); 35839 } 35840 sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL; 35841 sizes++; 35842 } while (sizes->cs_size); 35843 #if SLAB_SELFTEST 35844 kmem_self_test(); 35845 #endif /* SLAB_SELFTEST */ 35846 return; 35847 } 35848 panic_time: 35849 panic("kmem_cache_sizes_init: Error creating caches"); 35850 /* NOTREACHED */ 35851 } 35852 35853 /* Interface to system's page allocator. Dma pts to 35854 * non-zero if all of memory is DMAable. No need to hold 35855 * the cache-lock. */ 35856 static inline void * 35857 kmem_getpages(kmem_cache_t *cachep, unsigned long flags, 35858 unsigned int *dma) 35859 { 35860 void *addr; 35861 35862 *dma = flags & SLAB_DMA; 35863 addr = 35864 (void*) __get_free_pages(flags, cachep->c_gfporder); 35865 /* Assume that now we have the pages no one else can 35866 * legally messes with the 'struct page's. However 35867 * vm_scan() might try to test the structure to see if 35868 * it is a named-page or buffer-page. The members it 35869 * tests are of no interest here..... */ 35870 if (!*dma && addr) { 35871 /* Need to check if can dma. */ 35872 struct page *page = mem_map + MAP_NR(addr); 35873 *dma = 1 << cachep->c_gfporder; 35874 while ((*dma)--) { 35875 if (!PageDMA(page)) { 35876 *dma = 0; 35877 break; 35878 } 35879 page++; 35880 } 35881 } 35882 return addr; 35883 } 35884 35885 /* Interface to system's page release. */ 35886 static inline void 35887 kmem_freepages(kmem_cache_t *cachep, void *addr) 35888 { 35889 unsigned long i = (1<<cachep->c_gfporder); 35890 struct page *page = &mem_map[MAP_NR(addr)]; 35891 35892 /* free_pages() does not clear the type bit - we do 35893 * that. The pages have been unlinked from their 35894 * cache-slab, but their 'struct page's might be 35895 * accessed in vm_scan(). Shouldn't be a worry. 
*/ 35896 while (i--) { 35897 PageClearSlab(page); 35898 page++; 35899 } 35900 free_pages((unsigned long)addr, cachep->c_gfporder); 35901 } 35902 35903 #if SLAB_DEBUG_SUPPORT 35904 static inline void 35905 kmem_poison_obj(kmem_cache_t *cachep, void *addr) 35906 { 35907 memset(addr, SLAB_POISON_BYTE, cachep->c_org_size); 35908 *(unsigned char *)(addr+cachep->c_org_size-1) = 35909 SLAB_POISON_END; 35910 } 35911 35912 static inline int 35913 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr) 35914 { 35915 void *end; 35916 end = memchr(addr, SLAB_POISON_END, 35917 cachep->c_org_size); 35918 if (end != (addr+cachep->c_org_size-1)) 35919 return 1; 35920 return 0; 35921 } 35922 #endif /* SLAB_DEBUG_SUPPORT */ 35923 35924 /* Three slab chain funcs - all called with ints disabled 35925 * and the appropriate cache-lock held. */ 35926 static inline void 35927 kmem_slab_unlink(kmem_slab_t *slabp) 35928 { 35929 kmem_slab_t *prevp = slabp->s_prevp; 35930 kmem_slab_t *nextp = slabp->s_nextp; 35931 prevp->s_nextp = nextp; 35932 nextp->s_prevp = prevp; 35933 } 35934 35935 static inline void 35936 kmem_slab_link_end(kmem_cache_t *cachep, 35937 kmem_slab_t *slabp) 35938 { 35939 kmem_slab_t *lastp = cachep->c_lastp; 35940 slabp->s_nextp = kmem_slab_end(cachep); 35941 slabp->s_prevp = lastp; 35942 cachep->c_lastp = slabp; 35943 lastp->s_nextp = slabp; 35944 } 35945 35946 static inline void 35947 kmem_slab_link_free(kmem_cache_t *cachep, 35948 kmem_slab_t *slabp) 35949 { 35950 kmem_slab_t *nextp = cachep->c_freep; 35951 kmem_slab_t *prevp = nextp->s_prevp; 35952 slabp->s_nextp = nextp; 35953 slabp->s_prevp = prevp; 35954 nextp->s_prevp = slabp; 35955 slabp->s_prevp->s_nextp = slabp; 35956 } 35957 35958 /* Destroy all the objs in a slab, and release the mem 35959 * back to the system. Before calling the slab must have 35960 * been unlinked from the cache. The cache-lock is not 35961 * held/needed. */ 35962 static void 35963 kmem_slab_destroy(kmem_cache_t *cachep, 35964 kmem_slab_t *slabp) 35965 { 35966 if (cachep->c_dtor 35967 #if SLAB_DEBUG_SUPPORT 35968 cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE) 35969 #endif /*SLAB_DEBUG_SUPPORT*/ 35970 ) { 35971 /* Doesn't use the bufctl ptrs to find objs. 
*/ 35972 unsigned long num = cachep->c_num; 35973 void *objp = slabp->s_mem; 35974 do { 35975 #if SLAB_DEBUG_SUPPORT 35976 if (cachep->c_flags & SLAB_RED_ZONE) { 35977 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) 35978 printk(KERN_ERR "kmem_slab_destroy: " 35979 "Bad front redzone - %s\n", 35980 cachep->c_name); 35981 objp += BYTES_PER_WORD; 35982 if (*((unsigned long*)(objp+cachep->c_org_size)) 35983 != SLAB_RED_MAGIC1) 35984 printk(KERN_ERR "kmem_slab_destroy: " 35985 "Bad rear redzone - %s\n", 35986 cachep->c_name); 35987 } 35988 if (cachep->c_dtor) 35989 #endif /*SLAB_DEBUG_SUPPORT*/ 35990 (cachep->c_dtor)(objp, cachep, 0); 35991 #if SLAB_DEBUG_SUPPORT 35992 else if (cachep->c_flags & SLAB_POISON) { 35993 if (kmem_check_poison_obj(cachep, objp)) 35994 printk(KERN_ERR "kmem_slab_destroy: " 35995 "Bad poison - %s\n", cachep->c_name); 35996 } 35997 if (cachep->c_flags & SLAB_RED_ZONE) 35998 objp -= BYTES_PER_WORD; 35999 #endif /* SLAB_DEBUG_SUPPORT */ 36000 objp += cachep->c_offset; 36001 if (!slabp->s_index) 36002 objp += sizeof(kmem_bufctl_t); 36003 } while (--num); 36004 } 36005 36006 slabp->s_magic = SLAB_MAGIC_DESTROYED; 36007 if (slabp->s_index) 36008 kmem_cache_free(cachep->c_index_cachep, 36009 slabp->s_index); 36010 kmem_freepages(cachep, slabp->s_mem-slabp->s_offset); 36011 if (SLAB_OFF_SLAB(cachep->c_flags)) 36012 kmem_cache_free(cache_slabp, slabp); 36013 } 36014 36015 /* Call the num objs, wastage, and bytes left over for a 36016 * given slab size. */ 36017 static inline size_t 36018 kmem_cache_cal_waste(unsigned long gfporder, size_t size, 36019 size_t extra, unsigned long flags, size_t *left_over, 36020 unsigned long *num) 36021 { 36022 size_t wastage = PAGE_SIZE<<gfporder; 36023 36024 if (SLAB_OFF_SLAB(flags)) 36025 gfporder = 0; 36026 else 36027 gfporder = slab_align_size; 36028 wastage -= gfporder; 36029 *num = wastage / size; 36030 wastage -= (*num * size); 36031 *left_over = wastage; 36032 36033 return (wastage + gfporder + (extra * *num)); 36034 } 36035 36036 /* Create a cache: Returns a ptr to the cache on success, 36037 * NULL on failure. Cannot be called within a int, but 36038 * can be interrupted. NOTE: The 'name' is assumed to be 36039 * memory that is _not_ going to disappear. */ 36040 kmem_cache_t * 36041 kmem_cache_create(const char *name, size_t size, 36042 size_t offset, unsigned long flags, 36043 void (*ctor)(void*, kmem_cache_t *, unsigned long), 36044 void (*dtor)(void*, kmem_cache_t *, unsigned long)) 36045 { 36046 const char *func_nm= KERN_ERR "kmem_create: "; 36047 kmem_cache_t *searchp; 36048 kmem_cache_t *cachep=NULL; 36049 size_t extra; 36050 size_t left_over; 36051 size_t align; 36052 36053 /* Sanity checks... 
*/ 36054 #if SLAB_MGMT_CHECKS 36055 if (!name) { 36056 printk("%sNULL ptr\n", func_nm); 36057 goto opps; 36058 } 36059 if (in_interrupt()) { 36060 printk("%sCalled during int - %s\n", func_nm, name); 36061 goto opps; 36062 } 36063 36064 if (size < BYTES_PER_WORD) { 36065 printk("%sSize too small %d - %s\n", 36066 func_nm, (int) size, name); 36067 size = BYTES_PER_WORD; 36068 } 36069 36070 if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) { 36071 printk("%sSize too large %d - %s\n", 36072 func_nm, (int) size, name); 36073 goto opps; 36074 } 36075 36076 if (dtor && !ctor) { 36077 /* Decon, but no con - doesn't make sense */ 36078 printk("%sDecon but no con - %s\n", func_nm, name); 36079 goto opps; 36080 } 36081 36082 if (offset < 0 offset > size) { 36083 printk("%sOffset weird %d - %s\n", 36084 func_nm, (int) offset, name); 36085 offset = 0; 36086 } 36087 36088 #if SLAB_DEBUG_SUPPORT 36089 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { 36090 /* No ctor, but inital state check requested */ 36091 printk("%sNo con, but init state check requested - " 36092 "%s\n", func_nm, name); 36093 flags &= ~SLAB_DEBUG_INITIAL; 36094 } 36095 36096 if ((flags & SLAB_POISON) && ctor) { 36097 /* request for poisoning, but we can't do that with a 36098 * constructor */ 36099 printk("%sPoisoning requested, but con given - %s\n", 36100 func_nm, name); 36101 flags &= ~SLAB_POISON; 36102 } 36103 #if 0 36104 if ((flags & SLAB_HIGH_PACK) && ctor) { 36105 printk("%sHigh pack requested, but con given - %s\n", 36106 func_nm, name); 36107 flags &= ~SLAB_HIGH_PACK; 36108 } 36109 if ((flags & SLAB_HIGH_PACK) && 36110 (flags & (SLAB_POISON|SLAB_RED_ZONE))) { 36111 printk("%sHigh pack requested, but with " 36112 "poisoning/red-zoning - %s\n", 36113 func_nm, name); 36114 flags &= ~SLAB_HIGH_PACK; 36115 } 36116 #endif 36117 #endif /* SLAB_DEBUG_SUPPORT */ 36118 #endif /* SLAB_MGMT_CHECKS */ 36119 36120 /* Always checks flags, a caller might be expecting 36121 * debug support which isn't available. */ 36122 if (flags & ~SLAB_C_MASK) { 36123 printk("%sIllgl flg %lX - %s\n", 36124 func_nm, flags, name); 36125 flags &= SLAB_C_MASK; 36126 } 36127 36128 /* Get cache's description obj. */ 36129 cachep = 36130 (kmem_cache_t *) kmem_cache_alloc(&cache_cache, 36131 SLAB_KERNEL); 36132 if (!cachep) 36133 goto opps; 36134 memset(cachep, 0, sizeof(kmem_cache_t)); 36135 36136 /* Check that size is in terms of words. This is 36137 * needed to avoid unaligned accesses for some archs 36138 * when redzoning is used, and makes sure any on-slab 36139 * bufctl's are also correctly aligned. */ 36140 if (size & (BYTES_PER_WORD-1)) { 36141 size += (BYTES_PER_WORD-1); 36142 size &= ~(BYTES_PER_WORD-1); 36143 printk("%sForcing size word alignment - %s\n", 36144 func_nm, name); 36145 } 36146 36147 cachep->c_org_size = size; 36148 #if SLAB_DEBUG_SUPPORT 36149 if (flags & SLAB_RED_ZONE) { 36150 /* There is no point trying to honour cache alignment 36151 * when redzoning. */ 36152 flags &= ~SLAB_HWCACHE_ALIGN; 36153 size += 2*BYTES_PER_WORD; /* words for redzone */ 36154 } 36155 #endif /* SLAB_DEBUG_SUPPORT */ 36156 36157 align = BYTES_PER_WORD; 36158 if (flags & SLAB_HWCACHE_ALIGN) 36159 align = L1_CACHE_BYTES; 36160 36161 /* Determine if the slab management and/or bufclts are 36162 * 'on' or 'off' slab. */ 36163 extra = sizeof(kmem_bufctl_t); 36164 if (size < (PAGE_SIZE>>3)) { 36165 /* Size is small(ish). Use packing where bufctl size 36166 * per obj is low, and slab management is on-slab. 
*/ 36167 #if 0 36168 if ((flags & SLAB_HIGH_PACK)) { 36169 /* Special high packing for small objects (mainly 36170 * for vm_mapping structs, but others can use it). 36171 */ 36172 if (size == (L1_CACHE_BYTES/4) 36173 size == (L1_CACHE_BYTES/2) 36174 size == L1_CACHE_BYTES) { 36175 /* The bufctl is stored with the object. */ 36176 extra = 0; 36177 } else 36178 flags &= ~SLAB_HIGH_PACK; 36179 } 36180 #endif 36181 } else { 36182 /* Size is large, assume best to place the slab 36183 * management obj off-slab (should allow better 36184 * packing of objs). */ 36185 flags |= SLAB_CFLGS_OFF_SLAB; 36186 if (!(size & ~PAGE_MASK) size == (PAGE_SIZE/2) 36187 size == (PAGE_SIZE/4) size == (PAGE_SIZE/8)) { 36188 /* To avoid waste the bufctls are off-slab... */ 36189 flags |= SLAB_CFLGS_BUFCTL; 36190 extra = 0; 36191 } /* else slab management is off-slab, but freelist 36192 * pointers are on. */ 36193 } 36194 size += extra; 36195 36196 if (flags & SLAB_HWCACHE_ALIGN) { 36197 /* Need to adjust size so that objs are cache 36198 * aligned. */ 36199 if (size > (L1_CACHE_BYTES/2)) { 36200 size_t words = size % L1_CACHE_BYTES; 36201 if (words) 36202 size += (L1_CACHE_BYTES-words); 36203 } else { 36204 /* Small obj size, can get at least two per cache 36205 * line. */ 36206 int num_per_line = L1_CACHE_BYTES/size; 36207 left_over = L1_CACHE_BYTES - (num_per_line*size); 36208 if (left_over) { 36209 /* Need to adjust size so objs cache align. */ 36210 if (left_over%num_per_line) { 36211 /* Odd num of objs per line - fixup. */ 36212 num_per_line--; 36213 left_over += size; 36214 } 36215 size += (left_over/num_per_line); 36216 } 36217 } 36218 } else if (!(size%L1_CACHE_BYTES)) { 36219 /* Size happens to cache align... */ 36220 flags |= SLAB_HWCACHE_ALIGN; 36221 align = L1_CACHE_BYTES; 36222 } 36223 36224 /* Cal size (in pages) of slabs, and the num of objs 36225 * per slab. This could be made much more intelligent. 36226 * For now, try to avoid using high page-orders for 36227 * slabs. When the gfp() funcs are more friendly 36228 * towards high-order requests, this should be changed. 36229 */ 36230 do { 36231 size_t wastage; 36232 unsigned int break_flag = 0; 36233 cal_wastage: 36234 wastage = kmem_cache_cal_waste(cachep->c_gfporder, 36235 size, extra, flags, &left_over, &cachep->c_num); 36236 if (!cachep->c_num) 36237 goto next; 36238 if (break_flag) 36239 break; 36240 if (SLAB_BUFCTL(flags) && 36241 cachep->c_num > bufctl_limit) { 36242 /* Oops, this num of objs will cause problems. */ 36243 cachep->c_gfporder--; 36244 break_flag++; 36245 goto cal_wastage; 36246 } 36247 if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER) 36248 break; 36249 36250 /* Large num of objs is good, but v. large slabs are 36251 * currently bad for the gfp()s. */ 36252 if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) { 36253 if (cachep->c_gfporder < slab_break_gfp_order) 36254 goto next; 36255 } 36256 36257 /* Stop caches with small objs having a large num of 36258 * pages. */ 36259 if (left_over <= slab_align_size) 36260 break; 36261 if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder)) 36262 break; /* Acceptable internal fragmentation. */ 36263 next: 36264 cachep->c_gfporder++; 36265 } while (1); 36266 36267 /* If the slab has been placed off-slab, and we have 36268 * enough space then move it on-slab. This is at the 36269 * expense of any extra colouring. 
*/ 36270 if ((flags & SLAB_CFLGS_OFF_SLAB) && 36271 !SLAB_BUFCTL(flags) && 36272 left_over >= slab_align_size) { 36273 flags &= ~SLAB_CFLGS_OFF_SLAB; 36274 left_over -= slab_align_size; 36275 } 36276 36277 /* Offset must be a factor of the alignment. */ 36278 offset += (align-1); 36279 offset &= ~(align-1); 36280 36281 /* Mess around with the offset alignment. */ 36282 if (!left_over) { 36283 offset = 0; 36284 } else if (left_over < offset) { 36285 offset = align; 36286 if (flags & SLAB_HWCACHE_ALIGN) { 36287 if (left_over < offset) 36288 offset = 0; 36289 } else { 36290 /* Offset is BYTES_PER_WORD, and left_over is at 36291 * least BYTES_PER_WORD. 36292 */ 36293 if (left_over >= (BYTES_PER_WORD*2)) { 36294 offset >>= 1; 36295 if (left_over >= (BYTES_PER_WORD*4)) 36296 offset >>= 1; 36297 } 36298 } 36299 } else if (!offset) { 36300 /* No offset requested, but space enough - give 36301 * one. */ 36302 offset = left_over/align; 36303 if (flags & SLAB_HWCACHE_ALIGN) { 36304 if (offset >= 8) { 36305 /* A large number of colours - use a larger 36306 * alignment. */ 36307 align <<= 1; 36308 } 36309 } else { 36310 if (offset >= 10) { 36311 align <<= 1; 36312 if (offset >= 16) 36313 align <<= 1; 36314 } 36315 } 36316 offset = align; 36317 } 36318 36319 #if 0 36320 printk("%s: Left_over:%d Align:%d Size:%d\n", 36321 name, left_over, offset, size); 36322 #endif 36323 36324 if ((cachep->c_align = (unsigned long) offset)) 36325 cachep->c_colour = (left_over/offset); 36326 cachep->c_colour_next = cachep->c_colour; 36327 36328 /* If the bufctl's are on-slab, c_offset does not 36329 * include the size of bufctl. */ 36330 if (!SLAB_BUFCTL(flags)) 36331 size -= sizeof(kmem_bufctl_t); 36332 else 36333 cachep->c_index_cachep = 36334 kmem_find_general_cachep(cachep->c_num * 36335 sizeof(kmem_bufctl_t)); 36336 cachep->c_offset = (unsigned long) size; 36337 cachep->c_freep = kmem_slab_end(cachep); 36338 cachep->c_firstp = kmem_slab_end(cachep); 36339 cachep->c_lastp = kmem_slab_end(cachep); 36340 cachep->c_flags = flags; 36341 cachep->c_ctor = ctor; 36342 cachep->c_dtor = dtor; 36343 cachep->c_magic = SLAB_C_MAGIC; 36344 cachep->c_name = name; /* Simply point to the name. */ 36345 spin_lock_init(&cachep->c_spinlock); 36346 36347 /* Need the semaphore to access the chain. */ 36348 down(&cache_chain_sem); 36349 searchp = &cache_cache; 36350 do { 36351 /* The name field is constant - no lock needed. */ 36352 if (!strcmp(searchp->c_name, name)) { 36353 printk("%sDup name - %s\n", func_nm, name); 36354 break; 36355 } 36356 searchp = searchp->c_nextp; 36357 } while (searchp != &cache_cache); 36358 36359 /* There is no reason to lock our new cache before we 36360 * link it in - no one knows about it yet... 36361 */ 36362 cachep->c_nextp = cache_cache.c_nextp; 36363 cache_cache.c_nextp = cachep; 36364 up(&cache_chain_sem); 36365 opps: 36366 return cachep; 36367 } 36368 36369 /* Shrink a cache. Releases as many slabs as possible 36370 * for a cache. It is expected this function will be 36371 * called by a module when it is unloaded. The cache is 36372 * _not_ removed, this creates too many problems and the 36373 * cache-structure does not take up much room. A module 36374 * should keep its cache pointer(s) in unloaded memory, 36375 * so when reloaded it knows the cache is available. To 36376 * help debugging, a zero exit status indicates all slabs 36377 * were released. 
*/ 36378 int 36379 kmem_cache_shrink(kmem_cache_t *cachep) 36380 { 36381 kmem_cache_t *searchp; 36382 kmem_slab_t *slabp; 36383 int ret; 36384 36385 if (!cachep) { 36386 printk(KERN_ERR "kmem_shrink: NULL ptr\n"); 36387 return 2; 36388 } 36389 if (in_interrupt()) { 36390 printk(KERN_ERR "kmem_shrink: Called during int - " 36391 "%s\n", cachep->c_name); 36392 return 2; 36393 } 36394 36395 /* Find the cache in the chain of caches. */ 36396 down(&cache_chain_sem); /* Semaphore is needed. */ 36397 searchp = &cache_cache; 36398 for (;searchp->c_nextp != &cache_cache; 36399 searchp = searchp->c_nextp) { 36400 if (searchp->c_nextp != cachep) 36401 continue; 36402 36403 /* Accessing clock_searchp is safe - we hold the 36404 * mutex. */ 36405 if (cachep == clock_searchp) 36406 clock_searchp = cachep->c_nextp; 36407 goto found; 36408 } 36409 up(&cache_chain_sem); 36410 printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", 36411 cachep); 36412 return 2; 36413 found: 36414 /* Release the semaphore before getting the cache-lock. 36415 * This could mean multiple engines are shrinking the 36416 * cache, but so what. */ 36417 up(&cache_chain_sem); 36418 spin_lock_irq(&cachep->c_spinlock); 36419 36420 /* If the cache is growing, stop shrinking. */ 36421 while (!cachep->c_growing) { 36422 slabp = cachep->c_lastp; 36423 if (slabp->s_inuse slabp == kmem_slab_end(cachep)) 36424 break; 36425 kmem_slab_unlink(slabp); 36426 spin_unlock_irq(&cachep->c_spinlock); 36427 kmem_slab_destroy(cachep, slabp); 36428 spin_lock_irq(&cachep->c_spinlock); 36429 } 36430 ret = 1; 36431 if (cachep->c_lastp == kmem_slab_end(cachep)) 36432 ret--; /* Cache is empty. */ 36433 spin_unlock_irq(&cachep->c_spinlock); 36434 return ret; 36435 } 36436 36437 /* Get the memory for a slab management obj. */ 36438 static inline kmem_slab_t * 36439 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, 36440 int local_flags) 36441 { 36442 kmem_slab_t *slabp; 36443 36444 if (SLAB_OFF_SLAB(cachep->c_flags)) { 36445 /* Slab management obj is off-slab. */ 36446 slabp = kmem_cache_alloc(cache_slabp, local_flags); 36447 } else { 36448 /* Slab management at end of slab memory, placed so 36449 * that the position is 'coloured'. */ 36450 void *end; 36451 end = objp + (cachep->c_num * cachep->c_offset); 36452 if (!SLAB_BUFCTL(cachep->c_flags)) 36453 end += (cachep->c_num * sizeof(kmem_bufctl_t)); 36454 slabp = 36455 (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end); 36456 } 36457 36458 if (slabp) { 36459 slabp->s_inuse = 0; 36460 slabp->s_dma = 0; 36461 slabp->s_index = NULL; 36462 } 36463 36464 return slabp; 36465 } 36466 36467 static inline void 36468 kmem_cache_init_objs(kmem_cache_t * cachep, 36469 kmem_slab_t * slabp, void *objp, 36470 unsigned long ctor_flags) 36471 { 36472 kmem_bufctl_t **bufpp = &slabp->s_freep; 36473 unsigned long num = cachep->c_num-1; 36474 36475 do { 36476 #if SLAB_DEBUG_SUPPORT 36477 if (cachep->c_flags & SLAB_RED_ZONE) { 36478 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1; 36479 objp += BYTES_PER_WORD; 36480 *((unsigned long*)(objp+cachep->c_org_size)) = 36481 SLAB_RED_MAGIC1; 36482 } 36483 #endif /* SLAB_DEBUG_SUPPORT */ 36484 36485 /* Constructors are not allowed to allocate memory 36486 * from the same cache which they are a constructor 36487 * for. Otherwise, deadlock. They must also be 36488 * threaded. 
*/ 36489 if (cachep->c_ctor) 36490 cachep->c_ctor(objp, cachep, ctor_flags); 36491 #if SLAB_DEBUG_SUPPORT 36492 else if (cachep->c_flags & SLAB_POISON) { 36493 /* need to poison the objs */ 36494 kmem_poison_obj(cachep, objp); 36495 } 36496 36497 if (cachep->c_flags & SLAB_RED_ZONE) { 36498 if (*((unsigned long*)(objp+cachep->c_org_size)) != 36499 SLAB_RED_MAGIC1) { 36500 *((unsigned long*)(objp+cachep->c_org_size)) = 36501 SLAB_RED_MAGIC1; 36502 printk(KERN_ERR 36503 "kmem_init_obj: Bad rear redzone " 36504 "after constructor - %s\n", 36505 cachep->c_name); 36506 } 36507 objp -= BYTES_PER_WORD; 36508 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) { 36509 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1; 36510 printk(KERN_ERR 36511 "kmem_init_obj: Bad front redzone " 36512 "after constructor - %s\n", 36513 cachep->c_name); 36514 } 36515 } 36516 #endif /* SLAB_DEBUG_SUPPORT */ 36517 36518 objp += cachep->c_offset; 36519 if (!slabp->s_index) { 36520 *bufpp = objp; 36521 objp += sizeof(kmem_bufctl_t); 36522 } else 36523 *bufpp = &slabp->s_index[num]; 36524 bufpp = &(*bufpp)->buf_nextp; 36525 } while (num--); 36526 36527 *bufpp = NULL; 36528 } 36529 36530 /* Grow (by 1) the number of slabs within a cache. This 36531 * is called by kmem_cache_alloc() when there are no 36532 * active objs left in a cache. */ 36533 static int 36534 kmem_cache_grow(kmem_cache_t * cachep, int flags) 36535 { 36536 kmem_slab_t *slabp; 36537 struct page *page; 36538 void *objp; 36539 size_t offset; 36540 unsigned int dma, local_flags; 36541 unsigned long ctor_flags; 36542 unsigned long save_flags; 36543 36544 /* Be lazy and only check for valid flags here, keeping 36545 * it out of the critical path in kmem_cache_alloc(). 36546 */ 36547 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) { 36548 printk(KERN_WARNING "kmem_grow: Illegal flgs %X " 36549 "(correcting) - %s\n", flags, cachep->c_name); 36550 flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW); 36551 } 36552 36553 if (flags & SLAB_NO_GROW) 36554 return 0; 36555 36556 /* The test for missing atomic flag is performed here, 36557 * rather than the more obvious place, simply to reduce 36558 * the critical path length in kmem_cache_alloc(). If 36559 * a caller is slightly mis-behaving they will 36560 * eventually be caught here (where it matters). */ 36561 if (in_interrupt() && 36562 (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) { 36563 printk(KERN_ERR "kmem_grow: Called nonatomically " 36564 "from int - %s\n", cachep->c_name); 36565 flags &= ~SLAB_LEVEL_MASK; 36566 flags |= SLAB_ATOMIC; 36567 } 36568 ctor_flags = SLAB_CTOR_CONSTRUCTOR; 36569 local_flags = (flags & SLAB_LEVEL_MASK); 36570 if (local_flags == SLAB_ATOMIC) { 36571 /* Not allowed to sleep. Need to tell a constructor 36572 * about this - it might need to know... */ 36573 ctor_flags |= SLAB_CTOR_ATOMIC; 36574 } 36575 36576 /* About to mess with non-constant members - lock. */ 36577 spin_lock_irqsave(&cachep->c_spinlock, save_flags); 36578 36579 /* Get colour for the slab, and cal the next value. */ 36580 if (!(offset = cachep->c_colour_next--)) 36581 cachep->c_colour_next = cachep->c_colour; 36582 offset *= cachep->c_align; 36583 cachep->c_dflags = SLAB_CFLGS_GROWN; 36584 36585 cachep->c_growing++; 36586 spin_unlock_irqrestore(&cachep->c_spinlock, 36587 save_flags); 36588 36589 /* A series of memory allocations for a new slab. 36590 * Neither the cache-chain semaphore, or cache-lock, 36591 * are held, but the incrementing c_growing prevents 36592 * this this cache from being reaped or shrunk. 
Note: 36593 * The cache could be selected in for reaping in 36594 * kmem_cache_reap(), but when the final test is made 36595 * the growing value will be seen. */ 36596 36597 /* Get mem for the objs. */ 36598 if (!(objp = kmem_getpages(cachep, flags, &dma))) 36599 goto failed; 36600 36601 /* Get slab management. */ 36602 if (!(slabp = kmem_cache_slabmgmt(cachep, 36603 objp+offset, 36604 local_flags))) 36605 goto opps1; 36606 if (dma) 36607 slabp->s_dma = 1; 36608 if (SLAB_BUFCTL(cachep->c_flags)) { 36609 slabp->s_index = 36610 kmem_cache_alloc(cachep->c_index_cachep, 36611 local_flags); 36612 if (!slabp->s_index) 36613 goto opps2; 36614 } 36615 36616 /* Nasty!!!!!! I hope this is OK. */ 36617 dma = 1 << cachep->c_gfporder; 36618 page = &mem_map[MAP_NR(objp)]; 36619 do { 36620 SLAB_SET_PAGE_CACHE(page, cachep); 36621 SLAB_SET_PAGE_SLAB(page, slabp); 36622 PageSetSlab(page); 36623 page++; 36624 } while (--dma); 36625 36626 slabp->s_offset = offset; /* It will fit... */ 36627 objp += offset; /* Address of first object. */ 36628 slabp->s_mem = objp; 36629 36630 /* For on-slab bufctls, c_offset is the distance 36631 * between the start of an obj and its related bufctl. 36632 * For off-slab bufctls, c_offset is the distance 36633 * between objs in the slab. */ 36634 kmem_cache_init_objs(cachep, slabp, objp, ctor_flags); 36635 36636 spin_lock_irq(&cachep->c_spinlock); 36637 36638 /* Make slab active. */ 36639 slabp->s_magic = SLAB_MAGIC_ALLOC; 36640 kmem_slab_link_end(cachep, slabp); 36641 if (cachep->c_freep == kmem_slab_end(cachep)) 36642 cachep->c_freep = slabp; 36643 SLAB_STATS_INC_GROWN(cachep); 36644 cachep->c_failures = 0; 36645 cachep->c_growing--; 36646 36647 spin_unlock_irqrestore(&cachep->c_spinlock, 36648 save_flags); 36649 return 1; 36650 opps2: 36651 if (SLAB_OFF_SLAB(cachep->c_flags)) 36652 kmem_cache_free(cache_slabp, slabp); 36653 opps1: 36654 kmem_freepages(cachep, objp); 36655 failed: 36656 spin_lock_irq(&cachep->c_spinlock); 36657 cachep->c_growing--; 36658 spin_unlock_irqrestore(&cachep->c_spinlock, 36659 save_flags); 36660 return 0; 36661 } 36662 36663 static void 36664 kmem_report_alloc_err(const char *str, 36665 kmem_cache_t * cachep) 36666 { 36667 if (cachep) 36668 SLAB_STATS_INC_ERR(cachep); /* this is atomic */ 36669 printk(KERN_ERR "kmem_alloc: %s (name=%s)\n", 36670 str, cachep ? cachep->c_name : "unknown"); 36671 } 36672 36673 static void 36674 kmem_report_free_err(const char *str, const void *objp, 36675 kmem_cache_t * cachep) 36676 { 36677 if (cachep) 36678 SLAB_STATS_INC_ERR(cachep); 36679 printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n", 36680 str, objp, cachep ? cachep->c_name : "unknown"); 36681 } 36682 36683 /* Search for a slab whose objs are suitable for DMA. 36684 * Note: since testing the first free slab (in 36685 * __kmem_cache_alloc()), ints must not have been 36686 * enabled, or the cache-lock released! */ 36687 static inline kmem_slab_t * 36688 kmem_cache_search_dma(kmem_cache_t * cachep) 36689 { 36690 kmem_slab_t *slabp = cachep->c_freep->s_nextp; 36691 36692 for (; slabp != kmem_slab_end(cachep); 36693 slabp = slabp->s_nextp) { 36694 if (!(slabp->s_dma)) 36695 continue; 36696 kmem_slab_unlink(slabp); 36697 kmem_slab_link_free(cachep, slabp); 36698 cachep->c_freep = slabp; 36699 break; 36700 } 36701 return slabp; 36702 } 36703 36704 #if SLAB_DEBUG_SUPPORT 36705 /* Perform extra freeing checks. Currently, this check 36706 * is only for caches that use bufctl structures within 36707 * the slab. 
Those which use bufctl's from the internal 36708 * cache have a reasonable check when the address is 36709 * searched for. Called with the cache-lock held. */ 36710 static void * 36711 kmem_extra_free_checks(kmem_cache_t * cachep, 36712 kmem_bufctl_t *search_bufp, 36713 kmem_bufctl_t *bufp, void * objp) 36714 { 36715 if (SLAB_BUFCTL(cachep->c_flags)) 36716 return objp; 36717 36718 /* Check slab's freelist to see if this obj is 36719 * there. */ 36720 for (; search_bufp; 36721 search_bufp = search_bufp->buf_nextp) { 36722 if (search_bufp != bufp) 36723 continue; 36724 return NULL; 36725 } 36726 return objp; 36727 } 36728 #endif /* SLAB_DEBUG_SUPPORT */ 36729 36730 /* Called with cache lock held. */ 36731 static inline void 36732 kmem_cache_full_free(kmem_cache_t *cachep, 36733 kmem_slab_t *slabp) 36734 { 36735 if (slabp->s_nextp->s_inuse) { 36736 /* Not at correct position. */ 36737 if (cachep->c_freep == slabp) 36738 cachep->c_freep = slabp->s_nextp; 36739 kmem_slab_unlink(slabp); 36740 kmem_slab_link_end(cachep, slabp); 36741 } 36742 } 36743 36744 /* Called with cache lock held. */ 36745 static inline void 36746 kmem_cache_one_free(kmem_cache_t *cachep, 36747 kmem_slab_t *slabp) 36748 { 36749 if (slabp->s_nextp->s_inuse == cachep->c_num) { 36750 kmem_slab_unlink(slabp); 36751 kmem_slab_link_free(cachep, slabp); 36752 } 36753 cachep->c_freep = slabp; 36754 } 36755 36756 /* Returns a ptr to an obj in the given cache. */ 36757 static inline void * 36758 __kmem_cache_alloc(kmem_cache_t *cachep, int flags) 36759 { 36760 kmem_slab_t *slabp; 36761 kmem_bufctl_t *bufp; 36762 void *objp; 36763 unsigned long save_flags; 36764 36765 /* Sanity check. */ 36766 if (!cachep) 36767 goto nul_ptr; 36768 spin_lock_irqsave(&cachep->c_spinlock, save_flags); 36769 try_again: 36770 /* Get slab alloc is to come from. */ 36771 slabp = cachep->c_freep; 36772 36773 /* Magic is a sanity check _and_ says if we need a new 36774 * slab. */ 36775 if (slabp->s_magic != SLAB_MAGIC_ALLOC) 36776 goto alloc_new_slab; 36777 /* DMA requests are 'rare' - keep out of the critical 36778 * path. */ 36779 if (flags & SLAB_DMA) 36780 goto search_dma; 36781 try_again_dma: 36782 SLAB_STATS_INC_ALLOCED(cachep); 36783 SLAB_STATS_INC_ACTIVE(cachep); 36784 SLAB_STATS_SET_HIGH(cachep); 36785 slabp->s_inuse++; 36786 bufp = slabp->s_freep; 36787 slabp->s_freep = bufp->buf_nextp; 36788 if (slabp->s_freep) { 36789 ret_obj: 36790 if (!slabp->s_index) { 36791 bufp->buf_slabp = slabp; 36792 objp = ((void*)bufp) - cachep->c_offset; 36793 finished: 36794 /* The lock is not needed by the red-zone or poison 36795 * ops, and the obj has been removed from the slab. 36796 * Should be safe to drop the lock here. */ 36797 spin_unlock_irqrestore(&cachep->c_spinlock, 36798 save_flags); 36799 #if SLAB_DEBUG_SUPPORT 36800 if (cachep->c_flags & SLAB_RED_ZONE) 36801 goto red_zone; 36802 ret_red: 36803 if ((cachep->c_flags & SLAB_POISON) && 36804 kmem_check_poison_obj(cachep, objp)) 36805 kmem_report_alloc_err("Bad poison", cachep); 36806 #endif /* SLAB_DEBUG_SUPPORT */ 36807 return objp; 36808 } 36809 /* Update index ptr. */ 36810 objp = ((bufp-slabp->s_index)*cachep->c_offset) + 36811 slabp->s_mem; 36812 bufp->buf_objp = objp; 36813 goto finished; 36814 } 36815 cachep->c_freep = slabp->s_nextp; 36816 goto ret_obj; 36817 36818 #if SLAB_DEBUG_SUPPORT 36819 red_zone: 36820 /* Set alloc red-zone, and check old one. 
*/ 36821 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) 36822 != SLAB_RED_MAGIC1) 36823 kmem_report_alloc_err("Bad front redzone", cachep); 36824 objp += BYTES_PER_WORD; 36825 if (xchg((unsigned long *)(objp+cachep->c_org_size), 36826 SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1) 36827 kmem_report_alloc_err("Bad rear redzone", cachep); 36828 goto ret_red; 36829 #endif /* SLAB_DEBUG_SUPPORT */ 36830 36831 search_dma: 36832 if (slabp->s_dma 36833 (slabp = kmem_cache_search_dma(cachep)) != 36834 kmem_slab_end(cachep)) 36835 goto try_again_dma; 36836 alloc_new_slab: 36837 /* Either out of slabs, or magic number corruption. */ 36838 if (slabp == kmem_slab_end(cachep)) { 36839 /* Need a new slab. Release the lock before calling 36840 * kmem_cache_grow(). This allows objs to be 36841 * released back into the cache while growing. */ 36842 spin_unlock_irqrestore(&cachep->c_spinlock, 36843 save_flags); 36844 if (kmem_cache_grow(cachep, flags)) { 36845 /* Someone may have stolen our objs. Doesn't 36846 * matter, we'll just come back here again. */ 36847 spin_lock_irq(&cachep->c_spinlock); 36848 goto try_again; 36849 } 36850 /* Couldn't grow, but some objs may have been 36851 * freed. */ 36852 spin_lock_irq(&cachep->c_spinlock); 36853 if (cachep->c_freep != kmem_slab_end(cachep)) { 36854 if ((flags & SLAB_ATOMIC) == 0) 36855 goto try_again; 36856 } 36857 } else { 36858 /* Very serious error - maybe panic() here? */ 36859 kmem_report_alloc_err("Bad slab magic (corrupt)", 36860 cachep); 36861 } 36862 spin_unlock_irqrestore(&cachep->c_spinlock, 36863 save_flags); 36864 err_exit: 36865 return NULL; 36866 nul_ptr: 36867 kmem_report_alloc_err("NULL ptr", NULL); 36868 goto err_exit; 36869 } 36870 36871 /* Release an obj back to its cache. If the obj has a 36872 * constructed state, it should be in this state _before_ 36873 * it is released. */ 36874 static inline void 36875 __kmem_cache_free(kmem_cache_t *cachep, const void *objp) 36876 { 36877 kmem_slab_t *slabp; 36878 kmem_bufctl_t *bufp; 36879 unsigned long save_flags; 36880 36881 /* Basic sanity checks. */ 36882 if (!cachep !objp) 36883 goto null_addr; 36884 36885 #if SLAB_DEBUG_SUPPORT 36886 /* A verify func is called without the cache-lock 36887 * held. */ 36888 if (cachep->c_flags & SLAB_DEBUG_INITIAL) 36889 goto init_state_check; 36890 finished_initial: 36891 36892 if (cachep->c_flags & SLAB_RED_ZONE) 36893 goto red_zone; 36894 return_red: 36895 #endif /* SLAB_DEBUG_SUPPORT */ 36896 36897 spin_lock_irqsave(&cachep->c_spinlock, save_flags); 36898 36899 if (SLAB_BUFCTL(cachep->c_flags)) 36900 goto bufctl; 36901 bufp = (kmem_bufctl_t *)(objp+cachep->c_offset); 36902 36903 /* Get slab for the object. */ 36904 #if 0 36905 /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref 36906 * for some objects. Is this worth while? XXX */ 36907 if (cachep->c_flags & SLAB_HIGH_PACK) 36908 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]); 36909 else 36910 #endif 36911 slabp = bufp->buf_slabp; 36912 36913 check_magic: 36914 /* Sanity check. */ 36915 if (slabp->s_magic != SLAB_MAGIC_ALLOC) 36916 goto bad_slab; 36917 36918 #if SLAB_DEBUG_SUPPORT 36919 if (cachep->c_flags & SLAB_DEBUG_FREE) 36920 goto extra_checks; 36921 passed_extra: 36922 #endif /* SLAB_DEBUG_SUPPORT */ 36923 36924 if (slabp->s_inuse) { /* Sanity check. */ 36925 SLAB_STATS_DEC_ACTIVE(cachep); 36926 slabp->s_inuse--; 36927 bufp->buf_nextp = slabp->s_freep; 36928 slabp->s_freep = bufp; 36929 if (bufp->buf_nextp) { 36930 if (slabp->s_inuse) { 36931 /* (hopefully) The most common case. 
*/ 36932 finished: 36933 #if SLAB_DEBUG_SUPPORT 36934 if (cachep->c_flags & SLAB_POISON) { 36935 if (cachep->c_flags & SLAB_RED_ZONE) 36936 objp += BYTES_PER_WORD; 36937 kmem_poison_obj(cachep, objp); 36938 } 36939 #endif /* SLAB_DEBUG_SUPPORT */ 36940 spin_unlock_irqrestore(&cachep->c_spinlock, 36941 save_flags); 36942 return; 36943 } 36944 kmem_cache_full_free(cachep, slabp); 36945 goto finished; 36946 } 36947 kmem_cache_one_free(cachep, slabp); 36948 goto finished; 36949 } 36950 36951 /* Don't add to freelist. */ 36952 spin_unlock_irqrestore(&cachep->c_spinlock, 36953 save_flags); 36954 kmem_report_free_err("free with no active objs", 36955 objp, cachep); 36956 return; 36957 bufctl: 36958 /* No 'extra' checks are performed for objs stored this 36959 * way, finding the obj is check enough. */ 36960 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]); 36961 bufp = &slabp->s_index[(objp - slabp->s_mem) / 36962 cachep->c_offset]; 36963 if (bufp->buf_objp == objp) 36964 goto check_magic; 36965 spin_unlock_irqrestore(&cachep->c_spinlock, 36966 save_flags); 36967 kmem_report_free_err("Either bad obj addr or double " 36968 "free", objp, cachep); 36969 return; 36970 #if SLAB_DEBUG_SUPPORT 36971 init_state_check: 36972 /* Need to call the slab's constructor so the caller 36973 * can perform a verify of its state (debugging). */ 36974 cachep->c_ctor(objp, cachep, 36975 SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); 36976 goto finished_initial; 36977 extra_checks: 36978 if (!kmem_extra_free_checks(cachep, slabp->s_freep, 36979 bufp, objp)) { 36980 spin_unlock_irqrestore(&cachep->c_spinlock, 36981 save_flags); 36982 kmem_report_free_err("Double free detected during " 36983 "checks", objp, cachep); 36984 return; 36985 } 36986 goto passed_extra; 36987 red_zone: 36988 /* We do not hold the cache-lock while checking the 36989 * red-zone. */ 36990 objp -= BYTES_PER_WORD; 36991 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != 36992 SLAB_RED_MAGIC2) { 36993 /* Either write before start of obj, or a double 36994 * free. */ 36995 kmem_report_free_err("Bad front redzone", objp, 36996 cachep); 36997 } 36998 if (xchg((unsigned long *) 36999 (objp+cachep->c_org_size+BYTES_PER_WORD), 37000 SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) { 37001 /* Either write past end of obj, or a double free. */ 37002 kmem_report_free_err("Bad rear redzone", 37003 objp, cachep); 37004 } 37005 goto return_red; 37006 #endif /* SLAB_DEBUG_SUPPORT */ 37007 37008 bad_slab: 37009 /* Slab doesn't contain the correct magic num. */ 37010 if (slabp->s_magic == SLAB_MAGIC_DESTROYED) { 37011 /* Magic num says this is a destroyed slab. */ 37012 kmem_report_free_err("free from inactive slab", 37013 objp, cachep); 37014 } else 37015 kmem_report_free_err("Bad obj addr", objp, cachep); 37016 spin_unlock_irqrestore(&cachep->c_spinlock, 37017 save_flags); 37018 37019 #if 1 37020 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL 37021 * CAPS. GET THE CALL CHAIN. 
37022   *(int *) 0 = 0;
37023 #endif
37024
37025   return;
37026 null_addr:
37027   kmem_report_free_err("NULL ptr", objp, cachep);
37028   return;
37029 }
37030
37031 void *
37032 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
37033 {
37034   return __kmem_cache_alloc(cachep, flags);
37035 }
37036
37037 void
37038 kmem_cache_free(kmem_cache_t *cachep, void *objp)
37039 {
37040   __kmem_cache_free(cachep, objp);
37041 }
37042
37043 void *
37044 kmalloc(size_t size, int flags)
37045 {
37046   cache_sizes_t *csizep = cache_sizes;
37047
37048   for (; csizep->cs_size; csizep++) {
37049     if (size > csizep->cs_size)
37050       continue;
37051     return __kmem_cache_alloc(csizep->cs_cachep, flags);
37052   }
37053   printk(KERN_ERR "kmalloc: Size (%lu) too large\n",
37054          (unsigned long) size);
37055   return NULL;
37056 }
37057
37058 void
37059 kfree(const void *objp)
37060 {
37061   struct page *page;
37062   int nr;
37063
37064   if (!objp)
37065     goto null_ptr;
37066   nr = MAP_NR(objp);
37067   if (nr >= max_mapnr)
37068     goto bad_ptr;
37069
37070   /* Assume we own the page structure - hence no locking.
37071    * If someone is misbehaving (for example, calling us
37072    * with a bad address), then access to the page
37073    * structure can race with the kmem_slab_destroy()
37074    * code. Need to add a spin_lock to each page
37075    * structure, which would be useful in threading the
37076    * gfp() functions.... */
37077   page = &mem_map[nr];
37078   if (PageSlab(page)) {
37079     kmem_cache_t *cachep;
37080
37081     /* Here, we again assume the obj address is good. If
37082      * it isn't, and happens to map onto another general
37083      * cache page which has no active objs, then we race.
37084      */
37085     cachep = SLAB_GET_PAGE_CACHE(page);
37086     if (cachep &&
37087         (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
37088       __kmem_cache_free(cachep, objp);
37089       return;
37090     }
37091   }
37092 bad_ptr:
37093   printk(KERN_ERR "kfree: Bad obj %p\n", objp);
37094
37095 #if 1
37096   /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL
37097    * CAPS. GET THE CALL CHAIN. */
37098   *(int *) 0 = 0;
37099 #endif
37100
37101 null_ptr:
37102   return;
37103 }
37104
37105 void
37106 kfree_s(const void *objp, size_t size)
37107 {
37108   struct page *page;
37109   int nr;
37110
37111   if (!objp)
37112     goto null_ptr;
37113   nr = MAP_NR(objp);
37114   if (nr >= max_mapnr)
37115     goto null_ptr;
37116   /* See comment in kfree() */
37117   page = &mem_map[nr];
37118   if (PageSlab(page)) {
37119     kmem_cache_t *cachep;
37120     /* See comment in kfree() */
37121     cachep = SLAB_GET_PAGE_CACHE(page);
37122     if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
37123       if (size <= cachep->c_org_size) {
37124         /* XXX better check */
37125         __kmem_cache_free(cachep, objp);
37126         return;
37127       }
37128     }
37129   }
37130 null_ptr:
37131   printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
37132   return;
37133 }
37134
37135 kmem_cache_t *
37136 kmem_find_general_cachep(size_t size)
37137 {
37138   cache_sizes_t *csizep = cache_sizes;
37139
37140   /* This function could be moved to the header file, and
37141    * made inline so consumers can quickly determine what
37142    * cache pointer they require.
37143    */
37144   for (; csizep->cs_size; csizep++) {
37145     if (size > csizep->cs_size)
37146       continue;
37147     break;
37148   }
37149   return csizep->cs_cachep;
37150 }
37151
37152
37153 /* Called from try_to_free_page().
37154  * This function _cannot_ be called within an int, but it
37155  * can be interrupted.
37156  */
37157 void
37158 kmem_cache_reap(int gfp_mask)
37159 {
37160   kmem_slab_t *slabp;
37161   kmem_cache_t *searchp;
37162   kmem_cache_t *best_cachep;
37163   unsigned int scan;
37164   unsigned int reap_level;
37165
37166   if (in_interrupt()) {
37167     printk("kmem_cache_reap() called within int!\n");
37168     return;
37169   }
37170
37171   /* We really need a test semaphore op so we can avoid
37172    * sleeping when !wait is true. */
37173   down(&cache_chain_sem);
37174
37175   scan = 10;
37176   reap_level = 0;
37177
37178   best_cachep = NULL;
37179   searchp = clock_searchp;
37180   do {
37181     unsigned int full_free;
37182     unsigned int dma_flag;
37183
37184     /* It's safe to test this without holding the
37185      * cache-lock. */
37186     if (searchp->c_flags & SLAB_NO_REAP)
37187       goto next;
37188     spin_lock_irq(&searchp->c_spinlock);
37189     if (searchp->c_growing)
37190       goto next_unlock;
37191     if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
37192       searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
37193       goto next_unlock;
37194     }
37195     /* Sanity check for corruption of static values. */
37196     if (searchp->c_inuse ||
37197         searchp->c_magic != SLAB_C_MAGIC) {
37198       spin_unlock_irq(&searchp->c_spinlock);
37199       printk(KERN_ERR "kmem_reap: Corrupted cache struct"
37200              " for %s\n", searchp->c_name);
37201       goto next;
37202     }
37203     dma_flag = 0;
37204     full_free = 0;
37205
37206     /* Count the fully free slabs. There should not be
37207      * many, since we are holding the cache lock. */
37208     slabp = searchp->c_lastp;
37209     while (!slabp->s_inuse &&
37210            slabp != kmem_slab_end(searchp)) {
37211       slabp = slabp->s_prevp;
37212       full_free++;
37213       if (slabp->s_dma)
37214         dma_flag++;
37215     }
37216     spin_unlock_irq(&searchp->c_spinlock);
37217
37218     if ((gfp_mask & GFP_DMA) && !dma_flag)
37219       goto next;
37220
37221     if (full_free) {
37222       if (full_free >= 10) {
37223         best_cachep = searchp;
37224         break;
37225       }
37226
37227       /* Try to avoid slabs with constructors and/or more
37228        * than one page per slab (as it can be difficult
37229        * to get high orders from gfp()). */
37230       if (full_free >= reap_level) {
37231         reap_level = full_free;
37232         best_cachep = searchp;
37233       }
37234     }
37235     goto next;
37236 next_unlock:
37237     spin_unlock_irq(&searchp->c_spinlock);
37238 next:
37239     searchp = searchp->c_nextp;
37240   } while (--scan && searchp != clock_searchp);
37241
37242   clock_searchp = searchp;
37243   up(&cache_chain_sem);
37244
37245   if (!best_cachep) {
37246     /* couldn't find anything to reap */
37247     return;
37248   }
37249
37250   spin_lock_irq(&best_cachep->c_spinlock);
37251   while (!best_cachep->c_growing &&
37252          !(slabp = best_cachep->c_lastp)->s_inuse &&
37253          slabp != kmem_slab_end(best_cachep)) {
37254     if (gfp_mask & GFP_DMA) {
37255       do {
37256         if (slabp->s_dma)
37257           goto good_dma;
37258         slabp = slabp->s_prevp;
37259       } while (!slabp->s_inuse &&
37260                slabp != kmem_slab_end(best_cachep));
37261
37262       /* Didn't find a DMA slab (there was a free one -
37263        * it must have become active). */
37264       goto dma_fail;
37265 good_dma:
37266     }
37267     if (slabp == best_cachep->c_freep)
37268       best_cachep->c_freep = slabp->s_nextp;
37269     kmem_slab_unlink(slabp);
37270     SLAB_STATS_INC_REAPED(best_cachep);
37271
37272     /* Safe to drop the lock. The slab is no longer
37273      * linked to the cache. */
37274     spin_unlock_irq(&best_cachep->c_spinlock);
37275     kmem_slab_destroy(best_cachep, slabp);
37276     spin_lock_irq(&best_cachep->c_spinlock);
37277   }
37278 dma_fail:
37279   spin_unlock_irq(&best_cachep->c_spinlock);
37280   return;
37281 }
37282
37283 #if SLAB_SELFTEST
37284 /* A few v. simple tests */
37285 static void
37286 kmem_self_test(void)
37287 {
37288   kmem_cache_t *test_cachep;
37289
37290   printk(KERN_INFO "kmem_test() - start\n");
37291   test_cachep =
37292     kmem_cache_create("test-cachep", 16, 0,
37293                       SLAB_RED_ZONE|SLAB_POISON,
37294                       NULL, NULL);
37295   if (test_cachep) {
37296     char *objp =
37297       kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37298     if (objp) {
37299       /* Write in front and past end, red-zone test. */
37300       *(objp-1) = 1;
37301       *(objp+16) = 1;
37302       kmem_cache_free(test_cachep, objp);
37303
37304       /* Mess up poisoning. */
37305       *objp = 10;
37306       objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37307       kmem_cache_free(test_cachep, objp);
37308
37309       /* Mess up poisoning (again). */
37310       *objp = 10;
37311       kmem_cache_shrink(test_cachep);
37312     }
37313   }
37314   printk(KERN_INFO "kmem_test() - finished\n");
37315 }
37316 #endif /* SLAB_SELFTEST */
37317
37318 #if defined(CONFIG_PROC_FS)
37319 /* /proc/slabinfo
37320  * cache-name num-active-objs total-objs num-active-slabs
37321  * ... total-slabs num-pages-per-slab
37322  */
37323 int
37324 get_slabinfo(char *buf)
37325 {
37326   kmem_cache_t *cachep;
37327   kmem_slab_t *slabp;
37328   unsigned long active_objs;
37329   unsigned long save_flags;
37330   unsigned long num_slabs;
37331   unsigned long num_objs;
37332   int len=0;
37333 #if SLAB_STATS
37334   unsigned long active_slabs;
37335 #endif /* SLAB_STATS */
37336
37337   __save_flags(save_flags);
37338
37339   /* Output format version, so at least we can change it
37340    * without _too_ many complaints. */
37341 #if SLAB_STATS
37342   len = sprintf(buf,
37343                 "slabinfo - version: 1.0 (statistics)\n");
37344 #else
37345   len = sprintf(buf, "slabinfo - version: 1.0\n");
37346 #endif /* SLAB_STATS */
37347   down(&cache_chain_sem);
37348   cachep = &cache_cache;
37349   do {
37350 #if SLAB_STATS
37351     active_slabs = 0;
37352 #endif /* SLAB_STATS */
37353     num_slabs = active_objs = 0;
37354     spin_lock_irq(&cachep->c_spinlock);
37355     for (slabp = cachep->c_firstp;
37356          slabp != kmem_slab_end(cachep);
37357          slabp = slabp->s_nextp) {
37358       active_objs += slabp->s_inuse;
37359       num_slabs++;
37360 #if SLAB_STATS
37361       if (slabp->s_inuse)
37362         active_slabs++;
37363 #endif /* SLAB_STATS */
37364     }
37365     num_objs = cachep->c_num*num_slabs;
37366 #if SLAB_STATS
37367     {
37368       unsigned long errors;
37369       unsigned long high = cachep->c_high_mark;
37370       unsigned long grown = cachep->c_grown;
37371       unsigned long reaped = cachep->c_reaped;
37372       unsigned long allocs = cachep->c_num_allocations;
37373       errors =
37374         (unsigned long) atomic_read(&cachep->c_errors);
37375       spin_unlock_irqrestore(&cachep->c_spinlock,
37376                              save_flags);
37377       len += sprintf(buf+len,
37378                      "%-16s %6lu %6lu %4lu %4lu %4lu "
37379                      "%6lu %7lu %5lu %4lu %4lu\n",
37380                      cachep->c_name, active_objs,
37381                      num_objs, active_slabs, num_slabs,
37382                      (1<<cachep->c_gfporder)*num_slabs,
37383                      high, allocs, grown, reaped, errors);
37384     }
37385 #else
37386     spin_unlock_irqrestore(&cachep->c_spinlock,
37387                            save_flags);
37388     len += sprintf(buf+len, "%-17s %6lu %6lu\n",
37389                    cachep->c_name, active_objs, num_objs);
37390 #endif /* SLAB_STATS */
37391   } while ((cachep = cachep->c_nextp) != &cache_cache);
37392   up(&cache_chain_sem);
37393
37394   return len;
37395 }
37396 #endif /* CONFIG_PROC_FS */
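
The exported interface implemented in this part of the listing is small: kmem_cache_alloc()/kmem_cache_free() for objects taken from a dedicated cache, and kmalloc()/kfree() for anonymous allocations served by the general caches. The sketch below, modeled on the kmem_self_test() routine at lines 37285-37315, shows how a kernel subsystem of this era might drive these calls. It is only an illustration under stated assumptions: the cache name "example-cache", the 64-byte object size, and the example_init() wrapper are invented for the example and are not part of the kernel source.

/* Minimal usage sketch (hypothetical module code, not from the
 * kernel sources). Assumes the 2.2-era interfaces shown in this
 * listing. */
#include <linux/slab.h>
#include <linux/errno.h>

static kmem_cache_t *example_cachep;

static int example_init(void)
{
  void *objp;
  char *buf;

  /* A private object cache; no constructor/destructor, just as in
   * kmem_self_test() above. */
  example_cachep = kmem_cache_create("example-cache", 64, 0,
                                     SLAB_HWCACHE_ALIGN, NULL, NULL);
  if (!example_cachep)
    return -ENOMEM;

  /* Objects pass through __kmem_cache_alloc()/__kmem_cache_free(). */
  objp = kmem_cache_alloc(example_cachep, SLAB_KERNEL);
  if (objp)
    kmem_cache_free(example_cachep, objp);

  /* kmalloc() walks cache_sizes[] and takes the first general cache
   * whose cs_size is large enough; kfree() recovers the owning cache
   * from the object's page structure. */
  buf = kmalloc(100, GFP_KERNEL);
  if (buf)
    kfree(buf);

  /* Caches can never be destroyed in this allocator, only shrunk. */
  kmem_cache_shrink(example_cachep);
  return 0;
}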
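
On the consumer side, get_slabinfo() (lines 37323-37395) emits one header line carrying the format version and then one line per cache. Assuming the non-statistics build (SLAB_STATS is 0), each cache line carries only the name, the active-object count and the total-object count, so a user-space reader could be as simple as the following sketch; the program is an illustration only and is not part of the kernel or its tools.

/* Sketch of a user-space reader for the non-statistics format
 * ("%-17s %6lu %6lu\n" per cache). Illustrative only. */
#include <stdio.h>

int main(void)
{
  char header[128], name[64];
  unsigned long active, total;
  FILE *f = fopen("/proc/slabinfo", "r");

  if (!f)
    return 1;
  /* Skip the version line ("slabinfo - version: 1.0"). */
  if (!fgets(header, sizeof(header), f)) {
    fclose(f);
    return 1;
  }
  while (fscanf(f, "%63s %lu %lu", name, &active, &total) == 3)
    printf("%-17s %lu of %lu objects in use\n", name, active, total);
  fclose(f);
  return 0;
}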


