/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/system.h>		/* for cli()/sti() */
#include <asm/uaccess.h>	/* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>

/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 */

#ifdef CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif

/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
	struct page *next;
	struct page *prev;
	unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))

static struct free_area_struct free_area[NR_MEM_LISTS];

static inline void init_mem_queue(struct free_area_struct * head)
{
	head->next = memory_head(head);
	head->prev = memory_head(head);
}

static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
	struct page * next = head->next;

	entry->prev = memory_head(head);
	entry->next = next;
	next->prev = entry;
	head->next = entry;
}

static inline void remove_mem_queue(struct page * entry)
{
	struct page * next = entry->next;
	struct page * prev = entry->prev;
	next->prev = prev;
	prev->next = next;
}
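
/*
 * A minimal usage sketch of the three queue helpers above (not built;
 * "area" and "pg" are hypothetical here):
 */
#if 0
	init_mem_queue(area);		/* empty ring: area->next == area->prev == memory_head(area) */
	add_mem_queue(area, pg);	/* link pg in right after the head */
	remove_mem_queue(pg);		/* unlink pg again; the ring has a sentinel head, so no NULL checks */
#endif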

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
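
/*
 * Worked example of the hint above, for a 32-bit word and order 2:
 *
 *	 mask = ~0UL << 2   = 0xfffffffc
 *	~mask               = 0x00000003
 *	-mask = 1 + ~mask   = 0x00000004 = 1 << 2
 *
 * So "map_nr ^ -mask" flips bit <order> of the page number, which is
 * exactly the buddy block at that order, and "nr_free_pages -= mask"
 * adds 1 << order to the free page count.
 */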

static spinlock_t page_alloc_lock;

/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * but this had better return false if any reasonable "get_free_page()"
 * allocation could currently fail..
 *
 * Currently we approve of the following situations:
 * - the highest memory order has two entries
 * - the highest memory order has one free entry and:
 *	- the next-highest memory order has two free entries
 * - the highest memory order has one free entry and:
 *	- the next-highest memory order has one free entry
 *	- the next-next-highest memory order has two free entries
 *
 * [previously, there had to be two entries of the highest memory
 *  order, but this led to problems on large-memory machines.]
 */
int free_memory_available(void)
{
	int i, retval = 0;
	unsigned long flags;
	struct free_area_struct * list = NULL;

	spin_lock_irqsave(&page_alloc_lock, flags);
	/* We fall through the loop if the list contains one
	 * item. -- thanks to Colin Plumb <colin@nyx.net>
	 */
	for (i = 1; i < 4; ++i) {
		list = free_area + NR_MEM_LISTS - i;
		if (list->next == memory_head(list))
			break;
		if (list->next->next == memory_head(list))
			continue;
		retval = 1;
		break;
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	return retval;
}
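
/*
 * Tracing the loop above against the cases in the comment: i == 1 looks
 * at the highest order list; two or more entries is an immediate "yes",
 * an empty list an immediate "no", and exactly one entry falls through
 * to i == 2, which applies the same test to the next-highest order, and
 * so on down to i == 3.
 */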

static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
	struct free_area_struct *area = free_area + order;
	unsigned long index = map_nr >> (1 + order);
	unsigned long mask = (~0UL) << order;
	unsigned long flags;

	spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

	map_nr &= mask;
	nr_free_pages -= mask;
	while (mask + (1 << (NR_MEM_LISTS-1))) {
		if (!test_and_change_bit(index, area->map))
			break;
		remove_mem_queue(list(map_nr ^ -mask));
		mask <<= 1;
		area++;
		index >>= 1;
		map_nr &= mask;
	}
	add_mem_queue(area, list(map_nr));

#undef list

	spin_unlock_irqrestore(&page_alloc_lock, flags);
}
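
/*
 * Coalescing sketch for free_pages_ok(), for an order-0 free of page 5
 * (map_nr == 5, mask == ~0UL, -mask == 1):
 *
 *	pass 1: the buddy is 5 ^ 1 == 4; if test_and_change_bit() finds
 *		the pair's bit set (page 4 is free at order 0), page 4
 *		is unlinked and the pair becomes the order-1 block at
 *		map_nr 4 (map_nr &= mask);
 *	pass 2: the buddy of block 4 at order 1 is 4 ^ 2 == 6, and so on
 *		until the buddy is found busy or order NR_MEM_LISTS-1 is
 *		reached, at which point the merged block is queued on
 *		"area".
 */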

void __free_page(struct page *page)
{
	if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
		if (PageSwapCache(page))
			panic("Freeing swap cache page");
		free_pages_ok(page->map_nr, 0);
	}
	if (PageSwapCache(page) && atomic_read(&page->count) == 1)
		panic("Releasing swap cache page");
}

void free_pages(unsigned long addr, unsigned long order)
{
	unsigned long map_nr = MAP_NR(addr);

	if (map_nr < max_mapnr) {
		mem_map_t * map = mem_map + map_nr;
		if (PageReserved(map))
			return;
		if (atomic_dec_and_test(&map->count)) {
			if (PageSwapCache(map))
				panic("Freeing swap cache pages");
			free_pages_ok(map_nr, order);
			return;
		}
		if (PageSwapCache(map) && atomic_read(&map->count) == 1)
			panic("Releasing swap cache pages at %p",
				__builtin_return_address(0));
	}
}

/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
#define RMQUEUE(order, maxorder, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
	do { struct page *prev = memory_head(area), *ret = prev->next; \
		while (memory_head(area) != ret) { \
			if (new_order >= maxorder && ret->next == prev) \
				break; \
			if (!dma || CAN_DMA(ret)) { \
				unsigned long map_nr = ret->map_nr; \
				(prev->next = ret->next)->prev = prev; \
				MARK_USED(map_nr, new_order, area); \
				nr_free_pages -= 1 << order; \
				EXPAND(ret, map_nr, order, new_order, area); \
				spin_unlock_irqrestore(&page_alloc_lock, flags); \
				return ADDRESS(map_nr); \
			} \
			prev = ret; \
			ret = ret->next; \
		} \
		new_order++; area++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)

#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
	while (high > low) { \
		area--; high--; size >>= 1; \
		add_mem_queue(area, map); \
		MARK_USED(index, high, area); \
		index += size; \
		map += size; \
	} \
	atomic_set(&map->count, 1); \
	map->age = PAGE_INITIAL_AGE; \
} while (0)
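
/*
 * RMQUEUE() walks free_area[] upward from the requested order, refusing
 * to take the last block of any list at or above maxorder.  Splitting
 * sketch for EXPAND(), for an order-1 request satisfied from a free
 * order-3 block (low == 1, high == 3):
 *
 *	each step puts the lower half of the current block back on the
 *	next-smaller free list, toggles that order's pair-bit with
 *	MARK_USED() and keeps the upper half, so the 8-page block
 *	becomes one free 4-page block, one free 2-page block, and the
 *	2-page piece whose address RMQUEUE() returns.
 */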

unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	unsigned long flags, maxorder;

	if (order >= NR_MEM_LISTS)
		goto nopage;

	/*
	 * "maxorder" is the highest order number that we're allowed
	 * to empty in order to find a free page..
	 */
	maxorder = order + NR_MEM_LISTS/3;
	if (gfp_mask & __GFP_MED)
		maxorder += NR_MEM_LISTS/3;
	if ((gfp_mask & __GFP_HIGH) || maxorder > NR_MEM_LISTS)
		maxorder = NR_MEM_LISTS;

	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk("gfp called nonatomically from interrupt %p\n",
				__builtin_return_address(0));
			gfp_mask &= ~__GFP_WAIT;
		}
	}

repeat:
	spin_lock_irqsave(&page_alloc_lock, flags);
	RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA));
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	if ((gfp_mask & __GFP_WAIT) && try_to_free_page(gfp_mask))
		goto repeat;
nopage:
	return 0;
}
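
/*
 * Caller-side sketch (not part of this file): one page plus a 4-page
 * DMA-capable buffer, both with GFP_KERNEL, which includes __GFP_WAIT
 * and so lets the retry loop above call try_to_free_page() and sleep:
 */
#if 0
	unsigned long page = __get_free_pages(GFP_KERNEL, 0);
	unsigned long dma_buf = __get_free_pages(GFP_KERNEL | GFP_DMA, 2);

	if (!page || !dma_buf) {
		/* 0 means the allocation failed and must be handled */
	}
	/* ... use the buffers ... */
	free_pages(dma_buf, 2);
	free_pages(page, 0);
#endif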

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n( ", nr_free_pages << (PAGE_SHIFT-10));
	spin_lock_irqsave(&page_alloc_lock, flags);
	for (order = 0; order < NR_MEM_LISTS; order++) {
		struct page * tmp;
		unsigned long nr = 0;
		for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
			nr++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * set up the free-area data structures:
 *  - mark all pages reserved
 *  - mark all memory queues empty
 *  - clear the memory bitmaps
 */
__initfunc(unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem))
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	unsigned long i;

	/*
	 * select nr of pages we try to keep free for important stuff
	 * with a minimum of 48 pages. This is totally arbitrary
	 */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
	if (i < 48)
		i = 48;
	free_pages_low = i + (i>>1);
	free_pages_high = i + i;
	mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
	p = mem_map + MAP_NR(end_mem);
	start_mem = LONG_ALIGN((unsigned long) p);
	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
	do {
		--p;
		atomic_set(&p->count, 0);
		p->flags = (1 << PG_DMA) | (1 << PG_reserved);
		p->map_nr = p - mem_map;
	} while (p > mem_map);

	for (i = 0; i < NR_MEM_LISTS; i++) {
		unsigned long bitmap_size;
		init_mem_queue(free_area+i);
		mask += mask;
		end_mem = (end_mem + ~mask) & mask;
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;
		bitmap_size = LONG_ALIGN(bitmap_size);
		free_area[i].map = (unsigned int *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}
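
/*
 * Bitmap sizing above, worked for end_mem - PAGE_OFFSET == 64MB and
 * PAGE_SHIFT == 12: order 0 gets (64MB >> 12) == 16384 bits == 2048
 * bytes, order 1 half of that, and so on; roughly 4kB of bitmap in
 * total for six lists.
 */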

/*
 * The tests may look silly, but it essentially makes sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
	pte_t * page_table, unsigned long entry, int write_access)
{
	unsigned long page;
	struct page *page_map;

	page_map = read_swap_cache(entry);

	if (pte_val(*page_table) != entry) {
		if (page_map)
			free_page_and_swap_cache(page_address(page_map));
		return;
	}
	if (!page_map) {
		set_pte(page_table, BAD_PAGE);
		swap_free(entry);
		oom(tsk);
		return;
	}

	page = page_address(page_map);
	vma->vm_mm->rss++;
	tsk->min_flt++;
	swap_free(entry);

	if (!write_access || is_page_shared(page_map)) {
		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
		return;
	}

	/* The page is unshared, and we want write access. In this
	   case, it is safe to tear down the swap cache and give the
	   page over entirely to this process. */

	delete_from_swap_cache(page_map);
	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
}