/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/system.h>		/* for cli()/sti() */
#include <asm/uaccess.h>	/* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>

/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 */

#ifdef CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif

/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
	struct page *next;
	struct page *prev;
	unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))

static struct free_area_struct free_area[NR_MEM_LISTS];

static inline void init_mem_queue(struct free_area_struct * head)
{
	head->next = memory_head(head);
	head->prev = memory_head(head);
}

static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
	struct page * next = head->next;

	entry->prev = memory_head(head);
	entry->next = next;
	next->prev = entry;
	head->next = entry;
}

static inline void remove_mem_queue(struct page * entry)
{
	struct page * next = entry->next;
	struct page * prev = entry->prev;
	next->prev = prev;
	prev->next = next;
}
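
/*
 * A minimal usage sketch of the three queue helpers above (not built;
 * "area" and "pg" are hypothetical here):
 */
#if 0
	init_mem_queue(area);		/* empty ring: area->next == area->prev == memory_head(area) */
	add_mem_queue(area, pg);	/* link pg in right after the head */
	remove_mem_queue(pg);		/* unlink pg again; the ring has a sentinel head, so no NULL checks */
#endif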

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
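
/*
 * Worked example of the hint above, for a 32-bit word and order 2:
 *
 *	 mask = ~0UL << 2   = 0xfffffffc
 *	~mask               = 0x00000003
 *	-mask = 1 + ~mask   = 0x00000004 = 1 << 2
 *
 * So "map_nr ^ -mask" flips bit <order> of the page number, which is
 * exactly the buddy block at that order, and "nr_free_pages -= mask"
 * adds 1 << order to the free page count.
 */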

static spinlock_t page_alloc_lock;

/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * but this had better return false if any reasonable "get_free_page()"
 * allocation could currently fail..
 *
 * Currently we approve of the following situations:
 * - the highest memory order has two entries
 * - the highest memory order has one free entry and:
 *	- the next-highest memory order has two free entries
 * - the highest memory order has one free entry and:
 *	- the next-highest memory order has one free entry
 *	- the next-next-highest memory order has two free entries
 *
 * [previously, there had to be two entries of the highest memory
 *  order, but this led to problems on large-memory machines.]
 */
int free_memory_available(void)
{
	int i, retval = 0;
	unsigned long flags;
	struct free_area_struct * list = NULL;

	spin_lock_irqsave(&page_alloc_lock, flags);
	/* We fall through the loop if the list contains one
	 * item. -- thanks to Colin Plumb <colin@nyx.net>
	 */
	for (i = 1; i < 4; ++i) {
		list = free_area + NR_MEM_LISTS - i;
		if (list->next == memory_head(list))
			break;
		if (list->next->next == memory_head(list))
			continue;
		retval = 1;
		break;
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	return retval;
}
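
/*
 * Tracing the loop above against the cases in the comment: i == 1 looks
 * at the highest order list; two or more entries is an immediate "yes",
 * an empty list an immediate "no", and exactly one entry falls through
 * to i == 2, which applies the same test to the next-highest order, and
 * so on down to i == 3.
 */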

static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
	struct free_area_struct *area = free_area + order;
	unsigned long index = map_nr >> (1 + order);
	unsigned long mask = (~0UL) << order;
	unsigned long flags;

	spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

	map_nr &= mask;
	nr_free_pages -= mask;
	while (mask + (1 << (NR_MEM_LISTS-1))) {
		if (!test_and_change_bit(index, area->map))
			break;
		remove_mem_queue(list(map_nr ^ -mask));
		mask <<= 1;
		area++;
		index >>= 1;
		map_nr &= mask;
	}
	add_mem_queue(area, list(map_nr));

#undef list

	spin_unlock_irqrestore(&page_alloc_lock, flags);
}
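
/*
 * Coalescing sketch for free_pages_ok(), for an order-0 free of page 5
 * (map_nr == 5, mask == ~0UL, -mask == 1):
 *
 *	pass 1: the buddy is 5 ^ 1 == 4; if test_and_change_bit() finds
 *		the pair's bit set (page 4 is free at order 0), page 4
 *		is unlinked and the pair becomes the order-1 block at
 *		map_nr 4 (map_nr &= mask);
 *	pass 2: the buddy of block 4 at order 1 is 4 ^ 2 == 6, and so on
 *		until the buddy is found busy or order NR_MEM_LISTS-1 is
 *		reached, at which point the merged block is queued on
 *		"area".
 */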

void __free_page(struct page *page)
{
	if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
		if (PageSwapCache(page))
			panic("Freeing swap cache page");
		free_pages_ok(page->map_nr, 0);
	}
	if (PageSwapCache(page) && atomic_read(&page->count) == 1)
		panic("Releasing swap cache page");
}

void free_pages(unsigned long addr, unsigned long order)
{
	unsigned long map_nr = MAP_NR(addr);

	if (map_nr < max_mapnr) {
		mem_map_t * map = mem_map + map_nr;
		if (PageReserved(map))
			return;
		if (atomic_dec_and_test(&map->count)) {
			if (PageSwapCache(map))
				panic("Freeing swap cache pages");
			free_pages_ok(map_nr, order);
			return;
		}
		if (PageSwapCache(map) && atomic_read(&map->count) == 1)
			panic("Releasing swap cache pages at %p",
				__builtin_return_address(0));
	}
}

/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
#define RMQUEUE(order, maxorder, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
	do { struct page *prev = memory_head(area), *ret = prev->next; \
		while (memory_head(area) != ret) { \
			if (new_order >= maxorder && ret->next == prev) \
				break; \
			if (!dma || CAN_DMA(ret)) { \
				unsigned long map_nr = ret->map_nr; \
				(prev->next = ret->next)->prev = prev; \
				MARK_USED(map_nr, new_order, area); \
				nr_free_pages -= 1 << order; \
				EXPAND(ret, map_nr, order, new_order, area); \
				spin_unlock_irqrestore(&page_alloc_lock, flags); \
				return ADDRESS(map_nr); \
			} \
			prev = ret; \
			ret = ret->next; \
		} \
		new_order++; area++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)

#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
	while (high > low) { \
		area--; high--; size >>= 1; \
		add_mem_queue(area, map); \
		MARK_USED(index, high, area); \
		index += size; \
		map += size; \
	} \
	atomic_set(&map->count, 1); \
	map->age = PAGE_INITIAL_AGE; \
} while (0)
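
/*
 * RMQUEUE() walks free_area[] upward from the requested order, refusing
 * to take the last block of any list at or above maxorder.  Splitting
 * sketch for EXPAND(), for an order-1 request satisfied from a free
 * order-3 block (low == 1, high == 3):
 *
 *	each step puts the lower half of the current block back on the
 *	next-smaller free list, toggles that order's pair-bit with
 *	MARK_USED() and keeps the upper half, so the 8-page block
 *	becomes one free 4-page block, one free 2-page block, and the
 *	2-page piece whose address RMQUEUE() returns.
 */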

unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	unsigned long flags, maxorder;

	if (order >= NR_MEM_LISTS)
		goto nopage;

	/*
	 * "maxorder" is the highest order number that we're allowed
	 * to empty in order to find a free page..
	 */
	maxorder = order + NR_MEM_LISTS/3;
	if (gfp_mask & __GFP_MED)
		maxorder += NR_MEM_LISTS/3;
	if ((gfp_mask & __GFP_HIGH) || maxorder > NR_MEM_LISTS)
		maxorder = NR_MEM_LISTS;

	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk("gfp called nonatomically from interrupt %p\n",
				__builtin_return_address(0));
			gfp_mask &= ~__GFP_WAIT;
		}
	}

repeat:
	spin_lock_irqsave(&page_alloc_lock, flags);
	RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA));
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	if ((gfp_mask & __GFP_WAIT) && try_to_free_page(gfp_mask))
		goto repeat;
nopage:
	return 0;
}
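
/*
 * Caller-side sketch (not part of this file): one page plus a 4-page
 * DMA-capable buffer, both with GFP_KERNEL, which includes __GFP_WAIT
 * and so lets the retry loop above call try_to_free_page() and sleep:
 */
#if 0
	unsigned long page = __get_free_pages(GFP_KERNEL, 0);
	unsigned long dma_buf = __get_free_pages(GFP_KERNEL | GFP_DMA, 2);

	if (!page || !dma_buf) {
		/* 0 means the allocation failed and must be handled */
	}
	/* ... use the buffers ... */
	free_pages(dma_buf, 2);
	free_pages(page, 0);
#endif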

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n( ", nr_free_pages << (PAGE_SHIFT-10));
	spin_lock_irqsave(&page_alloc_lock, flags);
	for (order = 0; order < NR_MEM_LISTS; order++) {
		struct page * tmp;
		unsigned long nr = 0;
		for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
			nr++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * set up the free-area data structures:
 *  - mark all pages reserved
 *  - mark all memory queues empty
 *  - clear the memory bitmaps
 */
__initfunc(unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem))
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	unsigned long i;

	/*
	 * select nr of pages we try to keep free for important stuff
	 * with a minimum of 48 pages. This is totally arbitrary
	 */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
	if (i < 48)
		i = 48;
	free_pages_low = i + (i>>1);
	free_pages_high = i + i;
	mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
	p = mem_map + MAP_NR(end_mem);
	start_mem = LONG_ALIGN((unsigned long) p);
	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
	do {
		--p;
		atomic_set(&p->count, 0);
		p->flags = (1 << PG_DMA) | (1 << PG_reserved);
		p->map_nr = p - mem_map;
	} while (p > mem_map);

	for (i = 0; i < NR_MEM_LISTS; i++) {
		unsigned long bitmap_size;
		init_mem_queue(free_area+i);
		mask += mask;
		end_mem = (end_mem + ~mask) & mask;
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;
		bitmap_size = LONG_ALIGN(bitmap_size);
		free_area[i].map = (unsigned int *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}
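
/*
 * Bitmap sizing above, worked for end_mem - PAGE_OFFSET == 64MB and
 * PAGE_SHIFT == 12: order 0 gets (64MB >> 12) == 16384 bits == 2048
 * bytes, order 1 half of that, and so on; roughly 4kB of bitmap in
 * total for six lists.
 */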

/*
 * The tests may look silly, but it essentially makes sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
	pte_t * page_table, unsigned long entry, int write_access)
{
	unsigned long page;
	struct page *page_map;

	page_map = read_swap_cache(entry);

	if (pte_val(*page_table) != entry) {
		if (page_map)
			free_page_and_swap_cache(page_address(page_map));
		return;
	}
	if (!page_map) {
		set_pte(page_table, BAD_PAGE);
		swap_free(entry);
		oom(tsk);
		return;
	}

	page = page_address(page_map);
	vma->vm_mm->rss++;
	tsk->min_flt++;
	swap_free(entry);

	if (!write_access || is_page_shared(page_map)) {
		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
		return;
	}

	/* The page is unshared, and we want write access. In this
	   case, it is safe to tear down the swap cache and give the
	   page over entirely to this process. */

	delete_from_swap_cache(page_map);
	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
}