/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/dma.h>
#include <asm/system.h> /* for cli()/sti() */
#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>

int nr_swap_pages = 0;
int nr_free_pages = 0;

/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 * of different sizes
 */

#if CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif

/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
        struct page *next;
        struct page *prev;
        unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))

static struct free_area_struct free_area[NR_MEM_LISTS];

static inline void init_mem_queue(struct free_area_struct * head)
{
        head->next = memory_head(head);
        head->prev = memory_head(head);
}

static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
        struct page * next = head->next;

        entry->prev = memory_head(head);
        entry->next = next;
        next->prev = entry;
        head->next = entry;
}

static inline void remove_mem_queue(struct page * entry)
{
        struct page * next = entry->next;
        struct page * prev = entry->prev;
        next->prev = prev;
        prev->next = next;
}

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
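
/*
 * Illustrative note (not in the original source): for order 0,
 * mask = ~0UL << 0 is all ones, so -mask = 1 + ~mask = 1.  For order 2,
 * mask = ...11111100 and -mask = 4 = 1 << 2.  In other words -mask is the
 * block size at the current order, and free_pages_ok() below uses
 * "map_nr ^ -mask" to flip that one bit of the page number, which is
 * exactly the index of this block's buddy at that order.
 */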
#ifdef __SMP__
static spinlock_t page_alloc_lock;
#endif

/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * but this had better return false if any reasonable "get_free_page()"
 * allocation could currently fail..
 *
 * Currently we approve of the following situations:
 * - the highest memory order has two entries
 * - the highest memory order has one free entry and:
 *      - the next-highest memory order has two free entries
 * - the highest memory order has one free entry and:
 *      - the next-highest memory order has one free entry
 *      - the next-next-highest memory order has two free entries
 *
 * [previously, there had to be two entries of the highest memory
 *  order, but this led to problems on large-memory machines.]
 */
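
/*
 * Illustrative note (not in the original source): with NR_MEM_LISTS == 6
 * the loop below walks orders 5, 4 and 3.  An empty list ends the scan
 * (break, retval stays 0), a list with exactly one entry falls through to
 * the next lower order (continue), and a list with two or more entries
 * sets retval to 1 - which corresponds to the situations enumerated in
 * the comment above.
 */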
int free_memory_available(void)
{
        int i, retval = 0;
        unsigned long flags;
        struct free_area_struct * list = NULL;

        spin_lock_irqsave(&page_alloc_lock, flags);
        /* We fall through the loop if the list contains one
         * item. -- thanks to Colin Plumb <colin@nyx.net>
         */
        for (i = 1; i < 4; ++i) {
                list = free_area + NR_MEM_LISTS - i;
                if (list->next == memory_head(list))
                        break;
                if (list->next->next == memory_head(list))
                        continue;
                retval = 1;
                break;
        }
        spin_unlock_irqrestore(&page_alloc_lock, flags);
        return retval;
}

static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
        struct free_area_struct *area = free_area + order;
        unsigned long index = map_nr >> (1 + order);
        unsigned long mask = (~0UL) << order;
        unsigned long flags;

        spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

        map_nr &= mask;
        nr_free_pages -= mask;
        while (mask + (1 << (NR_MEM_LISTS-1))) {
                if (!test_and_change_bit(index, area->map))
                        break;
                remove_mem_queue(list(map_nr ^ -mask));
                mask <<= 1;
                area++;
                index >>= 1;
                map_nr &= mask;
        }
        add_mem_queue(area, list(map_nr));

#undef list

        spin_unlock_irqrestore(&page_alloc_lock, flags);
}
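
/*
 * Illustrative walk-through (not in the original source): freeing the
 * order-0 page with map_nr 5 starts with mask = ~0UL, so nr_free_pages
 * is incremented by one ("-= mask").  test_and_change_bit() toggles the
 * buddy bit shared by pages 4 and 5; if it was already set, page 4 is
 * free too, so it is unlinked and the pair becomes one order-1 block at
 * map_nr 4.  The loop repeats one order higher until a buddy is missing
 * or the top order is reached, and the final block is queued on that
 * order's free list.
 */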

void __free_page(struct page *page)
{
        if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
                if (PageSwapCache(page))
                        panic ("Freeing swap cache page");
                free_pages_ok(page->map_nr, 0);
        }
        if (PageSwapCache(page) && atomic_read(&page->count) == 1)
                panic ("Releasing swap cache page");
}

void free_pages(unsigned long addr, unsigned long order)
{
        unsigned long map_nr = MAP_NR(addr);

        if (map_nr < max_mapnr) {
                mem_map_t * map = mem_map + map_nr;
                if (PageReserved(map))
                        return;
                if (atomic_dec_and_test(&map->count)) {
                        if (PageSwapCache(map))
                                panic ("Freeing swap cache pages");
                        free_pages_ok(map_nr, order);
                        return;
                }
                if (PageSwapCache(map) && atomic_read(&map->count) == 1)
                        panic ("Releasing swap cache pages at %p",
                               __builtin_return_address(0));
        }
}

/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
        change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
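
/*
 * Illustrative note (not in the original source): ADDRESS() turns a
 * mem_map index back into a kernel virtual address, i.e. it is intended
 * as the inverse of the architecture's MAP_NR() used in free_pages()
 * above (on i386 in this kernel generation MAP_NR(addr) is essentially
 * ((addr) - PAGE_OFFSET) >> PAGE_SHIFT).  MARK_USED() toggles the buddy
 * bitmap bit for the pair containing "index" at the given order.
 */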
#define RMQUEUE(order, maxorder, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
        do { struct page *prev = memory_head(area), *ret = prev->next; \
                while (memory_head(area) != ret) { \
                        if (new_order >= maxorder && ret->next == prev) \
                                break; \
                        if (!dma || CAN_DMA(ret)) { \
                                unsigned long map_nr = ret->map_nr; \
                                (prev->next = ret->next)->prev = prev; \
                                MARK_USED(map_nr, new_order, area); \
                                nr_free_pages -= 1 << order; \
                                EXPAND(ret, map_nr, order, new_order, area); \
                                spin_unlock_irqrestore(&page_alloc_lock, flags); \
                                return ADDRESS(map_nr); \
                        } \
                        prev = ret; \
                        ret = ret->next; \
                } \
                new_order++; area++; \
        } while (new_order < NR_MEM_LISTS); \
} while (0)

#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
        while (high > low) { \
                area--; high--; size >>= 1; \
                add_mem_queue(area, map); \
                MARK_USED(index, high, area); \
                index += size; \
                map += size; \
        } \
        atomic_set(&map->count, 1); \
        map->age = PAGE_INITIAL_AGE; \
} while (0)
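
/*
 * Illustrative walk-through (not in the original source): suppose an
 * order-1 (two page) request finds nothing until the order-3 list, where
 * a free eight-page block starts at map_nr N.  RMQUEUE unlinks that
 * block, toggles its buddy bit, subtracts only 1 << order == 2 from
 * nr_free_pages (the split-off remainders stay free), and invokes EXPAND
 * with low == 1 and high == 3.  EXPAND re-queues pages N..N+3 as a free
 * order-2 block and pages N+4..N+5 as a free order-1 block, leaving
 * pages N+6..N+7 as the allocation; ADDRESS() of the advanced map_nr is
 * what __get_free_pages() returns.
 */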

unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
        unsigned long flags, maxorder;

        if (order >= NR_MEM_LISTS)
                goto nopage;

        /*
         * "maxorder" is the highest order number that we're allowed
         * to empty in order to find a free page..
         */
        maxorder = order + NR_MEM_LISTS/3;
        if (gfp_mask & __GFP_MED)
                maxorder += NR_MEM_LISTS/3;
        if ((gfp_mask & __GFP_HIGH) || maxorder > NR_MEM_LISTS)
                maxorder = NR_MEM_LISTS;
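
        /*
         * Illustrative note (not in the original source): with
         * NR_MEM_LISTS == 6 we have NR_MEM_LISTS/3 == 2, so an order-0
         * request gets maxorder == 2.  In RMQUEUE this means orders 0 and
         * 1 may be drained freely, while from order 2 upwards the last
         * free block on a list is left alone.  __GFP_MED raises the
         * boundary by two more orders, and __GFP_HIGH removes it entirely.
         */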

        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
                static int count = 0;
                if (++count < 5) {
                        printk("gfp called nonatomically from interrupt %p\n",
                                __builtin_return_address(0));
                        gfp_mask &= ~__GFP_WAIT;
                }
        }

repeat:
        spin_lock_irqsave(&page_alloc_lock, flags);
        RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA));
        spin_unlock_irqrestore(&page_alloc_lock, flags);
        if ((gfp_mask & __GFP_WAIT) && try_to_free_page(gfp_mask))
                goto repeat;
nopage:
        return 0;
}
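
/*
 * Hedged usage sketch (not in the original source): a caller wanting a
 * single DMA-capable page in this kernel generation would do something
 * like
 *
 *      unsigned long buf = __get_free_pages(GFP_KERNEL | GFP_DMA, 0);
 *      if (!buf)
 *              return -ENOMEM;
 *      ...
 *      free_pages(buf, 0);
 *
 * where GFP_KERNEL implies __GFP_WAIT, so the allocation may sleep and
 * must not be made from interrupt context.
 */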

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
        unsigned long order, flags;
        unsigned long total = 0;

        printk("Free pages:      %6dkB\n ( ", nr_free_pages<<(PAGE_SHIFT-10));
        spin_lock_irqsave(&page_alloc_lock, flags);
        for (order = 0; order < NR_MEM_LISTS; order++) {
                struct page * tmp;
                unsigned long nr = 0;
                for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
                        nr++;
                }
                total += nr * ((PAGE_SIZE>>10) << order);
                printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
        }
        spin_unlock_irqrestore(&page_alloc_lock, flags);
        printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
#endif
}

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * set up the free-area data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
__initfunc(unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem))
{
        mem_map_t * p;
        unsigned long mask = PAGE_MASK;
        int i;

        /*
         * select nr of pages we try to keep free for important stuff
         * with a minimum of 48 pages. This is totally arbitrary
         */
        i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
        if (i < 48)
                i = 48;
        min_free_pages = i;
        free_pages_low = i + (i>>1);
        free_pages_high = i + i;
        mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
        p = mem_map + MAP_NR(end_mem);
        start_mem = LONG_ALIGN((unsigned long) p);
        memset(mem_map, 0, start_mem - (unsigned long) mem_map);
        do {
                --p;
                atomic_set(&p->count, 0);
                p->flags = (1 << PG_DMA) | (1 << PG_reserved);
                p->map_nr = p - mem_map;
        } while (p > mem_map);

        for (i = 0 ; i < NR_MEM_LISTS ; i++) {
                unsigned long bitmap_size;
                init_mem_queue(free_area+i);
                mask += mask;
                end_mem = (end_mem + ~mask) & mask;
                bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
                bitmap_size = (bitmap_size + 7) >> 3;
                bitmap_size = LONG_ALIGN(bitmap_size);
                free_area[i].map = (unsigned int *) start_mem;
                memset((void *) start_mem, 0, bitmap_size);
                start_mem += bitmap_size;
        }
        return start_mem;
}
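
/*
 * Illustrative arithmetic (not in the original source): on a 32MB machine
 * with 4kB pages, (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7) is 64, giving
 * min_free_pages = 64, free_pages_low = 96 and free_pages_high = 128.
 * The same machine has 8192 page frames, so the loop above reserves a
 * 1024-byte bitmap for order 0, 512 bytes for order 1, and so on, each
 * order halving the size, all carved out of start_mem right after mem_map.
 */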

/*
 * The tests may look silly, but they essentially make sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
        pte_t * page_table, unsigned long entry, int write_access)
{
        unsigned long page;
        struct page *page_map;

        page_map = read_swap_cache(entry);

        if (pte_val(*page_table) != entry) {
                if (page_map)
                        free_page_and_swap_cache(page_address(page_map));
                return;
        }
        if (!page_map) {
                set_pte(page_table, BAD_PAGE);
                swap_free(entry);
                oom(tsk);
                return;
        }

        page = page_address(page_map);
        vma->vm_mm->rss++;
        tsk->min_flt++;
        swap_free(entry);

        if (!write_access || is_page_shared(page_map)) {
                set_pte(page_table, mk_pte(page, vma->vm_page_prot));
                return;
        }

        /* The page is unshared, and we want write access.  In this
           case, it is safe to tear down the swap cache and give the
           page over entirely to this process. */

        delete_from_swap_cache(page_map);
        set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
        return;
}