/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/compiler.h>

int nr_swap_pages;
int nr_active_pages;
int nr_inactive_pages;
struct list_head inactive_list;
struct list_head active_list;
pg_data_t *pgdat_list;

static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 32, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

#define memlist_init(x) INIT_LIST_HEAD(x)
#define memlist_add_head list_add
#define memlist_add_tail list_add_tail
#define memlist_del list_del
#define memlist_entry list_entry
#define memlist_next(x) ((x)->next)
#define memlist_prev(x) ((x)->prev)

/*
 * Temporary debugging check.
 */
#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
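/*
 * Worked example, for illustration: freeing an order-2 block gives
 * mask = ~0UL << 2, so -mask == 1+~mask == 4.  For page_idx = 8 the
 * buddy block is page_idx ^ -mask = 12; if that buddy is free as well,
 * the two merge into an order-3 block starting at page_idx & (mask << 1)
 * = 8, and the merge test is repeated one order higher.
 */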

static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
static void __free_pages_ok (struct page *page, unsigned int order)
{
        unsigned long index, page_idx, mask, flags;
        free_area_t *area;
        struct page *base;
        zone_t *zone;

        if (page->buffers)
                BUG();
        if (page->mapping)
                BUG();
        if (!VALID_PAGE(page))
                BUG();
        if (PageSwapCache(page))
                BUG();
        if (PageLocked(page))
                BUG();
        if (PageDecrAfter(page))
                BUG();
        if (PageActive(page))
                BUG();
        if (PageInactive(page))
                BUG();
        page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));

        if (current->flags & PF_FREE_PAGES)
                goto local_freelist;
 back_local_freelist:

        zone = page->zone;

        mask = (~0UL) << order;
        base = zone->zone_mem_map;
        page_idx = page - base;
        if (page_idx & ~mask)
                BUG();
        index = page_idx >> (1 + order);

        area = zone->free_area + order;

        spin_lock_irqsave(&zone->lock, flags);

        zone->free_pages -= mask;

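        /*
         * mask is -(1UL << order), so the subtraction above adds
         * 2^order pages back to the zone counter.  The loop condition
         * below becomes zero exactly when mask == -(1UL << (MAX_ORDER-1)),
         * i.e. we keep trying to merge with the buddy until the block
         * reaches the maximum order.
         */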
        while (mask + (1 << (MAX_ORDER-1))) {
                struct page *buddy1, *buddy2;

                if (area >= zone->free_area + MAX_ORDER)
                        BUG();
                if (!__test_and_change_bit(index, area->map))
                        /*
                         * the buddy page is still allocated.
                         */
                        break;
                /*
                 * Move the buddy up one level.
                 */
                buddy1 = base + (page_idx ^ -mask);
                buddy2 = base + page_idx;
                if (BAD_RANGE(zone,buddy1))
                        BUG();
                if (BAD_RANGE(zone,buddy2))
                        BUG();

                memlist_del(&buddy1->list);
                mask <<= 1;
                area++;
                index >>= 1;
                page_idx &= mask;
        }
        memlist_add_head(&(base + page_idx)->list, &area->free_list);

        spin_unlock_irqrestore(&zone->lock, flags);
        return;

 local_freelist:
        /*
         * This is a little subtle: if the requested allocation order is
         * greater than zero we had better keep all the freed pages local,
         * since we must deal with fragmentation too and cannot rely on
         * the nr_local_pages information alone.
         */
        if (current->nr_local_pages && !current->allocation_order)
                goto back_local_freelist;

        list_add(&page->list, &current->local_pages);
        page->index = order;
        current->nr_local_pages++;
}

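/*
 * The free-area bitmap keeps one bit per pair of buddies at each order:
 * the bit is clear when both buddies are in the same state (both free or
 * both allocated) and set when exactly one of them is free.  MARK_USED()
 * simply toggles it.
 */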
#define MARK_USED(index, order, area) \
        __change_bit((index) >> (1+(order)), (area)->map)

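/*
 * expand() splits a block of order 'high' down to order 'low': at each
 * step the lower half is returned to the free list at the reduced order
 * and the split continues in the upper half, so the caller ends up with
 * the highest order-'low' piece of the original block.
 */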
static inline struct page * expand (zone_t *zone, struct page *page,
         unsigned long index, int low, int high, free_area_t * area)
{
        unsigned long size = 1 << high;

        while (high > low) {
                if (BAD_RANGE(zone,page))
                        BUG();
                area--;
                high--;
                size >>= 1;
                memlist_add_head(&(page)->list, &(area)->free_list);
                MARK_USED(index, high, area);
                index += size;
                page += size;
        }
        if (BAD_RANGE(zone,page))
                BUG();
        return page;
}

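/*
 * rmqueue() takes the first free block of order >= 'order' out of this
 * zone's free lists, using expand() to give back the unused remainder
 * when the block found is larger than requested.
 */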
static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
static struct page * rmqueue(zone_t *zone, unsigned int order)
{
        free_area_t * area = zone->free_area + order;
        unsigned int curr_order = order;
        struct list_head *head, *curr;
        unsigned long flags;
        struct page *page;

        spin_lock_irqsave(&zone->lock, flags);
        do {
                head = &area->free_list;
                curr = memlist_next(head);

                if (curr != head) {
                        unsigned int index;

                        page = memlist_entry(curr, struct page, list);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        memlist_del(curr);
                        index = page - zone->zone_mem_map;
                        if (curr_order != MAX_ORDER-1)
                                MARK_USED(index, curr_order, area);
                        zone->free_pages -= 1UL << order;

                        page = expand(zone, page, index, order, curr_order, area);
                        spin_unlock_irqrestore(&zone->lock, flags);

                        set_page_count(page, 1);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        DEBUG_LRU_PAGE(page);
                        return page;
                }
                curr_order++;
                area++;
        } while (curr_order < MAX_ORDER);
        spin_unlock_irqrestore(&zone->lock, flags);

        return NULL;
}

#ifndef CONFIG_DISCONTIGMEM
struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
{
        return __alloc_pages(gfp_mask, order,
                contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
}
#endif

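/*
 * balance_classzone() is the synchronous memory-reclaim path: the task
 * marks itself PF_MEMALLOC|PF_FREE_PAGES so that pages it frees inside
 * try_to_free_pages() are collected on current->local_pages instead of
 * going back to the buddy lists, then it picks a block of the right
 * order (and class zone) from that private list and releases the rest.
 */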
static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
{
        struct page * page = NULL;
        int __freed = 0;

        if (!(gfp_mask & __GFP_WAIT))
                goto out;
        if (in_interrupt())
                BUG();

        current->allocation_order = order;
        current->flags |= PF_MEMALLOC | PF_FREE_PAGES;

        __freed = try_to_free_pages(classzone, gfp_mask, order);

        current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);

        if (current->nr_local_pages) {
                struct list_head * entry, * local_pages;
                struct page * tmp;
                int nr_pages;

                local_pages = &current->local_pages;

                if (likely(__freed)) {
                        /* pick from the last inserted so we're lifo */
                        entry = local_pages->next;
                        do {
                                tmp = list_entry(entry, struct page, list);
                                if (tmp->index == order && memclass(tmp->zone, classzone)) {
                                        list_del(entry);
                                        current->nr_local_pages--;
                                        set_page_count(tmp, 1);
                                        page = tmp;

                                        if (page->buffers)
                                                BUG();
                                        if (page->mapping)
                                                BUG();
                                        if (!VALID_PAGE(page))
                                                BUG();
                                        if (PageSwapCache(page))
                                                BUG();
                                        if (PageLocked(page))
                                                BUG();
                                        if (PageDecrAfter(page))
                                                BUG();
                                        if (PageActive(page))
                                                BUG();
                                        if (PageInactive(page))
                                                BUG();
                                        if (PageDirty(page))
                                                BUG();

                                        break;
                                }
                        } while ((entry = entry->next) != local_pages);
                }

                nr_pages = current->nr_local_pages;
                /* free in reverse order so that the global order will be lifo */
                while ((entry = local_pages->prev) != local_pages) {
                        list_del(entry);
                        tmp = list_entry(entry, struct page, list);
                        __free_pages_ok(tmp, tmp->index);
                        if (!nr_pages--)
                                BUG();
                }
                current->nr_local_pages = 0;
        }
 out:
        *freed = __freed;
        return page;
}

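/*
 * Free pages the zone would have left after an order-'order' allocation,
 * clamped at zero so the watermark comparisons below never go negative.
 */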
static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
{
        long free = zone->free_pages - (1UL << order);
        return free >= 0 ? free : 0;
}

/*
 * This is the 'heart' of the zoned buddy allocator:
 */
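/*
 * Allocation strategy, in order: (1) scan the zonelist and take a page
 * from any zone still above its pages_low watermark; (2) wake kswapd
 * and retry against pages_min (quartered for callers that cannot wait);
 * (3) if the caller is itself the memory reclaimer (PF_MEMALLOC), take
 * whatever is available; (4) otherwise reclaim synchronously through
 * balance_classzone(), rescan against pages_min, then yield and start
 * over.
 */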
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
{
        zone_t **zone, * classzone;
        struct page * page;
        int freed;

        zone = zonelist->zones;
        classzone = *zone;
        for (;;) {
                zone_t *z = *(zone++);
                if (!z)
                        break;

                if (zone_free_pages(z, order) > z->pages_low) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        classzone->need_balance = 1;
        mb();
        if (waitqueue_active(&kswapd_wait))
                wake_up_interruptible(&kswapd_wait);

        zone = zonelist->zones;
        for (;;) {
                unsigned long min;
                zone_t *z = *(zone++);
                if (!z)
                        break;

                min = z->pages_min;
                if (!(gfp_mask & __GFP_WAIT))
                        min >>= 2;
                if (zone_free_pages(z, order) > min) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        /* here we're in the low on memory slow path */

rebalance:
        if (current->flags & PF_MEMALLOC) {
                zone = zonelist->zones;
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
                return NULL;
        }

        page = balance_classzone(classzone, gfp_mask, order, &freed);
        if (page)
                return page;

        zone = zonelist->zones;
        for (;;) {
                zone_t *z = *(zone++);
                if (!z)
                        break;

                if (zone_free_pages(z, order) > z->pages_min) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        /* Yield for kswapd, and try again */
        current->policy |= SCHED_YIELD;
        __set_current_state(TASK_RUNNING);
        schedule();
        goto rebalance;
}

/*
 * Common helper functions.
 */
unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
{
        struct page * page;

        page = alloc_pages(gfp_mask, order);
        if (!page)
                return 0;
        return (unsigned long) page_address(page);
}

unsigned long get_zeroed_page(unsigned int gfp_mask)
{
        struct page * page;

        page = alloc_pages(gfp_mask, 0);
        if (page) {
                void *address = page_address(page);
                clear_page(address);
                return (unsigned long) address;
        }
        return 0;
}

void __free_pages(struct page *page, unsigned int order)
{
        if (!PageReserved(page) && put_page_testzero(page))
                __free_pages_ok(page, order);
}

void free_pages(unsigned long addr, unsigned int order)
{
        if (addr != 0)
                __free_pages(virt_to_page(addr), order);
}

/*
 * Total amount of free (allocatable) RAM:
 */
unsigned int nr_free_pages (void)
{
        unsigned int sum;
        zone_t *zone;
        pg_data_t *pgdat = pgdat_list;

        sum = 0;
        while (pgdat) {
                for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
                        sum += zone->free_pages;
                pgdat = pgdat->node_next;
        }
        return sum;
}

/*
 * Amount of free RAM allocatable as buffer memory:
 */
unsigned int nr_free_buffer_pages (void)
{
        pg_data_t *pgdat = pgdat_list;
        unsigned int sum = 0;
        zonelist_t *zonelist;
        zone_t **zonep, *zone;

        do {
                zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
                zonep = zonelist->zones;

                for (zone = *zonep++; zone; zone = *zonep++)
                        sum += zone->free_pages;

                pgdat = pgdat->node_next;
        } while (pgdat);

        return sum + nr_active_pages + nr_inactive_pages;
}

#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
        pg_data_t *pgdat = pgdat_list;
        unsigned int pages = 0;

        while (pgdat) {
                pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
                pgdat = pgdat->node_next;
        }
        return pages;
}
#endif

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas_core(pg_data_t *pgdat)
{
        unsigned int order;
        unsigned type;
        pg_data_t *tmpdat = pgdat;

        printk("Free pages:      %6dkB (%6dkB HighMem)\n",
                nr_free_pages() << (PAGE_SHIFT-10),
                nr_free_highpages() << (PAGE_SHIFT-10));

        while (tmpdat) {
                zone_t *zone;
                for (zone = tmpdat->node_zones;
                                zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
                        printk("Zone:%s freepages:%6lukB min:%6lukB low:%6lukB "
                                       "high:%6lukB\n",
                                        zone->name,
                                        (zone->free_pages)
                                        << ((PAGE_SHIFT-10)),
                                        zone->pages_min
                                        << ((PAGE_SHIFT-10)),
                                        zone->pages_low
                                        << ((PAGE_SHIFT-10)),
                                        zone->pages_high
                                        << ((PAGE_SHIFT-10)));

                tmpdat = tmpdat->node_next;
        }

        printk("Free pages:      %6dkB (%6dkB HighMem)\n",
                nr_free_pages() << (PAGE_SHIFT-10),
                nr_free_highpages() << (PAGE_SHIFT-10));

        printk("( Active: %d, inactive: %d, free: %d )\n",
               nr_active_pages,
               nr_inactive_pages,
               nr_free_pages());

        for (type = 0; type < MAX_NR_ZONES; type++) {
                struct list_head *head, *curr;
                zone_t *zone = pgdat->node_zones + type;
                unsigned long nr, total, flags;

                total = 0;
                if (zone->size) {
                        spin_lock_irqsave(&zone->lock, flags);
                        for (order = 0; order < MAX_ORDER; order++) {
                                head = &(zone->free_area + order)->free_list;
                                curr = head;
                                nr = 0;
                                for (;;) {
                                        curr = memlist_next(curr);
                                        if (curr == head)
                                                break;
                                        nr++;
                                }
                                total += nr * (1 << order);
                                printk("%lu*%lukB ", nr,
                                                (PAGE_SIZE>>10) << order);
                        }
                        spin_unlock_irqrestore(&zone->lock, flags);
                }
                printk("= %lukB)\n", total * (PAGE_SIZE>>10));
        }

#ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
#endif
}

void show_free_areas(void)
{
        show_free_areas_core(pgdat_list);
}

/*
 * Builds allocation fallback zone lists.
 */
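/*
 * The resulting ordering: a __GFP_HIGHMEM list falls back through
 * HighMem -> Normal -> DMA, a plain (GFP_KERNEL-style) list through
 * Normal -> DMA, and a __GFP_DMA list contains the DMA zone only;
 * zones with zero size are skipped and each list is NULL-terminated.
 */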
static inline void build_zonelists(pg_data_t *pgdat)
{
        int i, j, k;

        for (i = 0; i <= GFP_ZONEMASK; i++) {
                zonelist_t *zonelist;
                zone_t *zone;

                zonelist = pgdat->node_zonelists + i;
                memset(zonelist, 0, sizeof(*zonelist));

                j = 0;
                k = ZONE_NORMAL;
                if (i & __GFP_HIGHMEM)
                        k = ZONE_HIGHMEM;
                if (i & __GFP_DMA)
                        k = ZONE_DMA;

                switch (k) {
                        default:
                                BUG();
                        /*
                         * fallthrough:
                         */
                        case ZONE_HIGHMEM:
                                zone = pgdat->node_zones + ZONE_HIGHMEM;
                                if (zone->size) {
#ifndef CONFIG_HIGHMEM
                                        BUG();
#endif
                                        zonelist->zones[j++] = zone;
                                }
                        case ZONE_NORMAL:
                                zone = pgdat->node_zones + ZONE_NORMAL;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                        case ZONE_DMA:
                                zone = pgdat->node_zones + ZONE_DMA;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                }
                zonelist->zones[j++] = NULL;
        }
}

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
        unsigned long *zones_size, unsigned long zone_start_paddr,
        unsigned long *zholes_size, struct page *lmem_map)
{
        struct page *p;
        unsigned long i, j;
        unsigned long map_size;
        unsigned long totalpages, offset, realtotalpages;
        const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);

        if (zone_start_paddr & ~PAGE_MASK)
                BUG();

        totalpages = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                unsigned long size = zones_size[i];
                totalpages += size;
        }
        realtotalpages = totalpages;
        if (zholes_size)
                for (i = 0; i < MAX_NR_ZONES; i++)
                        realtotalpages -= zholes_size[i];

        printk("On node %d totalpages: %lu\n", nid, realtotalpages);

        INIT_LIST_HEAD(&active_list);
        INIT_LIST_HEAD(&inactive_list);

        /*
         * Some architectures (with lots of memory and discontiguous memory
         * maps) have to search for a good mem_map area:
         * For discontigmem, the conceptual mem map array starts from
         * PAGE_OFFSET, we need to align the actual array onto a mem map
         * boundary, so that MAP_NR works.
         */
        map_size = (totalpages + 1)*sizeof(struct page);
        if (lmem_map == (struct page *)0) {
                lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
                lmem_map = (struct page *)(PAGE_OFFSET +
                        MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
        }
        *gmap = pgdat->node_mem_map = lmem_map;
        pgdat->node_size = totalpages;
        pgdat->node_start_paddr = zone_start_paddr;
        pgdat->node_start_mapnr = (lmem_map - mem_map);
        pgdat->nr_zones = 0;

        /*
         * Initially all pages are reserved - free ones are freed
         * up by free_all_bootmem() once the early boot process is
         * done.
         */
        for (p = lmem_map; p < lmem_map + totalpages; p++) {
                set_page_count(p, 0);
                SetPageReserved(p);
                init_waitqueue_head(&p->wait);
                memlist_init(&p->list);
        }

        offset = lmem_map - mem_map;
        for (j = 0; j < MAX_NR_ZONES; j++) {
                zone_t *zone = pgdat->node_zones + j;
                unsigned long mask;
                unsigned long size, realsize;

                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];

                printk("zone(%lu): %lu pages.\n", j, size);
                zone->size = size;
                zone->name = zone_names[j];
                zone->lock = SPIN_LOCK_UNLOCKED;
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;
                zone->need_balance = 0;
                if (!size)
                        continue;

                pgdat->nr_zones = j+1;

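                /*
                 * Zone watermarks: pages_min is realsize/zone_balance_ratio
                 * clamped to [zone_balance_min, zone_balance_max]; pages_low
                 * and pages_high are twice and three times that.  Example,
                 * for illustration: a 32768-page Normal zone with ratio 128
                 * gives 256, clamped to 255, so min/low/high become
                 * 255/510/765 pages.
                 */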
                mask = (realsize / zone_balance_ratio[j]);
                if (mask < zone_balance_min[j])
                        mask = zone_balance_min[j];
                else if (mask > zone_balance_max[j])
                        mask = zone_balance_max[j];
                zone->pages_min = mask;
                zone->pages_low = mask*2;
                zone->pages_high = mask*3;

                zone->zone_mem_map = mem_map + offset;
                zone->zone_start_mapnr = offset;
                zone->zone_start_paddr = zone_start_paddr;

                if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
                        printk("BUG: wrong zone alignment, it will crash\n");

                for (i = 0; i < size; i++) {
                        struct page *page = mem_map + offset + i;
                        page->zone = zone;
                        if (j != ZONE_HIGHMEM)
                                page->virtual = __va(zone_start_paddr);
                        zone_start_paddr += PAGE_SIZE;
                }

                offset += size;
                for (i = 0; ; i++) {
                        unsigned long bitmap_size;

                        memlist_init(&zone->free_area[i].free_list);
                        if (i == MAX_ORDER-1) {
                                zone->free_area[i].map = NULL;
                                break;
                        }

                        /*
                         * Page buddy system uses "index >> (i+1)",
                         * where "index" is at most "size-1".
                         *
                         * The extra "+3" is to round down to byte
                         * size (8 bits per byte assumption). Thus
                         * we get "(size-1) >> (i+4)" as the last byte
                         * we can access.
                         *
                         * The "+1" is because we want to round the
                         * byte allocation up rather than down. So
                         * we should have had a "+7" before we shifted
                         * down by three. Also, we have to add one as
                         * we actually _use_ the last bit (it's [0,n]
                         * inclusive, not [0,n[).
                         *
                         * So we actually had +7+1 before we shift
                         * down by 3. But (n+8) >> 3 == (n >> 3) + 1
                         * (modulo overflows, which we do not have).
                         *
                         * Finally, we LONG_ALIGN because all bitmap
                         * operations are on longs.
                         */
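                        /*
                         * Example, for illustration: a 4096-page zone at
                         * order i = 0 needs one bit per buddy pair, i.e.
                         * 2048 bits; (4096-1) >> 4 = 255, then +1 and
                         * LONG_ALIGN give a 256-byte bitmap, which is
                         * exactly 2048 bits.
                         */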
                        bitmap_size = (size-1) >> (i+4);
                        bitmap_size = LONG_ALIGN(bitmap_size+1);
                        zone->free_area[i].map =
                          (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
                }
        }
        build_zonelists(pgdat);
}

void __init free_area_init(unsigned long *zones_size)
{
        free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
}

static int __init setup_mem_frac(char *str)
{
        int j = 0;

        while (get_option(&str, &zone_balance_ratio[j++]) == 2);
        printk("setup_mem_frac: ");
        for (j = 0; j < MAX_NR_ZONES; j++) printk("%d  ", zone_balance_ratio[j]);
        printk("\n");
        return 1;
}

__setup("memfrac=", setup_mem_frac);