[PATCH] x86_64 merge: arch + asm
[opensuse:kernel.git] / arch / x86_64 / kernel / setup.c
1 /*
2  *  linux/arch/x86-64/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Nov 2001 Dave Jones <davej@suse.de>
7  *  Forked from i386 setup code.
8  *
9  *  $Id$
10  */
11
12 /*
13  * This file handles the architecture-dependent parts of initialization
14  */
15
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
19 #include <linux/mm.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/apm_bios.h>
32 #ifdef CONFIG_BLK_DEV_RAM
33 #include <linux/blk.h>
34 #endif
35 #include <linux/highmem.h>
36 #include <linux/bootmem.h>
37 #include <asm/processor.h>
38 #include <linux/console.h>
39 #include <linux/seq_file.h>
40 #include <asm/mtrr.h>
41 #include <asm/uaccess.h>
42 #include <asm/system.h>
43 #include <asm/io.h>
44 #include <asm/smp.h>
45 #include <asm/msr.h>
46 #include <asm/desc.h>
47 #include <asm/e820.h>
48 #include <asm/dma.h>
49 #include <asm/mpspec.h>
50 #include <asm/mmu_context.h>
51 #include <asm/bootsetup.h>
52
/*
 * Machine setup..
 */

/* Machine-check initialization; implemented elsewhere in the arch code. */
extern void mcheck_init(struct cpuinfo_x86 *c);

char ignore_irq13;              /* set if exception 16 works */
/* Boot CPU descriptor; positional initializer per struct cpuinfo_x86.
   NOTE(review): the -1 entries look like "unknown" markers -- verify
   against the current struct layout in <asm/processor.h>. */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };

/* CR4 feature bits to enable at boot (shared with secondary CPUs). */
unsigned long mmu_cr4_features;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;

/*
 * Setup options
 */
/* Opaque copy of the BIOS drive-info block handed over by the boot loader. */
struct drive_info_struct { char dummy[32]; } drive_info;
struct screen_info screen_info;
struct apm_info apm_info;
struct sys_desc_table_struct {
        unsigned short length;
        unsigned char table[0];         /* trailing variable-length data */
};

/* The kernel's copy of the firmware memory map,
   filled in by setup_memory_region(). */
struct e820map e820;

unsigned char aux_device_present;

extern int root_mountflags;
/* Linker-provided section boundary symbols (addresses, not data). */
extern char _text, _etext, _edata, _end;
extern unsigned long cpu_khz;

static int disable_x86_fxsr __initdata = 0;

/* Working copy of the boot command line, and the unparsed copy
   exported through /proc/cmdline. */
static char command_line[COMMAND_LINE_SIZE];
       char saved_command_line[COMMAND_LINE_SIZE];

/* Fixed legacy PC I/O port ranges, claimed at boot so drivers cannot
   accidentally allocate over them. */
struct resource standard_io_resources[] = {
        { "dma1", 0x00, 0x1f, IORESOURCE_BUSY },
        { "pic1", 0x20, 0x3f, IORESOURCE_BUSY },
        { "timer", 0x40, 0x5f, IORESOURCE_BUSY },
        { "keyboard", 0x60, 0x6f, IORESOURCE_BUSY },
        { "dma page reg", 0x80, 0x8f, IORESOURCE_BUSY },
        { "pic2", 0xa0, 0xbf, IORESOURCE_BUSY },
        { "dma2", 0xc0, 0xdf, IORESOURCE_BUSY },
        { "fpu", 0xf0, 0xff, IORESOURCE_BUSY }
};

#define STANDARD_IO_RESOURCES (sizeof(standard_io_resources)/sizeof(struct resource))

/* Resources describing the kernel image; start/end filled in setup_arch(). */
static struct resource code_resource = { "Kernel code", 0x100000, 0 };
static struct resource data_resource = { "Kernel data", 0, 0 };
static struct resource vram_resource = { "Video RAM area", 0xa0000, 0xbffff, IORESOURCE_BUSY };

/* System ROM resources */
#define MAXROMS 6
static struct resource rom_resources[MAXROMS] = {
        { "System ROM", 0xF0000, 0xFFFFF, IORESOURCE_BUSY },
        { "Video ROM", 0xc0000, 0xc7fff, IORESOURCE_BUSY }
};

/* A PC option ROM begins with the 0xaa55 signature word. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
116
/*
 * Probe the legacy BIOS ROM windows (video ROM, ISA extension ROMs,
 * motherboard ROM) and claim every range carrying a valid 0xaa55
 * signature in the iomem resource tree, so nothing else maps over it.
 * Uses the static rom_resources[] table; stops when it is full.
 */
static void __init probe_roms(void)
{
        int roms = 1;           /* rom_resources[0] = System ROM, [1] = Video ROM */
        unsigned long base;
        unsigned char *romstart;

        /* The system ROM at 0xF0000 is always present -- claim it. */
        request_resource(&iomem_resource, rom_resources+0);

        /* Video ROM is standard at C000:0000 - C7FF:0000, check signature */
        for (base = 0xC0000; base < 0xE0000; base += 2048) {
                romstart = isa_bus_to_virt(base);
                if (!romsignature(romstart))
                        continue;
                /* Claim the pre-initialized "Video ROM" entry; only the
                   first signature found in this window counts. */
                request_resource(&iomem_resource, rom_resources + roms);
                roms++;
                break;
        }

        /* Extension roms at C800:0000 - DFFF:0000 */
        for (base = 0xC8000; base < 0xE0000; base += 2048) {
                unsigned long length;

                romstart = isa_bus_to_virt(base);
                if (!romsignature(romstart))
                        continue;
                /* Byte 2 of the ROM header is its length in 512-byte units. */
                length = romstart[2] * 512;
                if (length) {
                        unsigned int i;
                        unsigned char chksum;

                        /* A valid option ROM sums to zero modulo 256. */
                        chksum = 0;
                        for (i = 0; i < length; i++)
                                chksum += romstart[i];

                        /* Good checksum? */
                        if (!chksum) {
                                rom_resources[roms].start = base;
                                rom_resources[roms].end = base + length - 1;
                                rom_resources[roms].name = "Extension ROM";
                                rom_resources[roms].flags = IORESOURCE_BUSY;

                                request_resource(&iomem_resource, rom_resources + roms);
                                roms++;
                                if (roms >= MAXROMS)
                                        return;         /* static table is full */
                        }
                }
        }

        /* Final check for motherboard extension rom at E000:0000 */
        base = 0xE0000;
        romstart = isa_bus_to_virt(base);

        if (romsignature(romstart)) {
                /* Claim a fixed 64K window; no header length is trusted here. */
                rom_resources[roms].start = base;
                rom_resources[roms].end = base + 65535;
                rom_resources[roms].name = "Extension ROM";
                rom_resources[roms].flags = IORESOURCE_BUSY;

                request_resource(&iomem_resource, rom_resources + roms);
        }
}
179
180 void __init add_memory_region(unsigned long long start,
181                                   unsigned long long size, int type)
182 {
183         int x = e820.nr_map;
184
185         if (x == E820MAX) {
186             printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
187             return;
188         }
189
190         e820.map[x].addr = start;
191         e820.map[x].size = size;
192         e820.map[x].type = type;
193         e820.nr_map++;
194 } /* add_memory_region */
195
196 #define E820_DEBUG      1
197
198 static void __init print_memory_map(char *who)
199 {
200         int i;
201
202         for (i = 0; i < e820.nr_map; i++) {
203                 printk(" %s: %016Lx - %016Lx ", who,
204                         (unsigned long long)e820.map[i].addr,
205                         (unsigned long long)(e820.map[i].addr + e820.map[i].size));
206                 switch (e820.map[i].type) {
207                 case E820_RAM:  printk("(usable)\n");
208                                 break;
209                 case E820_RESERVED:
210                                 printk("(reserved)\n");
211                                 break;
212                 case E820_ACPI:
213                                 printk("(ACPI data)\n");
214                                 break;
215                 case E820_NVS:
216                                 printk("(ACPI NVS)\n");
217                                 break;
218                 default:        printk("type %lu\n", (unsigned long)e820.map[i].type);
219                                 break;
220                 }
221         }
222 }
223
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.  The following 
 * replaces the original e820 map with a new one, removing overlaps.
 *
 * @biosmap: raw firmware map; rewritten in place with the sanitized map.
 * @pnr_map: in/out entry count for @biosmap.
 *
 * Returns 0 on success, -1 when the map has fewer than two entries or
 * contains an entry whose end address wraps (map left untouched).
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
        struct change_member {
                struct e820entry *pbios; /* pointer to original bios entry */
                unsigned long long addr; /* address for this change point */
        };
        struct change_member change_point_list[2*E820MAX];
        struct change_member *change_point[2*E820MAX];
        struct e820entry *overlap_list[E820MAX];
        struct e820entry new_bios[E820MAX];
        struct change_member *change_tmp;
        unsigned long current_type, last_type;
        unsigned long long last_addr;
        int chgidx, still_changing;
        int overlap_entries;
        int new_bios_entry;
        int old_nr, new_nr;
        int i;

        /*
                Visually we're performing the following (1,2,3,4 = memory types)...

                Sample memory map (w/overlaps):
                   ____22__________________
                   ______________________4_
                   ____1111________________
                   _44_____________________
                   11111111________________
                   ____________________33__
                   ___________44___________
                   __________33333_________
                   ______________22________
                   ___________________2222_
                   _________111111111______
                   _____________________11_
                   _________________4______

                Sanitized equivalent (no overlap):
                   1_______________________
                   _44_____________________
                   ___1____________________
                   ____22__________________
                   ______11________________
                   _________1______________
                   __________3_____________
                   ___________44___________
                   _____________33_________
                   _______________2________
                   ________________1_______
                   _________________4______
                   ___________________2____
                   ____________________33__
                   ______________________4_
        */

        /* if there's only one memory region, don't bother */
        if (*pnr_map < 2)
                return -1;

        old_nr = *pnr_map;

        /* bail out if we find any unreasonable addresses in bios map */
        for (i=0; i<old_nr; i++)
                if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
                        return -1;

        /* create pointers for initial change-point information (for sorting) */
        for (i=0; i < 2*old_nr; i++)
                change_point[i] = &change_point_list[i];

        /* record all known change-points (starting and ending addresses) */
        /* Each bios entry contributes two change-points: its start and
           its end address, both pointing back at the originating entry. */
        chgidx = 0;
        for (i=0; i < old_nr; i++)      {
                change_point[chgidx]->addr = biosmap[i].addr;
                change_point[chgidx++]->pbios = &biosmap[i];
                change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
                change_point[chgidx++]->pbios = &biosmap[i];
        }

        /* sort change-point list by memory addresses (low -> high) */
        /* Bubble sort -- the list is at most 2*E820MAX entries, and the
           tie-break puts start-points after end-points at equal addresses. */
        still_changing = 1;
        while (still_changing)  {
                still_changing = 0;
                for (i=1; i < 2*old_nr; i++)  {
                        /* if <current_addr> > <last_addr>, swap */
                        /* or, if current=<start_addr> & last=<end_addr>, swap */
                        if ((change_point[i]->addr < change_point[i-1]->addr) ||
                                ((change_point[i]->addr == change_point[i-1]->addr) &&
                                 (change_point[i]->addr == change_point[i]->pbios->addr) &&
                                 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
                           )
                        {
                                change_tmp = change_point[i];
                                change_point[i] = change_point[i-1];
                                change_point[i-1] = change_tmp;
                                still_changing=1;
                        }
                }
        }

        /* create a new bios memory map, removing overlaps */
        overlap_entries=0;       /* number of entries in the overlap table */
        new_bios_entry=0;        /* index for creating new bios map entries */
        last_type = 0;           /* start with undefined memory type */
        last_addr = 0;           /* start with 0 as last starting address */
        /* loop through change-points, determining affect on the new bios map */
        for (chgidx=0; chgidx < 2*old_nr; chgidx++)
        {
                /* keep track of all overlapping bios entries */
                if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
                {
                        /* add map entry to overlap list (> 1 entry implies an overlap) */
                        overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
                }
                else
                {
                        /* remove entry from list (order independent, so swap with last) */
                        for (i=0; i<overlap_entries; i++)
                        {
                                if (overlap_list[i] == change_point[chgidx]->pbios)
                                        overlap_list[i] = overlap_list[overlap_entries-1];
                        }
                        overlap_entries--;
                }
                /* if there are overlapping entries, decide which "type" to use */
                /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
                current_type = 0;
                for (i=0; i<overlap_entries; i++)
                        if (overlap_list[i]->type > current_type)
                                current_type = overlap_list[i]->type;
                /* continue building up new bios map based on this information */
                if (current_type != last_type)  {
                        if (last_type != 0)      {
                                /* Close off the region started at last_addr. */
                                new_bios[new_bios_entry].size =
                                        change_point[chgidx]->addr - last_addr;
                                /* move forward only if the new size was non-zero */
                                if (new_bios[new_bios_entry].size != 0)
                                        if (++new_bios_entry >= E820MAX)
                                                break;  /* no more space left for new bios entries */
                        }
                        if (current_type != 0)  {
                                /* Open a new region of the winning type. */
                                new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
                                new_bios[new_bios_entry].type = current_type;
                                last_addr=change_point[chgidx]->addr;
                        }
                        last_type = current_type;
                }
        }
        new_nr = new_bios_entry;   /* retain count for new bios entries */

        /* copy new bios mapping into original location */
        memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
        *pnr_map = new_nr;

        return 0;
}
387
388 /*
389  * Copy the BIOS e820 map into a safe place.
390  *
391  * Sanity-check it while we're at it..
392  *
393  * If we're lucky and live on a modern system, the setup code
394  * will have given us a memory map that we can use to properly
395  * set up memory.  If we aren't, we'll fake a memory map.
396  *
397  * We check to see that the memory map contains at least 2 elements
398  * before we'll use it, because the detection code in setup.S may
399  * not be perfect and most every PC known to man has two memory
400  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
401  * thinkpad 560x, for example, does not cooperate with the memory
402  * detection code.)
403  */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
        /* Only one memory region (or negative)? Ignore it */
        if (nr_map < 2)
                return -1;

        do {
                unsigned long long start = biosmap->addr;
                unsigned long long size = biosmap->size;
                unsigned long long end = start + size;
                unsigned long type = biosmap->type;

                /* Overflow in 64 bits? Ignore the memory map. */
                if (start > end)
                        return -1;

                /*
                 * Some BIOSes claim RAM in the 640k - 1M region.
                 * Not right. Fix it up.
                 */
                if (type == E820_RAM) {
                        if (start < 0x100000ULL && end > 0xA0000ULL) {
                                /* Keep any part below 640k ... */
                                if (start < 0xA0000ULL)
                                        add_memory_region(start, 0xA0000ULL-start, type);
                                /* ... drop anything inside the 640k-1M hole
                                   (continue jumps to the while condition) ... */
                                if (end <= 0x100000ULL)
                                        continue;
                                /* ... and keep the part above 1M. */
                                start = 0x100000ULL;
                                size = end - start;
                        }
                }
                add_memory_region(start, size, type);
        } while (biosmap++,--nr_map);   /* advance to next entry, nr_map times */
        return 0;
}
438
439 /*
440  * Do NOT EVER look at the BIOS memory size location.
441  * It does not work on many machines.
442  */
443 #define LOWMEMSIZE()    (0x9f000)
444
/*
 * Build the kernel's e820 map from firmware data, falling back to the
 * BIOS e801/int88 memory-size reports when no usable e820 map exists,
 * and log the result.
 */
void __init setup_memory_region(void)
{
        char *who = "BIOS-e820";        /* provenance label for the printout */

        /*
         * Try to copy the BIOS-supplied E820-map.
         *
         * Otherwise fake a memory map; one section from 0k->640k,
         * the next section from 1mb->appropriate_mem_k
         */
        sanitize_e820_map(E820_MAP, &E820_MAP_NR);
        if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
                unsigned long mem_size;

                /* compare results from other methods and take the greater */
                if (ALT_MEM_K < EXT_MEM_K) {
                        mem_size = EXT_MEM_K;
                        who = "BIOS-88";
                } else {
                        mem_size = ALT_MEM_K;
                        who = "BIOS-e801";
                }

                /* Fake two RAM regions: conventional memory, and everything
                   above 1MB (mem_size is in kilobytes, hence << 10). */
                e820.nr_map = 0;
                add_memory_region(0, LOWMEMSIZE(), E820_RAM);
                add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
        }
        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
        print_memory_map(who);
} /* setup_memory_region */
475
476
/*
 * Scan the boot command line for mem= options, rewriting the e820 map
 * accordingly, and copy the remaining (cleaned) command line into
 * command_line for the caller.
 *
 * @cmdline_p: set to point at the parsed command line on return.
 */
static inline void parse_mem_cmdline (char ** cmdline_p)
{
        char c = ' ', *to = command_line, *from = COMMAND_LINE;
        int len = 0;
        int usermem = 0;        /* non-zero once the user has overridden the map */

        /* Save unparsed command line copy for /proc/cmdline */
        memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
        saved_command_line[COMMAND_LINE_SIZE-1] = '\0';

        for (;;) {
                /*
                 * "mem=nopentium" disables the 4MB page tables.
                 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
                 * to <mem>, overriding the bios size.
                 * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
                 * <start> to <start>+<mem>, overriding the bios size.
                 */
                if (c == ' ' && !memcmp(from, "mem=", 4)) {
                        /* Drop the separating space we already copied out. */
                        if (to != command_line)
                                to--;
                        if (!memcmp(from+4, "nopentium", 9)) {
                                from += 9+4;
                                clear_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability);
                        } else if (!memcmp(from+4, "exactmap", 8)) {
                                /* Start from an empty map; subsequent mem=
                                   options describe the map exactly. */
                                from += 8+4;
                                e820.nr_map = 0;
                                usermem = 1;
                        } else {
                                /* If the user specifies memory size, we
                                 * blow away any automatically generated
                                 * size
                                 */
                                unsigned long long start_at, mem_size;
 
                                if (usermem == 0) {
                                        /* first time in: zap the whitelist
                                         * and reinitialize it with the
                                         * standard low-memory region.
                                         */
                                        e820.nr_map = 0;
                                        usermem = 1;
                                        add_memory_region(0, LOWMEMSIZE(), E820_RAM);
                                }
                                mem_size = memparse(from+4, &from);
                                if (*from == '@')
                                        start_at = memparse(from+1, &from);
                                else {
                                        /* Plain mem=XXX: region runs from 1MB,
                                           and XXX counts total memory. */
                                        start_at = HIGH_MEMORY;
                                        mem_size -= HIGH_MEMORY;
                                        usermem=0;
                                }
                                add_memory_region(start_at, mem_size, E820_RAM);
                        }
                }
                c = *(from++);
                if (!c)
                        break;
                if (COMMAND_LINE_SIZE <= ++len)
                        break;
                *(to++) = c;
        }
        *to = '\0';
        *cmdline_p = command_line;
        if (usermem) {
                printk(KERN_INFO "user-defined physical RAM map:\n");
                print_memory_map("user");
        }
}
546
/*
 * setup_arch - x86-64 architecture-specific boot-time initialization.
 *
 * Called once early in start_kernel().  Copies the parameters left by
 * the real-mode setup code, builds the e820 memory map (honouring any
 * mem= command-line overrides), initializes the bootmem allocator,
 * reserves pages the kernel must never hand out, kicks off paging and
 * SMP/APIC discovery, and registers the standard PC I/O and memory
 * resources.
 *
 * @cmdline_p: out-parameter; points at the parsed command line on return.
 */
void __init setup_arch(char **cmdline_p)
{
        unsigned long bootmap_size, low_mem_size;
        unsigned long start_pfn, max_pfn, max_low_pfn;
        int i;

        /* Copy the boot parameters handed over by the real-mode setup code. */
        ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
        drive_info = DRIVE_INFO;
        screen_info = SCREEN_INFO;
        apm_info.bios = APM_BIOS_INFO;
        aux_device_present = AUX_DEVICE_INFO;

#ifdef CONFIG_BLK_DEV_RAM
        rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
        setup_memory_region();

        if (!MOUNT_ROOT_RDONLY)
                root_mountflags &= ~MS_RDONLY;
        /* Record the kernel image layout in init_mm ... */
        init_mm.start_code = (unsigned long) &_text;
        init_mm.end_code = (unsigned long) &_etext;
        init_mm.end_data = (unsigned long) &_edata;
        init_mm.brk = (unsigned long) &_end;

        /* ... and as physical-address resources for the iomem tree. */
        code_resource.start = virt_to_phys(&_text);
        code_resource.end = virt_to_phys(&_etext)-1;
        data_resource.start = virt_to_phys(&_etext);
        data_resource.end = virt_to_phys(&_edata)-1;

        parse_mem_cmdline(cmdline_p);

/* Physical address <-> page frame number helpers. */
#define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)

/* NOTE(review): 4096 << 20 overflows 32-bit int arithmetic (== 2^32);
   presumably (4096UL << 20) was intended -- confirm. */
#define VMALLOC_RESERVE (unsigned long)(4096 << 20)
#define MAXMEM          (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
#define MAXMEM_PFN      PFN_DOWN(MAXMEM)

        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
        start_pfn = PFN_UP(__pa(&_end));

        /*
         * Find the highest page frame number we have available
         */
        max_pfn = 0;
        for (i = 0; i < e820.nr_map; i++) {
                unsigned long start, end;
                /* RAM? */
                if (e820.map[i].type != E820_RAM)
                        continue;
                start = PFN_UP(e820.map[i].addr);
                end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
                if (start >= end)
                        continue;
                if (end > max_pfn)
                        max_pfn = end;
        }

        /*
         * Determine low and high memory ranges:
         */
        max_low_pfn = max_pfn;
        if (max_low_pfn > MAXMEM_PFN) {
                max_low_pfn = MAXMEM_PFN;
        }


        /*
         * Initialize the boot-time allocator (with low memory only):
         */
        bootmap_size = init_bootmem(start_pfn, max_low_pfn);

        /*
         * Register fully available low RAM pages with the bootmem allocator.
         */
        for (i = 0; i < e820.nr_map; i++) {
                unsigned long curr_pfn, last_pfn, size;
                /*
                 * Reserve usable low memory
                 */
                if (e820.map[i].type != E820_RAM)
                        continue;
                /*
                 * We are rounding up the start address of usable memory:
                 */
                curr_pfn = PFN_UP(e820.map[i].addr);
                if (curr_pfn >= max_low_pfn)
                        continue;
                /*
                 * ... and at the end of the usable range downwards:
                 */
                last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);

                if (last_pfn > max_low_pfn)
                        last_pfn = max_low_pfn;

                /*
                 * .. finally, did all the rounding and playing
                 * around just make the area go away?
                 */
                if (last_pfn <= curr_pfn)
                        continue;

                size = last_pfn - curr_pfn;
                free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
        }
        /*
         * Reserve the bootmem bitmap itself as well. We do this in two
         * steps (first step was init_bootmem()) because this catches
         * the (very unlikely) case of us accidentally initializing the
         * bootmem allocator with an invalid RAM area.
         */
        /* Covers the kernel image from 1MB up, plus the bitmap placed
           right after it by init_bootmem(). */
        reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
                         bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));

        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
         */
        reserve_bootmem(0, PAGE_SIZE);

#ifdef CONFIG_SMP
        /*
         * But first pinch a few for the stack/trampoline stuff
         * FIXME: Don't need the extra page at 4K, but need to fix
         * trampoline before removing it. (see the GDT stuff)
         */
        reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
#endif

#ifdef CONFIG_X86_LOCAL_APIC
        /*
         * Find and reserve possible boot-time SMP configuration:
         */
        find_smp_config();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
        if (LOADER_TYPE && INITRD_START) {
                /* Only keep the initrd if it lies entirely in low memory. */
                if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
                        reserve_bootmem(INITRD_START, INITRD_SIZE);
                        initrd_start =
                                INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
                        initrd_end = initrd_start+INITRD_SIZE;
                }
                else {
                        printk(KERN_ERR "initrd extends beyond end of memory "
                            "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
                            (unsigned long)(INITRD_START + INITRD_SIZE),
                            (unsigned long)(max_low_pfn << PAGE_SHIFT));
                        initrd_start = 0;
                }
        }
#endif

        /*
         * NOTE: before this point _nobody_ is allowed to allocate
         * any memory using the bootmem allocator.
         */

#ifdef CONFIG_SMP
        smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
        paging_init();
#ifdef CONFIG_X86_LOCAL_APIC
        /*
         * get boot-time SMP configuration:
         */
        if (smp_found_config)
                get_smp_config();
        init_apic_mappings();
#endif


        /*
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
        probe_roms();
        for (i = 0; i < e820.nr_map; i++) {
                struct resource *res;
                /* Skip regions that extend above 4GB. */
                if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
                        continue;
                res = alloc_bootmem_low(sizeof(struct resource));
                switch (e820.map[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
                case E820_ACPI: res->name = "ACPI Tables"; break;
                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
                default:        res->name = "reserved";
                }
                res->start = e820.map[i].addr;
                res->end = res->start + e820.map[i].size - 1;
                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
                if (e820.map[i].type == E820_RAM) {
                        /*
                         *  We dont't know which RAM region contains kernel data,
                         *  so we try it repeatedly and let the resource manager
                         *  test it.
                         */
                        request_resource(res, &code_resource);
                        request_resource(res, &data_resource);
                }
        }
        request_resource(&iomem_resource, &vram_resource);

        /* request I/O space for devices used on all i[345]86 PCs */
        for (i = 0; i < STANDARD_IO_RESOURCES; i++)
                request_resource(&ioport_resource, standard_io_resources+i);

        /* Tell the PCI layer not to allocate too close to the RAM area.. */
        low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
        if (low_mem_size > pci_mem_start)
                pci_mem_start = low_mem_size;

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
        conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
        conswitchp = &dummy_con;
#endif
#endif
}
775
#ifndef CONFIG_X86_TSC
/* Set by the "notsc" boot option to disable TSC use. */
static int tsc_disable __initdata = 0;

/* Command-line hook for "notsc"; the argument string is unused. */
static int __init tsc_setup(char *str)
{
        tsc_disable = 1;
        return 1;       /* option consumed */
}

__setup("notsc", tsc_setup);
#endif
787
788 static int __init get_model_name(struct cpuinfo_x86 *c)
789 {
790         unsigned int *v;
791
792         if (cpuid_eax(0x80000000) < 0x80000004)
793                 return 0;
794
795         v = (unsigned int *) c->x86_model_id;
796         cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
797         cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
798         cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
799         c->x86_model_id[48] = 0;
800         return 1;
801 }
802
803
804 static void __init display_cacheinfo(struct cpuinfo_x86 *c)
805 {
806         unsigned int n, dummy, ecx, edx;
807
808         n = cpuid_eax(0x80000000);
809
810         if (n >= 0x80000005) {
811                 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
812                 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
813                         edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
814                 c->x86_cache_size=(ecx>>24)+(edx>>24);  
815         }
816
817         if (n < 0x80000006)
818                 return;
819
820         ecx = cpuid_ecx(0x80000006);
821         c->x86_cache_size = ecx >> 16;
822
823         printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
824                 c->x86_cache_size, ecx & 0xFF);
825 }
826
827
828 static int __init init_amd(struct cpuinfo_x86 *c)
829 {
830         int r;
831
832         /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
833            3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
834         clear_bit(0*32+31, &c->x86_capability);
835         
836         r = get_model_name(c);
837         display_cacheinfo(c);
838         return r;
839 }
840
841
842 void __init get_cpu_vendor(struct cpuinfo_x86 *c)
843 {
844         char *v = c->x86_vendor_id;
845
846         if (!strcmp(v, "AuthenticAMD"))
847                 c->x86_vendor = X86_VENDOR_AMD;
848         else
849                 c->x86_vendor = X86_VENDOR_UNKNOWN;
850 }
851
/* Maps a (vendor, family) pair to human-readable model names indexed
   by the 4-bit CPUID model number.
   NOTE(review): not referenced anywhere in this file -- looks like a
   leftover from the i386 fork; confirm before removing. */
struct cpu_model_info {
	int vendor;
	int family;
	char *model_names[16];
};
857
/* Handler for the "nofxsr" boot option.  Sets disable_x86_fxsr, which
   makes identify_cpu() clear the FXSR and XMM (SSE) feature bits. */
int __init x86_fxsr_setup(char * s)
{
	disable_x86_fxsr = 1;
	return 1;
}
__setup("nofxsr", x86_fxsr_setup);
864
865
866
/*
 * This does the hard work of actually picking apart the CPU stuff:
 * probes CPUID for vendor, family/model/stepping and feature flags,
 * runs vendor-specific fixups, applies "notsc"/"nofxsr" overrides,
 * and on secondary CPUs ANDs the result into boot_cpu_data so it
 * holds the feature set common to all CPUs.
 */
void __init identify_cpu(struct cpuinfo_x86 *c)
{
	int junk, i;
	u32 xlvl, tfms;

	/* Reset everything to a known "unknown" state before probing. */
	c->loops_per_jiffy = loops_per_jiffy;
	c->x86_cache_size = -1;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	memset(&c->x86_capability, 0, sizeof c->x86_capability);

	/* Get vendor name.  CPUID leaf 0 returns the 12-byte vendor
	   string in EBX:EDX:ECX order, hence the 0/8/4 byte offsets. */
	cpuid(0x00000000, &c->cpuid_level,
	      (int *)&c->x86_vendor_id[0],
	      (int *)&c->x86_vendor_id[8],
	      (int *)&c->x86_vendor_id[4]);

	get_cpu_vendor(c);
	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Intel-defined flags: level 0x00000001 */
	if ( c->cpuid_level >= 0x00000001 ) {
		cpuid(0x00000001, &tfms, &junk, &junk,
		      &c->x86_capability[0]);
		/* NOTE(review): only the 4-bit base family is kept here;
		   the extended family field (EAX[27:20]) is not folded in.
		   Confirm whether that matters for family-15 parts. */
		c->x86 = (tfms >> 8) & 15;
		c->x86_model = (tfms >> 4) & 15;
		c->x86_mask = tfms & 15;
	} else {
		/* Have CPUID level 0 only - unheard of */
		c->x86 = 4;
	}

	/* AMD-defined flags: level 0x80000001 */
	xlvl = cpuid_eax(0x80000000);
	if ( (xlvl & 0xffff0000) == 0x80000000 ) {
		if ( xlvl >= 0x80000001 )
			c->x86_capability[1] = cpuid_edx(0x80000001);
		if ( xlvl >= 0x80000004 )
			get_model_name(c); /* Default name */
	}

	/* Transmeta-defined flags: level 0x80860001 */
	xlvl = cpuid_eax(0x80860000);
	if ( (xlvl & 0xffff0000) == 0x80860000 ) {
		if (  xlvl >= 0x80860001 )
			c->x86_capability[2] = cpuid_edx(0x80860001);
	}


	printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
	       c->x86_capability[0],
	       c->x86_capability[1],
	       c->x86_capability[2],
	       c->x86_vendor);

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	switch ( c->x86_vendor ) {

		case X86_VENDOR_AMD:
			init_amd(c);
			break;

		case X86_VENDOR_UNKNOWN:
		default:
			/* Not much we can do here... */
			break;
	}

	printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n",
	       c->x86_capability[0],
	       c->x86_capability[1],
	       c->x86_capability[2],
	       c->x86_capability[3]);

	/*
	 * The vendor-specific functions might have changed features.  Now
	 * we do "generic changes."
	 */

	/* TSC disabled via the "notsc" boot option? */
#ifndef CONFIG_X86_TSC
	if ( tsc_disable )
		clear_bit(X86_FEATURE_TSC, &c->x86_capability);
#endif

	/* FXSR disabled via the "nofxsr" boot option?  SSE depends on
	   FXSAVE/FXRSTOR, so XMM goes too. */
	if (disable_x86_fxsr) {
		clear_bit(X86_FEATURE_FXSR, &c->x86_capability);
		clear_bit(X86_FEATURE_XMM, &c->x86_capability);
	}

	/* Now the feature flags better reflect actual CPU features! */

	printk(KERN_DEBUG "CPU:     After generic, caps: %08x %08x %08x %08x\n",
	       c->x86_capability[0],
	       c->x86_capability[1],
	       c->x86_capability[2],
	       c->x86_capability[3]);

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if ( c != &boot_cpu_data ) {
		/* AND the already accumulated flags with these */
		for ( i = 0 ; i < NCAPINTS ; i++ )
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
	}

	printk(KERN_DEBUG "CPU:             Common caps: %08x %08x %08x %08x\n",
	       boot_cpu_data.x86_capability[0],
	       boot_cpu_data.x86_capability[1],
	       boot_cpu_data.x86_capability[2],
	       boot_cpu_data.x86_capability[3]);
}
999  
1000
1001 void __init print_cpu_info(struct cpuinfo_x86 *c)
1002 {
1003         if (c->x86_model_id[0])
1004                 printk("AMD %s", c->x86_model_id);
1005
1006         if (c->x86_mask || c->cpuid_level >= 0) 
1007                 printk(" stepping %02x\n", c->x86_mask);
1008         else
1009                 printk("\n");
1010 }
1011
/*
 *	Get CPU information for use by the procfs.
 *	seq_file "show" callback: emits one /proc/cpuinfo record for the
 *	CPU passed in v (a pointer into cpu_data[]).
 */

static int show_cpuinfo(struct seq_file *m, void *v)
{
	struct cpuinfo_x86 *c = v;
	/* Index of this CPU within the cpu_data[] array. */
	int index = c - cpu_data;

	/*
	 * These flag bits must match the definitions in <asm/cpufeature.h>.
	 * NULL means this bit is undefined or reserved; either way it doesn't
	 * have meaning as far as Linux is concerned.  Note that it's important
	 * to realize there is a difference between this table and CPUID -- if
	 * applications want to get the raw CPUID data, they should access
	 * /dev/cpu/<cpu_nr>/cpuid instead.
	 */
	static char *x86_cap_flags[] = {
		/* Intel-defined */
		"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
		"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
		"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
		"fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL,

		/* AMD-defined */
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL,
		NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow",

		/* Transmeta-defined */
		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* Other (Linux-defined) */
		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	};

#ifdef CONFIG_SMP
	/* Emit nothing for CPUs that are not online. */
	if (!(cpu_online_map & (1<<(c-cpu_data))))
		return 0;
#endif

	seq_printf(m,"processor\t: %d\n"
		     "vendor_id\t: %s\n"
		     "cpu family\t: %d\n"
		     "model\t\t: %d\n"
		     "model name\t: %s\n",
		   index,
		     c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
		     c->x86,
		     c->x86_model,
		     c->x86_model_id[0] ? c->x86_model_id : "unknown");

	if (c->x86_mask || c->cpuid_level >= 0)
		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
	else
		seq_printf(m, "stepping\t: unknown\n");

	/* cpu MHz is derived from cpu_khz, which comes from TSC
	   calibration, so print it only when the TSC is usable. */
	if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
		seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
			     cpu_khz / 1000, (cpu_khz % 1000));
	}

	/* Cache size (x86_cache_size stays -1 when it was never probed). */
	if (c->x86_cache_size >= 0)
		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);

	seq_printf(m,
		"fpu\t\t: yes\n"
		"fpu_exception\t: yes\n"
		"cpuid level\t: %d\n"
		"wp\t\t: yes\n"
		"flags\t\t:",
		   c->cpuid_level);

	{
		int i;
		/* Print the name of every set capability bit that has one. */
		for ( i = 0 ; i < 32*NCAPINTS ; i++ )
			if ( test_bit(i, &c->x86_capability) &&
			     x86_cap_flags[i] != NULL )
				seq_printf(m, " %s", x86_cap_flags[i]);
	}

	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
		   c->loops_per_jiffy/(500000/HZ),
		   (c->loops_per_jiffy/(5000/HZ)) % 100);

	return 0;
}
1107
1108 static void *c_start(struct seq_file *m, loff_t *pos)
1109 {
1110         return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1111 }
1112
1113 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1114 {
1115         ++*pos;
1116         return c_start(m, pos);
1117 }
1118
static void c_stop(struct seq_file *m, void *v)
{
	/* Nothing to release: iteration state lives entirely in *pos. */
}
1122
/* seq_file operations backing /proc/cpuinfo (GNU `label:' initializer
   syntax, as used throughout this era of the kernel). */
struct seq_operations cpuinfo_op = {
	start:	c_start,
	next:	c_next,
	stop:	c_stop,
	show:	show_cpuinfo,
};