[opensuse:kernel.git] drivers/message/i2o/i2o_block.c
1 /*
2  * I2O Random Block Storage Class OSM
3  *
4  * (C) Copyright 1999 Red Hat Software
5  *      
6  * Written by Alan Cox, Building Number Three Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  *
13  * This is a beta test release. Most of the good code was taken
14  * from the nbd driver by Pavel Machek, who in turn took some of it
15  * from loop.c. Isn't free software great for reusability 8)
16  *
17  * Fixes/additions:
18  *      Steve Ralston:  
19  *              Multiple device handling error fixes,
20  *              Added a queue depth.
21  *      Alan Cox:       
22  *              FC920 has an RMW bug. Don't OR in the end marker.
23  *              Removed queue walk, fixed for 64bitness.
24  *      Deepak Saxena:
25  *              Independent queues per IOP
26  *              Support for dynamic device creation/deletion
27  *              Code cleanup    
28  *              Support for larger I/Os through merge* functions 
29  *              (taken from DAC960 driver)
30  *      Boji T Kannanthanam:
31  *              Set the I2O Block devices to be detected in increasing 
32  *              order of TIDs during boot.
33  *              Search for and set the I2O block device that we boot from as
34  *              the first device to be claimed (as /dev/i2o/hda).
35  *              Properly attach/detach I2O gendisk structure from the system
36  *              gendisk list. The I2O block devices now appear in 
37  *              /proc/partitions.
38  *
39  *      To do:
40  *              Serial number scanning to find duplicates for FC multipathing
41  */
42
43 #include <linux/major.h>
44
45 #include <linux/module.h>
46
47 #include <linux/sched.h>
48 #include <linux/fs.h>
49 #include <linux/stat.h>
50 #include <linux/pci.h>
51 #include <linux/errno.h>
52 #include <linux/file.h>
53 #include <linux/ioctl.h>
54 #include <linux/i2o.h>
55 #include <linux/blkdev.h>
56 #include <linux/blkpg.h>
57 #include <linux/slab.h>
58 #include <linux/hdreg.h>
59 #include <linux/spinlock.h>
60
61 #include <linux/notifier.h>
62 #include <linux/reboot.h>
63
64 #include <asm/uaccess.h>
65 #include <asm/semaphore.h>
66 #include <linux/completion.h>
67 #include <asm/io.h>
68 #include <asm/atomic.h>
69 #include <linux/smp_lock.h>
70 #include <linux/wait.h>
71
72 #define MAJOR_NR I2O_MAJOR
73
74 #include <linux/blk.h>
75
76 #define MAX_I2OB        16
77
78 #define MAX_I2OB_DEPTH  128
79 #define MAX_I2OB_RETRIES 4
80
81 //#define DRIVERDEBUG
82 #ifdef DRIVERDEBUG
83 #define DEBUG( s ) printk( s )
84 #else
85 #define DEBUG( s )
86 #endif
87
88 /*
89  * Events that this OSM is interested in
90  */
91 #define I2OB_EVENT_MASK         (I2O_EVT_IND_BSA_VOLUME_LOAD |  \
92                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
93                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
94                                  I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
95                                  I2O_EVT_IND_BSA_SCSI_SMART )
96
97
98 /*
99  * I2O Block Error Codes - should be in a header file really...
100  */
101 #define I2O_BSA_DSC_SUCCESS             0x0000
102 #define I2O_BSA_DSC_MEDIA_ERROR         0x0001
103 #define I2O_BSA_DSC_ACCESS_ERROR        0x0002
104 #define I2O_BSA_DSC_DEVICE_FAILURE      0x0003
105 #define I2O_BSA_DSC_DEVICE_NOT_READY    0x0004
106 #define I2O_BSA_DSC_MEDIA_NOT_PRESENT   0x0005
107 #define I2O_BSA_DSC_MEDIA_LOCKED        0x0006
108 #define I2O_BSA_DSC_MEDIA_FAILURE       0x0007
109 #define I2O_BSA_DSC_PROTOCOL_FAILURE    0x0008
110 #define I2O_BSA_DSC_BUS_FAILURE         0x0009
111 #define I2O_BSA_DSC_ACCESS_VIOLATION    0x000A
112 #define I2O_BSA_DSC_WRITE_PROTECTED     0x000B
113 #define I2O_BSA_DSC_DEVICE_RESET        0x000C
114 #define I2O_BSA_DSC_VOLUME_CHANGED      0x000D
115 #define I2O_BSA_DSC_TIMEOUT             0x000E
116
117 #define I2O_UNIT(dev)   (i2ob_dev[MINOR((dev)) & 0xf0])
118 #define I2O_LOCK(unit)  (i2ob_dev[(unit)].req_queue->queue_lock)
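/*
 * Minor number layout, as implied by the masks used throughout this
 * file: the top four bits select the unit and the low four bits the
 * partition, so unit n owns minors n<<4 through (n<<4)+15 and
 * I2O_UNIT() masks the partition bits away with 0xf0.
 */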
119
120 /*
121  *      Some of these can be made smaller later
122  */
123
124 static int i2ob_blksizes[MAX_I2OB<<4];
125 static int i2ob_sizes[MAX_I2OB<<4];
126 static int i2ob_media_change_flag[MAX_I2OB];
127
128 static int i2ob_context;
129
130 /*
131  * I2O Block device descriptor 
132  */
133 struct i2ob_device
134 {
135         struct i2o_controller *controller;
136         struct i2o_device *i2odev;
137         int unit;
138         int tid;
139         int flags;
140         int refcnt;
141         struct request *head, *tail;
142         request_queue_t *req_queue;
143         int max_segments;
144         int done_flag;
145         int constipated;
146         int depth;
147 };
148
149 /*
150  *      FIXME:
151  *      We should cache align these to avoid ping-ponging lines on SMP
152  *      boxes under heavy I/O load...
153  */
154 struct i2ob_request
155 {
156         struct i2ob_request *next;
157         struct request *req;
158         int num;
159 };
160
161 /*
162  * Per IOP request queue information
163  *
164  * We have a separate request_queue_t per IOP so that a heavily
165  * loaded I2O block device on an IOP does not starve block devices
166  * across all I2O controllers.
167  * 
168  */
169 struct i2ob_iop_queue
170 {
171         atomic_t queue_depth;
172         struct i2ob_request request_queue[MAX_I2OB_DEPTH];
173         struct i2ob_request *i2ob_qhead;
174         request_queue_t req_queue;
175 };
176 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
177 static struct i2ob_request *i2ob_backlog[MAX_I2O_CONTROLLERS];
178 static struct i2ob_request *i2ob_backlog_tail[MAX_I2O_CONTROLLERS];
179
180 /*
181  *      Each I2O disk is one of these.
182  */
183
184 static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
185 static int i2ob_dev_count = 0;
186 static struct hd_struct i2ob[MAX_I2OB<<4];
187 static struct gendisk i2ob_gendisk;     /* Declared later */
188
189 /*
190  * Mutex and spin lock for event handling synchronization
191  * evt_msg contains the last event.
192  */
193 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
194 static DECLARE_COMPLETION(i2ob_thread_dead);
195 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
196 static u32 evt_msg[MSG_FRAME_SIZE>>2];
197
198 static struct timer_list i2ob_timer;
199 static int i2ob_timer_started = 0;
200
201 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
202          struct i2o_message *);
203 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
204 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
205 static void i2ob_reboot_event(void);
206 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
207 static void i2ob_end_request(struct request *);
208 static void i2ob_request(request_queue_t *);
209 static int i2ob_backlog_request(struct i2o_controller *, struct i2ob_device *);
210 static int i2ob_init_iop(unsigned int);
211 static request_queue_t* i2ob_get_queue(kdev_t);
212 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
213 static int do_i2ob_revalidate(kdev_t, int);
214 static int i2ob_evt(void *);
215
216 static int evt_pid = 0;
217 static int evt_running = 0;
218 static int scan_unit = 0;
219
220 /*
221  * I2O OSM registration structure...keeps getting bigger and bigger :)
222  */
223 static struct i2o_handler i2o_block_handler =
224 {
225         i2o_block_reply,
226         i2ob_new_device,
227         i2ob_del_device,
228         i2ob_reboot_event,
229         "I2O Block OSM",
230         0,
231         I2O_CLASS_RANDOM_BLOCK_STORAGE
232 };
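/*
 *      The initializers above are positional: the reply handler, the
 *      device add/remove notifiers, the reboot hook, the OSM name, a
 *      context field, and the device class this OSM binds to.
 */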
233
234 /*
235  *      Get a message
236  */
237
238 static u32 i2ob_get(struct i2ob_device *dev)
239 {
240         struct i2o_controller *c=dev->controller;
241         return I2O_POST_READ32(c);
242 }
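/*
 *      A return of 0xFFFFFFFF means the IOP's inbound queue had no
 *      free message frame. Callers react by backing off: i2ob_flush()
 *      gives up, i2ob_backlog_request() stops draining, and
 *      i2ob_request() arms the retry timer.
 */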
243  
244 /*
245  *      Turn a Linux block request into an I2O block read/write.
246  */
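/*
 *      Frame layout used below, offsets in bytes from the frame base:
 *      +0 size and SGL offset, +4 function and TIDs, +8 our context
 *      plus the unit, +12 the request slot number (found again on
 *      reply via m[3]), +16 control flags, +20 transfer length in
 *      bytes, +24/+28 the 64-bit byte offset, SG list from +32 on.
 */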
247
248 static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
249 {
250         struct i2o_controller *c = dev->controller;
251         int tid = dev->tid;
252         unsigned long msg;
253         unsigned long mptr;
254         u64 offset;
255         struct request *req = ireq->req;
256         struct bio *bio = req->bio;
257         int count = req->nr_sectors<<9;
258         unsigned long last = ~0UL;
259         unsigned short size = 0;
260
261         // printk(KERN_INFO "i2ob_send called\n");
262         /* Map the message to a virtual address */
263         msg = c->mem_offset + m;
264         
265         /*
266          * Build the message based on the request.
267          */
268         __raw_writel(i2ob_context|(unit<<8), msg+8);
269         __raw_writel(ireq->num, msg+12);
270         __raw_writel(req->nr_sectors << 9, msg+20);
271
272         /* 
273          * Mask out partitions from now on
274          */
275         unit &= 0xF0;
276                 
277         /* This can be optimised later - just want to be sure it's right
278            for starters */
279         offset = ((u64)(req->sector+base)) << 9;
280         __raw_writel( offset & 0xFFFFFFFF, msg+24);
281         __raw_writel(offset>>32, msg+28);
282         mptr=msg+32;
283         
284         if(req->cmd == READ)
285         {
286                 __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
287                 while(bio)
288                 {
289                         if (bio_to_phys(bio) == last) {
290                                 size += bio->bi_size;
291                                 last += bio->bi_size;
292                                 if(bio->bi_next)
293                                         __raw_writel(0x14000000|(size), mptr-8);
294                                 else
295                                         __raw_writel(0xD4000000|(size), mptr-8);
296                         }
297                         else
298                         {
299                                 if(bio->bi_next)
300                                         __raw_writel(0x10000000|bio->bi_size, mptr);
301                                 else
302                                         __raw_writel(0xD0000000|bio->bi_size, mptr);
303                                 __raw_writel(bio_to_phys(bio), mptr+4);
304                                 mptr += 8;      
305                                 size = bio->bi_size;
306                                 last = bio_to_phys(bio) + bio->bi_size;
307                         }
308
309                         count -= bio->bi_size;
310                         bio = bio->bi_next;
311                 }
312                 /*
313                  *      Heuristic for now since the block layer doesn't give
314                  *      us enough info. If it's a big write assume sequential
315                  *      readahead on the controller. If it's small then don't
316                  *      read ahead but do use the controller cache.
317                  */
318                 if(size >= 8192)
319                         __raw_writel((8<<24)|(1<<16)|8, msg+16);
320                 else
321                         __raw_writel((8<<24)|(1<<16)|4, msg+16);
322         }
323         else if(req->cmd == WRITE)
324         {
325                 __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
326                 while(bio)
327                 {
328                         if (bio_to_phys(bio) == last) {
329                                 size += bio->bi_size;
330                                 last += bio->bi_size;
331                                 if(bio->bi_next)
332                                         __raw_writel(0x14000000|(size), mptr-8);
333                                 else
334                                         __raw_writel(0xD4000000|(size), mptr-8);
335                         }
336                         else
337                         {
338                                 if(bio->bi_next)
339                                         __raw_writel(0x14000000|bio->bi_size, mptr);
340                                 else
341                                         __raw_writel(0xD4000000|bio->bi_size, mptr);
342                                 __raw_writel(bio_to_phys(bio), mptr+4);
343                                 mptr += 8;      
344                                 size = bio->bi_size;
345                                 last = bio_to_phys(bio) + bio->bi_size;
346                         }
347
348                         count -= bio->bi_size;
349                         bio = bio->bi_next;
350                 }
351
352                 if(c->battery)
353                 {
354                         
355                         if(size>16384)
356                                 __raw_writel(4, msg+16);
357                         else
358                                 /* 
359                                  * Allow replies to come back once data is cached in the controller
360                                  * This allows us to handle writes quickly thus giving more of the
361                                  * queue to reads.
362                                  */
363                                 __raw_writel(16, msg+16);
364                 }
365                 else
366                 {
367                         /* Large write, don't cache */
368                         if(size>8192)
369                                 __raw_writel(4, msg+16);
370                         else
371                         /* write through */
372                                 __raw_writel(8, msg+16);
373                 }
374         }
375         __raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
376         
377         if(count != 0)
378         {
379                 printk(KERN_ERR "Request count botched by %d.\n", count);
380         }
381
382         i2o_post_message(c,m);
383         atomic_inc(&i2ob_queues[c->unit]->queue_depth);
384
385         return 0;
386 }
387
388 /*
389  *      Return a completed i2ob_request to the head of the free list.
390  *      The name is historical: the request left the active list when
391  *      it was dispatched. Caller must hold the lock.
392  */
393  
394 static inline void i2ob_unhook_request(struct i2ob_request *ireq, 
395         unsigned int iop)
396 {
397         ireq->next = i2ob_queues[iop]->i2ob_qhead;
398         i2ob_queues[iop]->i2ob_qhead = ireq;
399 }
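/*
 *      This is the push half of a simple free list: i2ob_request()
 *      pops an i2ob_request off i2ob_qhead when dispatching, and the
 *      completion path pushes it back here, so the per-IOP array
 *      doubles as a pool of MAX_I2OB_DEPTH in-flight slots.
 */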
400
401 /*
402  *      Request completion handler
403  */
404  
405 static inline void i2ob_end_request(struct request *req)
406 {
407         /*
408          * Loop until all of the buffers that are linked
409          * to this request have been marked updated and
410          * unlocked.
411          */
412
413         while (end_that_request_first(req, !req->errors, req->hard_cur_sectors))
414                 ;
415
416         /*
417          * It is now ok to complete the request.
418          */
419         end_that_request_last( req );
420 }
421
422 static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit)
423 {
424         unsigned long msg;
425         u32 m = i2ob_get(d);
426         
427         if(m == 0xFFFFFFFF)
428                 return -1;
429                 
430         msg = c->mem_offset + m;
431
432         /*
433          *      Ask the controller to write the cache back. This sorts out
434  *      the SuperTrak firmware flaw and also does roughly the right
435          *      thing for other cases too.
436          */
437                 
438         __raw_writel(FIVE_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
439         __raw_writel(I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|d->tid, msg+4);
440         __raw_writel(i2ob_context|(unit<<8), msg+8);
441         __raw_writel(0, msg+12);
442         __raw_writel(60<<16, msg+16);
443         
444         i2o_post_message(c,m);
445         return 0;
446 }
447                         
448 /*
449  *      OSM reply handler. This gets all the message replies
450  */
451
452 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
453 {
454         unsigned long flags;
455         struct i2ob_request *ireq = NULL;
456         u8 st;
457         u32 *m = (u32 *)msg;
458         u8 unit = (m[2]>>8)&0xF0;       /* low 4 bits are partition */
459         struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
460
461         /*
462          * FAILed message
463          */
464         if(m[0] & (1<<13))
465         {
466                 /*
467                  * FAILed message from controller
468                  * We increment the error count and abort it
469                  *
470                  * In theory this will never happen.  The I2O block class
471  * specification states that block devices never return
472                  * FAILs but instead use the REQ status field...but
473                  * better be on the safe side since no one really follows
474                  * the spec to the book :)
475                  */
476                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
477                 ireq->req->errors++;
478
479                 spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
480                 i2ob_unhook_request(ireq, c->unit);
481                 i2ob_end_request(ireq->req);
482                 spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
483         
484                 /* Now flush the message by making it a NOP */
485                 m[0]&=0x00FFFFFF;
486                 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
487                 i2o_post_message(c,virt_to_bus(m));
488
489                 return;
490         }
491
492         if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
493         {
494                 spin_lock(&i2ob_evt_lock);
495                 memcpy(evt_msg, msg, (m[0]>>16)<<2);
496                 spin_unlock(&i2ob_evt_lock);
497                 up(&i2ob_evt_sem);
498                 return;
499         }
500
501         if(msg->function == I2O_CMD_BLOCK_CFLUSH)
502         {
503                 spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
504                 dev->constipated=0;
505                 DEBUG(("unconstipated\n"));
506                 if(i2ob_backlog_request(c, dev)==0)
507                         i2ob_request(dev->req_queue);
508                 spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
509                 return;
510         }
511
512         if(!dev->i2odev)
513         {
514                 /*
515                  * This is a HACK, but Intel Integrated RAID allows the user
516                  * to delete a volume that is claimed, locked, and in use 
517                  * by the OS. We have to check for a reply from a
518                  * non-existent device and flag it as an error or the system 
519                  * goes kaput...
520                  */
521                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
522                 ireq->req->errors++;
523                 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
524                 spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
525                 i2ob_unhook_request(ireq, c->unit);
526                 i2ob_end_request(ireq->req);
527                 spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
528                 return;
529         }       
530
531         /*
532          *      Let's see what is cooking. We stuffed the
533          *      request in the context.
534          */
535                  
536         ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
537         st=m[4]>>24;
538
539         if(st!=0)
540         {
541                 int err;
542                 char *bsa_errors[] = 
543                 { 
544                         "Success", 
545                         "Media Error", 
546                         "Failure communicating to device",
547                         "Device Failure",
548                         "Device is not ready",
549                         "Media not present",
550                         "Media is locked by another user",
551                         "Media has failed",
552                         "Failure communicating to device",
553                         "Device bus failure",
554                         "Device is locked by another user",
555                         "Device is write protected",
556                         "Device has reset",
557                         "Volume has changed, waiting for acknowledgement"
558                 };
559                                 
560                 err = m[4]&0xFFFF;
561                 
562                 /*
563                  *      Device not ready means two things. One is that the
564                  *      device went offline (but not a media removal).
565                  *
566                  *      The second is that you have a SuperTrak 100 and the
567                  *      firmware got constipated. Unlike standard i2o card
568                  *      setups the supertrak returns an error rather than
569                  *      blocking for the timeout in these cases.
570                  */
571                  
572                 
573                 spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
574                 if(err==4)
575                 {
576                         /*
577                          *      Time to uncork stuff
578                          */
579                         
580                         if(!dev->constipated)
581                         {
582                                 dev->constipated = 1;
583                                 DEBUG(("constipated\n"));
584                                 /* Now pull the chain */
585                                 if(i2ob_flush(c, dev, unit)<0)
586                                 {
587                                         DEBUG(("i2ob: Unable to queue flush. Retrying I/O immediately.\n"));
588                                         dev->constipated=0;
589                                 }
590                                 DEBUG(("flushing\n"));
591                         }
592                         
593                         /*
594                          *      Recycle the request
595                          */
596                          
597 //                      i2ob_unhook_request(ireq, c->unit);
598                         
599                         /*
600                          *      Place it on the recycle queue
601                          */
602                          
603                         ireq->next = NULL;
604                         if(i2ob_backlog_tail[c->unit]!=NULL)
605                                 i2ob_backlog_tail[c->unit]->next = ireq;
606                         else
607                                 i2ob_backlog[c->unit] = ireq;                   
608                         i2ob_backlog_tail[c->unit] = ireq;
609                         
610                         atomic_dec(&i2ob_queues[c->unit]->queue_depth);
611
612                         /*
613                          *      If the constipator flush failed we want to
614                          *      poke the queue again. 
615                          */
616                          
617                         i2ob_request(dev->req_queue);
618                         spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
619                         
620                         /*
621                          *      and out
622                          */
623                          
624                         return; 
625                 }
626                 spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
627                 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, 
628                         err < sizeof(bsa_errors)/sizeof(bsa_errors[0]) ? bsa_errors[err] : "Unknown error");
629                 if(m[4]&0x00FF0000)
630                         printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
631                 printk(".\n");
632                 ireq->req->errors++;    
633         }
634         else
635                 ireq->req->errors = 0;
636
637         /*
638          *      Dequeue the request. We use irqsave locks as one day we
639          *      may be running polled controllers from a BH...
640          */
641
642         spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
643         i2ob_unhook_request(ireq, c->unit);
644         i2ob_end_request(ireq->req);
645         atomic_dec(&i2ob_queues[c->unit]->queue_depth);
646
647         /*
648          *      We may be able to do more I/O
649          */
650
651         if(i2ob_backlog_request(c, dev)==0)
652                 i2ob_request(dev->req_queue);
653
654         spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
655 }
656
657 /* 
658  * Event handler.  Needs to be a separate thread b/c we may have
659  * to do things like scan a partition table, or query parameters
660  * which cannot be done from an interrupt or from a bottom half.
661  */
662 static int i2ob_evt(void *dummy)
663 {
664         unsigned int evt;
665         unsigned long flags;
666         int unit;
667         int i;
668         //The only event that has data is the SCSI_SMART event.
669         struct i2o_reply {
670                 u32 header[4];
671                 u32 evt_indicator;
672                 u8 ASC;
673                 u8 ASCQ;
674                 u8 data[16];
675                 } evt_local_buf, *evt_local = &evt_local_buf;
676
677         lock_kernel();
678         daemonize();
679         unlock_kernel();
680
681         strcpy(current->comm, "i2oblock");
682         evt_running = 1;
683
684         while(1)
685         {
686                 if(down_interruptible(&i2ob_evt_sem))
687                 {
688                         evt_running = 0;
689                         printk(KERN_INFO "i2oblock: event thread exiting\n");
690                         break;
691                 }
692
693                 /*
694                  * Keep another CPU/interrupt from overwriting the 
695                  * message while we're reading it
696                  *
697                  * We stuffed the unit in the TxContext and grab the event mask
698                  * (SCSI_SMART is the only event we register for with EventData)
699                  */
700                 spin_lock_irqsave(&i2ob_evt_lock, flags);
701                 memcpy(evt_local, evt_msg, sizeof(evt_local_buf));
702                 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
703
704                 unit = evt_local->header[3];
705                 evt = evt_local->evt_indicator;
706
707                 switch(evt)
708                 {
709                         /*
710                          * New volume loaded on same TID, so we just re-install.
711                          * The TID/controller don't change as it is the same
712                          * I2O device.  It's just new media that we have to
713                          * rescan.
714                          */
715                         case I2O_EVT_IND_BSA_VOLUME_LOAD:
716                         {
717                                 i2ob_install_device(i2ob_dev[unit].i2odev->controller, 
718                                         i2ob_dev[unit].i2odev, unit);
719                                 break;
720                         }
721
722                         /*
723                          * No media, so set all parameters to 0 and set the media
724                          * change flag. The I2O device is still valid, just doesn't
725                          * have media, so we don't want to clear the controller or
726                          * device pointer.
727                          */
728                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
729                         {
730                                 for(i = unit; i <= unit+15; i++)
731                                 {
732                                         i2ob_sizes[i] = 0;
733                                         blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
734                                         i2ob[i].nr_sects = 0;
735                                         i2ob_gendisk.part[i].nr_sects = 0;
736                                 }
737                                 i2ob_media_change_flag[unit] = 1;
738                                 break;
739                         }
740
741                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
742                                 printk(KERN_WARNING "%s: Attempt to eject locked media\n", 
743                                         i2ob_dev[unit].i2odev->dev_name);
744                                 break;
745
746                         /*
747                          * The capacity has changed and we are going to be
748                          * updating the max_sectors and other information 
749                          * about this disk.  We try a revalidate first. If
750                          * the block device is in use, we don't want to
751                          * do that as there may be I/Os bound for the disk
752                          * at the moment.  In that case we read the size 
753                          * from the device and update the information ourselves
754                          * and the user can later force a partition table
755                          * update through an ioctl.
756                          */
757                         case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
758                         {
759                                 u64 size;
760
761                                 if(do_i2ob_revalidate(MKDEV(MAJOR_NR, unit),0) != -EBUSY)
762                                         continue;
763
764                                 if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
765                                         i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
766
767                                 spin_lock_irqsave(&I2O_LOCK(unit), flags);      
768                                 i2ob_sizes[unit] = (int)(size>>10);
769                                 i2ob_gendisk.part[unit].nr_sects = size>>9;
770                                 i2ob[unit].nr_sects = (int)(size>>9);
771                                 spin_unlock_irqrestore(&I2O_LOCK(unit), flags); 
772                                 break;
773                         }
774
775                         /* 
776                          * We got a SCSI SMART event, we just log the relevant
777                          * information and let the user decide what they want
778                          * to do with the information.
779                          */
780                         case I2O_EVT_IND_BSA_SCSI_SMART:
781                         {
782                                 char buf[16];
783                                 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
784                                 evt_local->data[15]='\0';       /* stay inside data[16] */
785                                 sprintf(buf,"%s",&evt_local->data[0]);
786                                 printk(KERN_INFO "      Disk Serial#:%s\n",buf);
787                                 printk(KERN_INFO "      ASC 0x%02x \n",evt_local->ASC);
788                                 printk(KERN_INFO "      ASCQ 0x%02x \n",evt_local->ASCQ);
789                                 break;
790                         }
791                 
792                         /*
793                          *      Non event
794                          */
795                          
796                         case 0:
797                                 break;
798                                 
799                         /*
800                          * An event we didn't ask for.  Call the card manufacturer
801                          * and tell them to fix their firmware :)
802                          */
803                         default:
804                                 printk(KERN_INFO "%s: Received event %d we didn't register for\n"
805                                         KERN_INFO "   Blame the I2O card manufacturer 8)\n", 
806                                         i2ob_dev[unit].i2odev->dev_name, evt);
807                                 break;
808                 }
809         };
810
811         complete_and_exit(&i2ob_thread_dead,0);
812         return 0;
813 }
814
815 /*
816  * The timer handler will attempt to restart requests 
817  * that are queued to the driver.  This handler
818  * currently only gets called if the controller
819  * had no more room in its inbound fifo.  
820  */
821
822 static void i2ob_timer_handler(unsigned long q)
823 {
824         request_queue_t *req_queue = (request_queue_t *) q;
825         unsigned long flags;
826
827         /*
828          * We cannot touch the request queue or the timer
829          * flag without holding the queue_lock
830          */
831         spin_lock_irqsave(&req_queue->queue_lock,flags);
832
833         /* 
834          * Clear the timer started flag so that 
835          * the timer can be queued again.
836          */
837         i2ob_timer_started = 0;
838
839         /* 
840          * Restart any requests.
841          */
842         i2ob_request(req_queue);
843
844         /* 
845          * Free the lock.
846          */
847         spin_unlock_irqrestore(&req_queue->queue_lock,flags);
848 }
849
850 static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev)
851 {
852         u32 m;
853         struct i2ob_request *ireq;
854         
855         while((ireq=i2ob_backlog[c->unit])!=NULL)
856         {
857                 int unit;
858
859                 if(atomic_read(&i2ob_queues[c->unit]->queue_depth) > dev->depth/4)
860                         break;
861
862                 m = i2ob_get(dev);
863                 if(m == 0xFFFFFFFF)
864                         break;
865
866                 i2ob_backlog[c->unit] = ireq->next;
867                 if(i2ob_backlog[c->unit] == NULL)
868                         i2ob_backlog_tail[c->unit] = NULL;
869                         
870                 unit = MINOR(ireq->req->rq_dev);
871                 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, unit);
872         }
873         if(i2ob_backlog[c->unit])
874                 return 1;
875         return 0;
876 }
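/*
 *      A non-zero return means requests are still backlogged; both
 *      callers in the reply path use this to avoid feeding fresh
 *      requests past ones that are waiting for the flush to finish.
 */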
877
878 /*
879  *      The I2O block driver is listed as one of those that pulls the
880  *      front entry off the queue before processing it. This is important
881  *      to remember here. If we drop the io lock then CURRENT will change
882  *      on us. We must unlink CURRENT in this routine before we return, if
883  *      we use it.
884  */
885
886 static void i2ob_request(request_queue_t *q)
887 {
888         struct request *req;
889         struct i2ob_request *ireq;
890         int unit;
891         struct i2ob_device *dev;
892         u32 m;
893         
894         
895         while (!list_empty(&q->queue_head)) {
896                 /*
897                  *      On an IRQ completion if there is an inactive
898                  *      request on the queue head it means it isn't yet
899                  *      ready to dispatch.
900                  */
901                 req = blkdev_entry_next_request(&q->queue_head);
902
903                 if(req->rq_status == RQ_INACTIVE)
904                         return;
905                         
906                 unit = MINOR(req->rq_dev);
907                 dev = &i2ob_dev[(unit&0xF0)];
908
909                 /* 
910                  *      Queue depths probably belong with some kind of 
911                  *      generic IOP commit control. Certainly it's not right 
912                  *      that it's global!  
913                  */
914                 if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
915                         break;
916                 
917                 /*
918                  *      Is the channel constipated ?
919                  */
920
921                 if(i2ob_backlog[dev->unit]!=NULL)
922                         break;
923                         
924                 /* Get a message */
925                 m = i2ob_get(dev);
926
927                 if(m==0xFFFFFFFF)
928                 {
929                         /* 
930                          * See if the timer has already been queued.
931                          */
932                         if (!i2ob_timer_started)
933                         {
934                                 DEBUG((KERN_ERR "i2ob: starting timer\n"));
935
936                                 /*
937                                  * Set the timer_started flag to ensure
938                                  * that the timer is only queued once.
939                                  * Queuing it more than once will corrupt
940                                  * the timer queue.
941                                  */
942                                 i2ob_timer_started = 1;
943
944                                 /* 
945                                  * Set up the timer to expire in
946                                  * 500ms.
947                                  */
948                                 i2ob_timer.expires = jiffies + (HZ >> 1);
949                                 i2ob_timer.data = (unsigned long)q;
950
951                                 /*
952                                  * Start it.
953                                  */
954                                  
955                                 add_timer(&i2ob_timer);
956                         }
957                         return;         /* no frame either way; the timer reruns the queue */
958                 }
959
960                 /*
961                  * Everything ok, so pull from kernel queue onto our queue
962                  */
963                 req->errors = 0;
964                 blkdev_dequeue_request(req);    
965                 req->waiting = NULL;
966                 
967                 ireq = i2ob_queues[dev->unit]->i2ob_qhead;
968                 i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
969                 ireq->req = req;
970
971                 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0));
972         }
973 }
974
975
976 /*
977  *      SCSI-CAM for ioctl geometry mapping
978  *      Duplicated with SCSI - this should be moved into somewhere common
979  *      perhaps genhd ?
980  *
981  * LBA -> CHS mapping table taken from:
982  *
983  * "Incorporating the I2O Architecture into BIOS for Intel Architecture 
984  *  Platforms" 
985  *
986  * This is an I2O document that is only available to I2O members,
987  * not developers.
988  *
989  * From my understanding, this is how all the I2O cards do this
990  *
991  * Disk Size      | Sectors | Heads | Cylinders
992  * ---------------+---------+-------+-------------------
993  * 1 < X <= 528M  | 63      | 16    | X/(63 * 16 * 512)
994  * 528M < X <= 1G | 63      | 32    | X/(63 * 32 * 512)
995  * 1G < X <= 21G  | 63      | 64    | X/(63 * 64 * 512)
996  * 21G < X <= 42G | 63      | 128   | X/(63 * 128 * 512)
997  * 42G < X        | 63      | 255   | X/(63 * 255 * 512)
998  */
999 #define BLOCK_SIZE_528M         1081344
1000 #define BLOCK_SIZE_1G           2097152
1001 #define BLOCK_SIZE_21G          4403200
1002 #define BLOCK_SIZE_42G          8806400
1003 #define BLOCK_SIZE_84G          17612800
1004
1005 static void i2o_block_biosparam(
1006         unsigned long capacity,
1007         unsigned short *cyls,
1008         unsigned char *hds,
1009         unsigned char *secs) 
1010 {
1011         unsigned long heads, sectors, cylinders; 
1012
1013         sectors = 63L;                          /* Maximize sectors per track */ 
1014         if(capacity <= BLOCK_SIZE_528M)
1015                 heads = 16;
1016         else if(capacity <= BLOCK_SIZE_1G)
1017                 heads = 32;
1018         else if(capacity <= BLOCK_SIZE_21G)
1019                 heads = 64;
1020         else if(capacity <= BLOCK_SIZE_42G)
1021                 heads = 128;
1022         else
1023                 heads = 255;
1024
1025         cylinders = capacity / (heads * sectors);
1026
1027         *cyls = (unsigned short) cylinders;     /* Stuff return values */ 
1028         *secs = (unsigned char) sectors; 
1029         *hds  = (unsigned char) heads; 
1030 }
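/*
 *      Worked example of the mapping above: a volume of 1000000
 *      sectors is below BLOCK_SIZE_528M, so it reports 63 sectors,
 *      16 heads and 1000000 / (16 * 63) = 992 cylinders (truncated).
 */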
1031
1032
1033 /*
1034  *      Rescan the partition tables
1035  */
1036  
1037 static int do_i2ob_revalidate(kdev_t dev, int maxu)
1038 {
1039         int minor=MINOR(dev);
1040         int i;
1041         
1042         minor&=0xF0;
1043
1044         i2ob_dev[minor].refcnt++;
1045         if(i2ob_dev[minor].refcnt>maxu+1)
1046         {
1047                 i2ob_dev[minor].refcnt--;
1048                 return -EBUSY;
1049         }
1050         
1051         for( i = 15; i>=0 ; i--)
1052         {
1053                 int m = minor+i;
1054                 invalidate_device(MKDEV(MAJOR_NR, m), 1);
1055                 i2ob_gendisk.part[m].start_sect = 0;
1056                 i2ob_gendisk.part[m].nr_sects = 0;
1057         }
1058
1059         /*
1060          *      Do a physical check and then reconfigure
1061          */
1062          
1063         i2ob_install_device(i2ob_dev[minor].controller, i2ob_dev[minor].i2odev,
1064                 minor);
1065         i2ob_dev[minor].refcnt--;
1066         return 0;
1067 }
1068
1069 /*
1070  *      Issue device specific ioctl calls.
1071  */
1072
1073 static int i2ob_ioctl(struct inode *inode, struct file *file,
1074                      unsigned int cmd, unsigned long arg)
1075 {
1076         /* Anyone capable of this syscall can do *real bad* things */
1077
1078         if (!capable(CAP_SYS_ADMIN))
1079                 return -EPERM;
1080         if (!inode || !inode->i_rdev)
1081                 return -EINVAL;
1082
1083         switch (cmd) {
1084                 case HDIO_GETGEO:
1085                 {
1086                         struct hd_geometry g;
1087                         int u = MINOR(inode->i_rdev) & 0xF0;
1088                         i2o_block_biosparam(i2ob_sizes[u]<<1, 
1089                                 &g.cylinders, &g.heads, &g.sectors);
1090                         g.start = get_start_sect(inode->i_rdev);
1091                         return copy_to_user((void *)arg, &g, sizeof(g))
1092                                 ? -EFAULT : 0;
1093                 }
1094         
1095                 case BLKRRPART:
1096                         if(!capable(CAP_SYS_ADMIN))
1097                                 return -EACCES;
1098                         return do_i2ob_revalidate(inode->i_rdev,1);
1099                         
1100                 case BLKGETSIZE:
1101                 case BLKGETSIZE64:
1102                 case BLKFLSBUF:
1103                 case BLKROSET:
1104                 case BLKROGET:
1105                 case BLKRASET:
1106                 case BLKRAGET:
1107                 case BLKPG:
1108                         return blk_ioctl(inode->i_rdev, cmd, arg);
1109                         
1110                 default:
1111                         return -EINVAL;
1112         }
1113 }
1114
1115 /*
1116  *      Close the block device down
1117  */
1118  
1119 static int i2ob_release(struct inode *inode, struct file *file)
1120 {
1121         struct i2ob_device *dev;
1122         int minor;
1123
1124         minor = MINOR(inode->i_rdev);
1125         if (minor >= (MAX_I2OB<<4))
1126                 return -ENODEV;
1127         dev = &i2ob_dev[(minor&0xF0)];
1128
1129         /*
1130          * This is to deal with the case of an application
1131          * opening a device and then the device disappears while
1132          * it's in use, and then the application tries to release
1133          * it.  ex: Unmounting a deleted RAID volume at reboot. 
1134          * If we send messages, it will just cause FAILs since
1135          * the TID no longer exists.
1136          */
1137         if(!dev->i2odev)
1138                 return 0;
1139
1140         /* Sync the device so we don't get errors */
1141         fsync_dev(inode->i_rdev);
1142
1143         if (dev->refcnt <= 0)
1144                 printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
1145         dev->refcnt--;
1146         if(dev->refcnt==0)
1147         {
1148                 /*
1149                  *      Flush the onboard cache on unmount
1150                  */
1151                 u32 msg[5];
1152                 int *query_done = &dev->done_flag;
1153                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1154                 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1155                 msg[2] = i2ob_context|0x40000000;
1156                 msg[3] = (u32)query_done;
1157                 msg[4] = 60<<16;
1158                 DEBUG("Flushing...");
1159                 i2o_post_wait(dev->controller, msg, 20, 60);
1160
1161                 /*
1162                  *      Unlock the media
1163                  */
1164                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1165                 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1166                 msg[2] = i2ob_context|0x40000000;
1167                 msg[3] = (u32)query_done;
1168                 msg[4] = -1;
1169                 DEBUG("Unlocking...");
1170                 i2o_post_wait(dev->controller, msg, 20, 2);
1171                 DEBUG("Unlocked.\n");
1172         
1173                 /*
1174                  * Now unclaim the device.
1175                  */
1176
1177                 if (i2o_release_device(dev->i2odev, &i2o_block_handler))
1178                         printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
1179                 
1180                 DEBUG("Unclaim\n");
1181         }
1182         return 0;
1183 }
1184
1185 /*
1186  *      Open the block device.
1187  */
1188  
1189 static int i2ob_open(struct inode *inode, struct file *file)
1190 {
1191         int minor;
1192         struct i2ob_device *dev;
1193         
1194         if (!inode)
1195                 return -EINVAL;
1196         minor = MINOR(inode->i_rdev);
1197         if (minor >= MAX_I2OB<<4)
1198                 return -ENODEV;
1199         dev=&i2ob_dev[(minor&0xF0)];
1200
1201         if(!dev->i2odev)        
1202                 return -ENODEV;
1203         
1204         if(dev->refcnt++==0)
1205         { 
1206                 u32 msg[6];
1207                 
1208                 DEBUG("Claim ");
1209                 if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
1210                 {
1211                         dev->refcnt--;
1212                         printk(KERN_INFO "I2O Block: Could not open device\n");
1213                         return -EBUSY;
1214                 }
1215                 DEBUG("Claimed ");
1216                 
1217                 /*
1218                  *      Mount the media if needed. Note that we don't use
1219                  *      the lock bit. Since we have to issue a lock if it
1220                  *      refuses a mount (quite possible) then we might as
1221                  *      well just send two messages out.
1222                  */
1223                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;               
1224                 msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
1225                 msg[4] = -1;
1226                 msg[5] = 0;
1227                 DEBUG("Mount ");
1228                 i2o_post_wait(dev->controller, msg, 24, 2);
1229
1230                 /*
1231                  *      Lock the media
1232                  */
1233                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1234                 msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
1235                 msg[4] = -1;
1236                 DEBUG("Lock ");
1237                 i2o_post_wait(dev->controller, msg, 20, 2);
1238                 DEBUG("Ready.\n");
1239         }               
1240         return 0;
1241 }
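/*
 *      Note the symmetry with i2ob_release() above: the first opener
 *      claims the device and mounts/locks the media, the last closer
 *      flushes, unlocks and unclaims. refcnt does the accounting.
 */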
1242
1243 /*
1244  *      Issue a device query
1245  */
1246  
1247 static int i2ob_query_device(struct i2ob_device *dev, int table, 
1248         int field, void *buf, int buflen)
1249 {
1250         return i2o_query_scalar(dev->controller, dev->tid,
1251                 table, field, buf, buflen);
1252 }
1253
1254
1255 /*
1256  *      Install the I2O block device we found.
1257  */
1258  
1259 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1260 {
1261         u64 size;
1262         u32 blocksize;
1263         u32 limit;
1264         u8 type;
1265         u32 flags, status;
1266         struct i2ob_device *dev=&i2ob_dev[unit];
1267         int i;
1268
1269         /*
1270          * For logging purposes...
1271          */
1272         printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n", 
1273                         d->lct_data.tid, unit); 
1274
1275         /*
1276          *      Ask for the current media data. If that isn't supported
1277          *      then we ask for the device capacity data
1278          */
1279         if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1280           || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1281         {
1282                 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1283                 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1284         }
1285         
1286         i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1287         i2ob_query_device(dev, 0x0000, 6, &status, 4);
1288         i2ob_sizes[unit] = (int)(size>>10);
1289         i2ob_gendisk.part[unit].nr_sects = size>>9;
1290         i2ob[unit].nr_sects = (int)(size>>9);
1291
1292         /* Set limit based on inbound frame size */
1293         limit = (d->controller->status_block->inbound_frame_size - 8)/2;
1294         limit = limit<<9;
1295
1296         /*
1297          * Max number of Scatter-Gather Elements
1298          */     
1299         for(i=unit;i<=unit+15;i++)
1300         {
1301                 request_queue_t *q = i2ob_dev[unit].req_queue;
1302
1303                 blk_queue_max_sectors(q, 256);
1304                 blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2);
1305
1306                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 2)
1307                         i2ob_dev[i].depth = 32;
1308
1309                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 1)
1310                 {
1311                         blk_queue_max_sectors(q, 32);
1312                         blk_queue_max_segments(q, 8);
1313                         i2ob_dev[i].depth = 4;
1314                 }
1315
1316                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req)
1317                 {
1318                         blk_queue_max_sectors(q, 8);
1319                         blk_queue_max_segments(q, 8);
1320                 }
1321         }
1322
1323
1324         sprintf(d->dev_name, "%s%c", i2ob_gendisk.major_name, 'a' + (unit>>4));
1325
1326         printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
1327                  d->dev_name, i2ob_dev[unit].max_segments, i2ob_dev[unit].depth, limit);
1328
1329         i2ob_query_device(dev, 0x0000, 0, &type, 1);
1330
1331         printk(KERN_INFO "%s: ", d->dev_name);
1332         switch(type)
1333         {
1334                 case 0: printk("Disk Storage");break;
1335                 case 4: printk("WORM");break;
1336                 case 5: printk("CD-ROM");break;
1337                 case 7: printk("Optical device");break;
1338                 default:
1339                         printk("Type %d", type);
1340         }
1341         if(status&(1<<10))
1342                 printk("(RAID)");
1343         if(((flags & (1<<3)) && !(status & (1<<3))) ||
1344            ((flags & (1<<4)) && !(status & (1<<4))))
1345         {
1346                 printk(KERN_INFO " Not loaded.\n");
1347                 return 1;
1348         }
1349         printk(": %dMB, %d byte sectors",
1350                 (int)(size>>20), blocksize);
1351         if(status&(1<<0))
1352         {
1353                 u32 cachesize;
1354                 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1355                 cachesize>>=10;
1356                 if(cachesize>4095)
1357                         printk(", %dMb cache", cachesize>>10);
1358                 else
1359                         printk(", %dKb cache", cachesize);
1360                 
1361         }
1362         printk(".\n");
1363         printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", 
1364                 d->dev_name, i2ob_dev[unit].req_queue->max_sectors);
1365
1366         /* 
1367          * If this is the first I2O block device found on this IOP,
1368          * we need to initialize all the queue data structures
1369          * before any I/O can be performed. If it fails, this
1370          * device is useless.
1371          */
1372         if(!i2ob_queues[c->unit]) {
1373                 if(i2ob_init_iop(c->unit))
1374                         return 1;
1375         }
1376
1377         /* 
1378          * This will save one level of lookup/indirection in critical 
1379          * code so that we can directly get the queue ptr from the
1380          * device instead of having to go through the IOP data structure.
1381          */
1382         dev->req_queue = &i2ob_queues[c->unit]->req_queue;
1383
1384         grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9));
1385
1386         /*
1387          * Register for the events we're interested in and that the
1388          * device actually supports.
1389          */
1390         i2o_event_register(c, d->lct_data.tid, i2ob_context, unit, 
1391                 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1392
1393         return 0;
1394 }
1395
1396 /*
1397  * Initialize IOP specific queue structures.  This is called
1398  * once for each IOP that has a block device sitting behind it.
1399  */
1400 static int i2ob_init_iop(unsigned int unit)
1401 {
1402         int i;
1403
1404         i2ob_queues[unit] = (struct i2ob_iop_queue*)
1405                 kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1406         if(!i2ob_queues[unit])
1407         {
1408                 printk(KERN_WARNING
1409                         "Could not allocate request queue for I2O block device!\n");
1410                 return -1;
1411         }
1412
1413         for(i = 0; i < MAX_I2OB_DEPTH; i++)
1414         {
1415                 i2ob_queues[unit]->request_queue[i].next = (i == MAX_I2OB_DEPTH - 1)
1416                         ? NULL : &i2ob_queues[unit]->request_queue[i+1];
1417                 i2ob_queues[unit]->request_queue[i].num = i;
1418         }
1419         
1420         /* All MAX_I2OB_DEPTH slots start out on the free list */
1421         
1422         i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1423         atomic_set(&i2ob_queues[unit]->queue_depth, 0);
1424
1425         blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request);
1426         i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit];
1427
1428         return 0;
1429 }
1430
1431 /*
1432  * Get the request queue for the given device.
1433  */     
1434 static request_queue_t* i2ob_get_queue(kdev_t dev)
1435 {
1436         return I2O_UNIT(dev).req_queue;
1437 }
1438
1439
1440
1441 /*
1442  * Probe the I2O subsystem for block class devices
1443  */
1444 static void i2ob_scan(int bios)
1445 {
1446         int i;
1447         int warned = 0;
1448
1449         struct i2o_device *d, *b=NULL;
1450         struct i2o_controller *c;
1451         struct i2ob_device *dev;
1452                 
1453         for(i=0; i< MAX_I2O_CONTROLLERS; i++)
1454         {
1455                 c=i2o_find_controller(i);
1456         
1457                 if(c==NULL)
1458                         continue;
1459
1460                 /*
1461                  *    The device list connected to the I2O Controller is doubly
1462                  * linked. Here we traverse to the end of the list, and start
1463                  * claiming devices from that end. This assures that within an
1464                  * I2O controller at least the newly created volumes get claimed
1465                  * after the older ones, thus mapping to the same major/minor
1466                  * (and hence device file name) after every reboot.
1467                  * The exceptions being: 
1468                  * 1. There was a TID reuse.
1469                  * 2. There was more than one I2O controller. 
1470                  */
1471
1472                 if(!bios)
1473                 {
1474                         for (d=c->devices;d!=NULL;d=d->next)
1475                                 if(d->next == NULL)
1476                                         b = d;
1477                 }
1478                 else
1479                         b = c->devices;
1480
1481                 while(b != NULL)
1482                 {
1483                         d=b;
1484                         if(bios)
1485                                 b = b->next;
1486                         else
1487                                 b = b->prev;
1488
1489                         if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
1490                                 continue;
1491
1492                         if(d->lct_data.user_tid != 0xFFF)
1493                                 continue;
1494
1495                         if(bios)
1496                         {
1497                                 if(d->lct_data.bios_info != 0x80)
1498                                         continue;
1499                                 printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
1500                         }
1501                         else
1502                         {
1503                                 if(d->lct_data.bios_info == 0x80)
1504                                         continue; /*Already claimed on pass 1 */
1505                         }
1506
1507                         if(i2o_claim_device(d, &i2o_block_handler))
1508                         {
1509                                 printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
1510                                         d->lct_data.tid);
1511                                 printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
1512                                 continue;
1513                         }
1514
1515                         if(scan_unit<MAX_I2OB<<4)
1516                         {
1517                                 /*
1518                                  * Get the device and fill in the
1519                                  * Tid and controller.
1520                                  */
1521                                 dev=&i2ob_dev[scan_unit];
1522                                 dev->i2odev = d; 
1523                                 dev->controller = c;
1524                                 dev->unit = c->unit;
1525                                 dev->tid = d->lct_data.tid;
1526
1527                                 if(i2ob_install_device(c,d,scan_unit))
1528                                         printk(KERN_WARNING "Could not install I2O block device\n");
1529                                 else
1530                                 {
1531                                         scan_unit+=16;
1532                                         i2ob_dev_count++;
1533
1534                                         /* We want to know when device goes away */
1535                                         i2o_device_notify_on(d, &i2o_block_handler);
1536                                 }
1537                         }
1538                         else
1539                         {
1540                                 if(!warned++)
1541                                         printk(KERN_WARNING "i2o_block: too many devices, registering only %d.\n", scan_unit>>4);
1542                         }
1543                         i2o_release_device(d, &i2o_block_handler);
1544                 }
1545                 i2o_unlock_controller(c);
1546         }
1547 }
1548
1549 static void i2ob_probe(void)
1550 {
1551         /*
1552          *      There is some overhead/redundancy involved here: to claim
1553          *      the first boot volume encountered as /dev/i2o/hda every
1554          *      time, all the i2o_controllers are searched and the first
1555          *      I2O block device marked as bootable is claimed first.
1556          *      If an I2O block device was booted from, the BIOS sets its
1557          *      bios_info field to 0x80; that is what we search for.
1558          *      Making sure the bootable volume is /dev/i2o/hda every
1559          *      time prevents a kernel panic while mounting the root
1560          *      partition.
1561          */
1562
1563         printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
1564         i2ob_scan(1);
1565
1566         /*
1567          *      Now the remainder.
1568          */
1569         printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
1570         i2ob_scan(0);
1571 }
1572
1573
1574 /*
1575  * New device notification handler.  Called whenever a new
1576  * I2O block storage device is added to the system.
1577  * 
1578  * Should we spin lock around this to keep multiple devs from 
1579  * getting updated at the same time? 
1580  * 
1581  */
1582 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1583 {
1584         struct i2ob_device *dev;
1585         int unit = 0;
1586
1587         printk(KERN_INFO "i2o_block: New device detected\n");
1588         printk(KERN_INFO "   Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1589
1590         /* Check for available space: at most MAX_I2OB devices, one per 16-minor slot */
1591         if(i2ob_dev_count >= MAX_I2OB)
1592         {
1593                 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
1594                 return;
1595         }
1596         for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
1597         {
1598                 if(!i2ob_dev[unit].i2odev)
1599                         break;
1600         }
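        /* The i2ob_dev_count check above guarantees a free slot exists,
           so unit is always a valid base minor at this point. */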
1601
1602         if(i2o_claim_device(d, &i2o_block_handler))
1603         {
1604                 printk(KERN_INFO 
1605                         "i2o_block: Unable to claim device. Installation aborted\n");
1606                 return;
1607         }
1608
1609         dev = &i2ob_dev[unit];
1610         dev->i2odev = d; 
1611         dev->controller = c;
1612         dev->tid = d->lct_data.tid;
1613
1614         if(i2ob_install_device(c,d,unit))
1615                 printk(KERN_ERR "i2o_block: Could not install new device\n");
1616         else    
1617         {
1618                 i2ob_dev_count++;
1619                 i2o_device_notify_on(d, &i2o_block_handler);
1620         }
1621
1622         i2o_release_device(d, &i2o_block_handler);
1623  
1624         return;
1625 }
1626
1627 /*
1628  * Deleted device notification handler.  Called when a device we
1629  * are talking to has been deleted by the user or some other
1630  * mysterious force outside the kernel.
1631  */
1632 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1633 {       
1634         int unit = 0;
1635         int i = 0;
1636         unsigned long flags;
1637
1638         spin_lock_irqsave(&I2O_LOCK(c->unit), flags);
1639
1640         /*
1641          * Need to do this...we sometimes get two events from the IRTOS
1642          * in a row and that causes lots of problems.
1643          */
1644         i2o_device_notify_off(d, &i2o_block_handler);
1645
1646         printk(KERN_INFO "I2O Block Device Deleted\n");
1647
1648         for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1649         {
1650                 if(i2ob_dev[unit].i2odev == d)
1651                 {
1652                         printk(KERN_INFO "  /dev/%s: Controller %d Tid %d\n", 
1653                                 d->dev_name, c->unit, d->lct_data.tid);
1654                         break;
1655                 }
1656         }
1657         if(unit >= MAX_I2OB<<4)
1658         {
1659                 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1660                 spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
1661                 return;
1662         }
1663
1664         /* 
1665          * Clamp max_sectors while the queue pointer is still valid.
1666          * Clearing req_queue afterwards forces errors when
1667          * i2ob_get_queue() is called by the kernel.
1668          */
1669         for(i = unit; i <= unit+15; i++)
1670         {
1671                 i2ob_dev[i].i2odev = NULL;
1672                 i2ob_sizes[i] = 0;
1673                 if(i2ob_dev[i].req_queue)
                             blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
1674                 i2ob[i].nr_sects = 0;
1675                 i2ob_gendisk.part[i].nr_sects = 0;
1676         }
             i2ob_dev[unit].req_queue = NULL;
1677         spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags);
1678
1679         /*
1680          * Decrease usage count for module
1681          */     
1682
1683         while(i2ob_dev[unit].refcnt--)
1684                 MOD_DEC_USE_COUNT;
1685
1686         i2ob_dev[unit].refcnt = 0;
1687         
1688         i2ob_dev[unit].tid = 0;
1689
1690         /* 
1691          * Do we need this?
1692          * The media didn't really change...the device is just gone
1693          */
1694         i2ob_media_change_flag[unit] = 1;
1695
1696         i2ob_dev_count--;       
1697 }
1698
1699 /*
1700  *      Have we seen a media change?
1701  */
1702 static int i2ob_media_change(kdev_t dev)
1703 {
1704         int i=MINOR(dev);
1705         i>>=4;
1706         if(i2ob_media_change_flag[i])
1707         {
1708                 i2ob_media_change_flag[i]=0;
1709                 return 1;
1710         }
1711         return 0;
1712 }
1713
1714 static int i2ob_revalidate(kdev_t dev)
1715 {
1716         return do_i2ob_revalidate(dev, 0);
1717 }
1718
1719 /*
1720  * Reboot notifier.  This is called by i2o_core when the system
1721  * shuts down.
1722  */
1723 static void i2ob_reboot_event(void)
1724 {
1725         int i;
1726         
1727         for(i=0;i<MAX_I2OB;i++)
1728         {
1729                 struct i2ob_device *dev=&i2ob_dev[(i<<4)];
1730                 
1731                 if(dev->refcnt!=0)
1732                 {
1733                         /*
1734                          *      Flush the onboard cache
1735                          */
1736                         u32 msg[5];
1737                         int *query_done = &dev->done_flag;
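                        /*
                         * Five-word BSA CacheFlush request: word 0 holds
                         * the message size and SGL offset, word 1 the
                         * command plus initiator/target TIDs, word 2 our
                         * initiator context, word 3 the transaction
                         * context (the done flag above), and word 4 the
                         * flush timeout (60, in the upper 16 bits).
                         */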
1738                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1739                         msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1740                         msg[2] = i2ob_context|0x40000000;
1741                         msg[3] = (u32)query_done;
1742                         msg[4] = 60<<16;
1743                         
1744                         DEBUG("Flushing...");
1745                         i2o_post_wait(dev->controller, msg, 20, 60);
1746
1747                         DEBUG("Unlocking...");
1748                         /*
1749                          *      Unlock the media
1750                          */
1751                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1752                         msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1753                         msg[2] = i2ob_context|0x40000000;
1754                         msg[3] = (u32)query_done;
1755                         msg[4] = -1;
1756                         i2o_post_wait(dev->controller, msg, 20, 2);
1757                         
1758                         DEBUG("Unlocked.\n");
1759                 }
1760         }       
1761 }
1762
1763 static struct block_device_operations i2ob_fops =
1764 {
1765         owner:                  THIS_MODULE,
1766         open:                   i2ob_open,
1767         release:                i2ob_release,
1768         ioctl:                  i2ob_ioctl,
1769         check_media_change:     i2ob_media_change,
1770         revalidate:             i2ob_revalidate,
1771 };
1772
1773 static struct gendisk i2ob_gendisk = 
1774 {
1775         major:          MAJOR_NR,
1776         major_name:     "i2o/hd",
1777         minor_shift:    4,
1778         max_p:          1<<4,
1779         part:           i2ob,
1780         sizes:          i2ob_sizes,
1781         nr_real:        MAX_I2OB,
1782         fops:           &i2ob_fops,
1783 };
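/*
 * minor_shift = 4 gives each disk 16 minors (the whole disk plus 15
 * partitions), which is why unit numbers in this file advance in steps
 * of 16 and MAX_I2OB<<4 bounds the per-minor arrays.
 */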
1784
1785
1786 /*
1787  * And here should be modules and kernel interface 
1788  *  (Just smiley confuses emacs :-)
1789  */
1790
1791 #ifdef MODULE
1792 #define i2o_block_init init_module
1793 #endif
1794
1795 int i2o_block_init(void)
1796 {
1797         int i;
1798
1799         printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1800         printk(KERN_INFO "   (c) Copyright 1999-2001 Red Hat Software.\n");
1801         
1802         /*
1803          *      Register the block device interfaces
1804          */
1805
1806         if (register_blkdev(MAJOR_NR, "i2o_block", &i2ob_fops)) {
1807                 printk(KERN_ERR "Unable to get major number %d for i2o_block\n",
1808                        MAJOR_NR);
1809                 return -EIO;
1810         }
1811 #ifdef MODULE
1812         printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1813 #endif
1814
1815         /*
1816          *      Now fill in the boilerplate
1817          */
1818          
1819         blksize_size[MAJOR_NR] = i2ob_blksizes;
1820         blk_size[MAJOR_NR] = i2ob_sizes;
1821         blk_dev[MAJOR_NR].queue = i2ob_get_queue;
1822         
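        /*
         * These are the global per-major tables the 2.4/early-2.5 block
         * core consults: blksize_size[] gives the software block size per
         * minor, blk_size[] the capacity in 1K units, and blk_dev[].queue
         * lets the core locate the per-IOP request queue via
         * i2ob_get_queue() instead of using the default queue.
         */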
1823         blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request);
1824
1825         for (i = 0; i < MAX_I2OB << 4; i++) {
1826                 i2ob_dev[i].refcnt = 0;
1827                 i2ob_dev[i].flags = 0;
1828                 i2ob_dev[i].controller = NULL;
1829                 i2ob_dev[i].i2odev = NULL;
1830                 i2ob_dev[i].tid = 0;
1831                 i2ob_dev[i].head = NULL;
1832                 i2ob_dev[i].tail = NULL;
1833                 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1834                 i2ob_blksizes[i] = 1024;
1835         }
1836         
1837         /*
1838          *      Set up the queue
1839          */
1840         for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1841         {
1842                 i2ob_queues[i] = NULL;
1843         }
1844
1845         /*
1846          *      Timers
1847          */
1848          
1849         init_timer(&i2ob_timer);
1850         i2ob_timer.function = i2ob_timer_handler;
1851         i2ob_timer.data = 0;
1852         
1853         /*
1854          *      Register the OSM handler as we will need this to probe for
1855          *      drives, geometry and other goodies.
1856          */
1857
1858         if(i2o_install_handler(&i2o_block_handler)<0)
1859         {
1860                 unregister_blkdev(MAJOR_NR, "i2o_block");
1861                 blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
1862                 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1863                 return -EINVAL;
1864         }
1865         i2ob_context = i2o_block_handler.context;        
1866
1867         /*
1868          * Initialize event handling thread
1869          */
1870         init_MUTEX_LOCKED(&i2ob_evt_sem);
1871         evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1872         if(evt_pid < 0)
1873         {
1874                 printk(KERN_ERR 
1875                         "i2o_block: Could not initialize event thread.  Aborting\n");
1876                 i2o_remove_handler(&i2o_block_handler);
1877                 unregister_blkdev(MAJOR_NR, "i2o_block");
                     blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
                     return evt_pid;
1878         }
1879
1880         /*
1881          *      Finally see what is actually plugged in to our controllers
1882          */
1883         for (i = 0; i < MAX_I2OB; i++)
1884                 register_disk(&i2ob_gendisk, MKDEV(MAJOR_NR,i<<4), 1<<4,
1885                         &i2ob_fops, 0);
1886         i2ob_probe();
1887
1888         /*
1889          *      Adding i2ob_gendisk into the gendisk list.
1890          */
1891         add_gendisk(&i2ob_gendisk);
1892
1893         return 0;
1894 }
1895
1896 #ifdef MODULE
1897
1898 EXPORT_NO_SYMBOLS;
1899 MODULE_AUTHOR("Red Hat Software");
1900 MODULE_DESCRIPTION("I2O Block Device OSM");
1901 MODULE_LICENSE("GPL");
1902
1903 void cleanup_module(void)
1904 {
1905         int i;
1906         
1907         if(evt_running) {
1908                 printk(KERN_INFO "Killing I2O block threads...");
1909                 i = kill_proc(evt_pid, SIGTERM, 1);
1910                 if(!i) {
1911                         printk("waiting...");
1912                 }
1913                 /* Be sure it died */
1914                 wait_for_completion(&i2ob_thread_dead);
1915                 printk("done.\n");
1916         }
1917
1918         /*
1919          * Unregister for updates from any devices...otherwise we still
1920          * get them and the core jumps to random memory :O
1921          */
1922         if(i2ob_dev_count) {
1923                 struct i2o_device *d;
1924                 for(i = 0; i < MAX_I2OB; i++)
1925                 if((d=i2ob_dev[i<<4].i2odev)) {
1926                         i2o_device_notify_off(d, &i2o_block_handler);
1927                         i2o_event_register(d->controller, d->lct_data.tid, 
1928                                 i2ob_context, i<<4, 0);
1929                 }
1930         }
1931         
1932         /*
1933          *      We may get further callbacks for ourselves. The i2o_core
1934          *      code handles this case reasonably sanely. The problem is
1935          *      that we shouldn't get them at all, but a couple of cards
1936          *      feel obliged to tell us stuff we don't care about.
1937          *
1938          *      This isn't ideal at all but will do for now.
1939          */
1940          
1941         set_current_state(TASK_UNINTERRUPTIBLE);
1942         schedule_timeout(HZ);
1943         
1944         /*
1945          *      Flush the OSM
1946          */
1947
1948         i2o_remove_handler(&i2o_block_handler);
1949                  
1950         /*
1951          *      Return the block device
1952          */
1953         if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
1954                 printk(KERN_ERR "i2o_block: cleanup_module failed\n");
1955
1956         /*
1957          * free request queue
1958          */
1959         blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
1960
1961         del_gendisk(&i2ob_gendisk);
1962 }
1963 #endif