The Problem: Memory corruption in the modules list in kern_module.c

The initial problem was the panic "module_register_init: module named
if_tun not found", triggered by changes committed on 2007/05/14 21:48 UTC.

What follows is the result of a search for the cause:


The kernel is from Jun 2 11:18 UTC.
The kernel config file:

include GENERIC

ident           Generic-GENERIC

nodevice         fwip            # IP over FireWire (RFC 2734,3146)
nodevice         dcons           # Dumb console driver
nodevice         dcons_crom      # Configuration ROM for dcons


Address of mod->name(if_tun): 0xc3eed5ec, phys: 0x985ec
kbd1 at kbdmux0
ath_hal: 0.9.20.3 (AR5210, AR5211, AR5212, RF5111, RF5112, RF2413, RF5413)
acpi0: <A M I OEMRSDT> on motherboard
Lost if_tun at line /usr/src/sys/modules/acpi/acpi/../../../dev/acpica/Osd/OsdHardware.c:178
tun_mod_p->name = if_t
max number of modules: 403, number of modules now: 403
panic: hunt: lost if_tun
cpuid = 0
KDB: enter: panic
[thread pid 0 tid 0 ]
Stopped at      kdb_enter+0x32: leave
db>
db> bt
Tracing pid 0 tid 0 td 0xc0b1a980
kdb_enter(c0a194b5,0,c0a180c4,c1020b68,0,...) at kdb_enter+0x32
panic(c0a180c4,193,193) at panic+0x124
hunt(c0cdb944,b2,c4004c00,c1020ba8,0,...) at hunt+0x106
AcpiOsWritePort(b2,e1,8,f,0,...) at AcpiOsWritePort+0xbb
AcpiHwSetMode(1,f,c1020bd0,0,c1020bf4,...) at AcpiHwSetMode+0x110
AcpiEnable(c0713c67,c0b1add4,0,0,c4004b80,...) at AcpiEnable+0x38
AcpiEnableSubsystem(0,2,0,0,0,...) at AcpiEnableSubsystem+0x67
acpi_attach(c4004c00,c3fd304c,c0abf87c,c0a1b7a4,80000000,...) at acpi_attach+0x268
device_attach(c4004c00,c4004c00,c0a1b706,917,c4004c00,...) at device_attach+0x36f
device_probe_and_attach(c4004c00,c3f4ba80,c1020ce8,c097ceaa,c3f4ba80,...) at device_probe_and_attach+0x100
bus_generic_attach(c3f4ba80,c3f8a445,c1020d2c,c075975f,c3f4ba80,...) at bus_generic_attach+0x19
nexus_attach(c3f4ba80,c3fc204c,c0abf87c,c0a1b7a4,80000000,...) at nexus_attach+0x1a
device_attach(c3f4ba80,c3f4ba80,c0a1b706,917,c3f4ba80,...) at device_attach+0x36f
device_probe_and_attach(c3f4ba80,c3ee6cec,c1020d6c,c096558c,c0b21d64,...) at device_probe_and_attach+0x100
root_bus_configure(c0b21d64,c1020d88,c0706f66,0,101ec00,...) at root_bus_configure+0x1b
configure(0,101ec00,101ec00,101e000,1028000,...) at configure+0xc
mi_startup() at mi_startup+0x96
begin() at begin+0x2c
db>
(gdb) l *AcpiOsWritePort+0xbb
0x44acb is in AcpiOsWritePort (/usr/src/sys/modules/acpi/acpi/../../../dev/acpica/Osd/OsdHardware.c:178).
173         }
174
175         switch (Width) {
176         case 8:
177             bus_space_write_1(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
178             hunt(__FILE__, __LINE__); /* PHO */
179             break;
180         case 16:
181             bus_space_write_2(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
182             hunt(__FILE__, __LINE__); /* PHO */
(gdb) l *AcpiHwSetMode+0x110
0x21de0 is in AcpiHwSetMode (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/hwacpi.c:180).
175         {
176         case ACPI_SYS_MODE_ACPI:
177
178             /* BIOS should have disabled ALL fixed and GP events */
179
180             Status = AcpiOsWritePort (AcpiGbl_FADT.SmiCommand,
181                             (UINT32) AcpiGbl_FADT.AcpiEnable, 8);
182             ACPI_DEBUG_PRINT ((ACPI_DB_INFO, "Attempting to enable ACPI mode\n"));
183             break;
184
(gdb) l *AcpiEnable+0x38
0x1a968 is in AcpiEnable (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/evxfevnt.c:169).
164         else
165         {
166             /* Transition to ACPI mode */
167
168             Status = AcpiHwSetMode (ACPI_SYS_MODE_ACPI);
169             if (ACPI_FAILURE (Status))
170             {
171                 ACPI_ERROR ((AE_INFO, "Could not transition to ACPI mode"));
172                 return_ACPI_STATUS (Status);
173             }
(gdb) l *AcpiEnableSubsystem+0x67
0x2f447 is in AcpiEnableSubsystem (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/utxface.c:229).
224             ACPI_DEBUG_PRINT ((ACPI_DB_EXEC, "[Init] Going into ACPI mode\n"));
225
226             AcpiGbl_OriginalMode = AcpiHwGetMode();
227
228             Status = AcpiEnable ();
229             if (ACPI_FAILURE (Status))
230             {
231                 ACPI_WARNING ((AE_INFO, "AcpiEnable failed"));
232                 return_ACPI_STATUS (Status);
233             }
(gdb) l *acpi_attach+0x268
0x32548 is in acpi_attach (/usr/src/sys/modules/acpi/acpi/../../../dev/acpica/acpi.c:495).
490         flags = 0;
491         if (testenv("debug.acpi.avoid"))
492             flags = ACPI_NO_DEVICE_INIT | ACPI_NO_OBJECT_INIT;
493
494         /* Bring the hardware and basic handlers online. */
495         if (ACPI_FAILURE(status = AcpiEnableSubsystem(flags))) {
496             device_printf(dev, "Could not enable ACPI: %s\n",
497                           AcpiFormatException(status));
498             goto out;
499         }
(kgdb) l *device_attach+0x36f
0xc075977f is in device_attach (device_if.h:178).
173
174     static __inline int DEVICE_ATTACH(device_t dev)
175     {
176             kobjop_t _m;
177             KOBJOPLOOKUP(((kobj_t)dev)->ops,device_attach);
178             return ((device_attach_t *) _m)(dev);
179     }
180
181     /** @brief Unique descriptor for the DEVICE_DETACH() method */
182     extern struct kobjop_desc device_detach_desc;
(kgdb) l *device_probe_and_attach+0x100
0xc075a2b0 is in device_probe_and_attach (../../../kern/subr_bus.c:2347).
2342                            devnomatch(dev);
2343                            dev->flags |= DF_DONENOMATCH;
2344                    }
2345                    return (error);
2346            }
2347            error = device_attach(dev);
2348
2349            return (error);
2350    }
2351
(kgdb) l *bus_generic_attach+0x19
0xc075a379 is in bus_generic_attach (../../../kern/subr_bus.c:2883).
2878    int
2879    bus_generic_attach(device_t dev)
2880    {
2881            device_t child;
2882
2883            TAILQ_FOREACH(child, &dev->children, link) {
2884                    device_probe_and_attach(child);
2885            }
2886
2887            return (0);
(kgdb) l *nexus_attach+0x1a
0xc097ceaa is in nexus_attach (../../../i386/i386/nexus.c:257).
252     {
253
254             bus_generic_probe(dev);
255             bus_generic_attach(dev);
256             return 0;
257     }
258
259     static int
260     nexus_print_all_resources(device_t dev)
261     {
(kgdb) l *root_bus_configure+0x1b
0xc075a2eb is in root_bus_configure (../../../kern/subr_bus.c:3766).
3761    {
3762            device_t dev;
3763
3764            PDEBUG(("."));
3765
3766            TAILQ_FOREACH(dev, &root_bus->children, link) {
3767                    device_probe_and_attach(dev);
3768            }
3769    }
3770
(kgdb) l *configure+0xc
0xc09655ac is in configure (../../../i386/i386/autoconf.c:119).
114     #ifdef DEV_ISA
115             /*
116              * Explicitly probe and attach ISA last.  The isa bus saves
117              * it's device node at attach time for us here.
118              */
119             if (isa_bus_device)
120                     isa_probe_children(isa_bus_device);
121     #endif
122     }
123

acpidump -t -d -o Tyan_S2720.dsdt > Tyan_S2720.asl
Tyan_S2720.asl
Tyan_S2720.dsdt
dmesg
cvs diff -u sys
Index: sys/dev/acpica/Osd/OsdHardware.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/Osd/OsdHardware.c,v
retrieving revision 1.22
diff -u -r1.22 OsdHardware.c
--- sys/dev/acpica/Osd/OsdHardware.c    31 May 2007 00:52:32 -0000      1.22
+++ sys/dev/acpica/Osd/OsdHardware.c    3 Jun 2007 06:43:46 -0000
@@ -161,7 +161,9 @@
 {
     int error;

+       hunt(__FILE__, __LINE__); /* PHO */
     error = acpi_os_check_port(OutPort, Width);
+       hunt(__FILE__, __LINE__); /* PHO */
     if (error != 0) {
        if (bootverbose)
                printf("acpi: bad write to port 0x%03x (%d), val %#x\n",
@@ -173,12 +175,15 @@
     switch (Width) {
     case 8:
         bus_space_write_1(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+       hunt(__FILE__, __LINE__); /* PHO */
         break;
     case 16:
         bus_space_write_2(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+       hunt(__FILE__, __LINE__); /* PHO */
         break;
     case 32:
         bus_space_write_4(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+       hunt(__FILE__, __LINE__); /* PHO */
         break;
     default:
         /* debug trap goes here */
@@ -303,6 +308,7 @@
     ACPI_STATUS status;
     int bus;

+       hunt(__FILE__, __LINE__); /* PHO */
     if (pci_cfgregopen() == 0)
        panic("AcpiOsDerivePciId unable to initialize pci bus");

@@ -328,4 +334,5 @@
            acpi_name(chandle), (*PciId)->Bus, (*PciId)->Device,
            (*PciId)->Function);
     }
+       hunt(__FILE__, __LINE__); /* PHO */
 }
Index: sys/kern/kern_module.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_module.c,v
retrieving revision 1.52
diff -u -r1.52 kern_module.c
--- sys/kern/kern_module.c      4 Mar 2007 22:36:46 -0000       1.52
+++ sys/kern/kern_module.c      3 Jun 2007 06:44:00 -0000
@@ -43,6 +43,7 @@
 #include <sys/sx.h>
 #include <sys/module.h>
 #include <sys/linker.h>
+#include <sys/kdb.h>

 static MALLOC_DEFINE(M_MODULE, "module", "module data structures");

@@ -65,6 +66,42 @@
 struct sx modules_sx;
 static int nextid = 1;
 static void module_shutdown(void *, int);
+static int found_if_tun = 0;
+static int found_if_tun_print = 0;
+static module_t tun_mod_p = NULL;
+static int num_modules = 0;
+
+void
+hunt(const char *file, int line)
+{
+        module_t mod;
+       int i = 0;
+
+        TAILQ_FOREACH(mod, &modules, link) {
+               i++;
+       }
+       if (i > num_modules) num_modules = i;
+
+        TAILQ_FOREACH(mod, &modules, link) {
+                if (strcmp(mod->name, "if_tun") == 0) {
+                        if (found_if_tun == 0) {
+                                found_if_tun = 1;
+                                if (found_if_tun_print ++ == 0) {
+                                        printf("Address: %p\n", mod->name);
+                                       tun_mod_p = mod;
+//                                     kdb_enter("set hwatch");
+                                }
+                        }
+                       return;
+                }
+        }
+        if (found_if_tun == 1) {
+                printf("Lost if_tun at line %s:%d\n", file, line);
+               printf("tun_mod_p->name = %s\n", tun_mod_p->name);
+               printf("max number of modules: %d, number of modules now: %d\n", num_modules, i);
+               panic("hunt: lost if_tun");
+        }
+}

 static int
 modevent_nop(module_t mod, int what, void *arg)
@@ -114,6 +151,9 @@
        int error;
        module_t mod;

+//        printf("module_register_init(%s)\n", data->name);  /* PHO */
+        hunt(__FILE__, __LINE__); /* PHO */
+
        mtx_lock(&Giant);
        MOD_SLOCK;
        mod = module_lookupbyname(data->name);