The Problem: Memory corruption in the modules list in kern_module.c
The initial problem was the panic "module_register_init: module named
if_tun not found", triggered by changes committed on 2007/05/14 21:48 UTC.
This is the result of a search for the cause:
The kernel is from Jun 2 11:18 UTC.
The kernel config file:
include GENERIC
ident Generic-GENERIC
nodevice fwip # IP over FireWire (RFC 2734,3146)
nodevice dcons # Dumb console driver
nodevice dcons_crom # Configuration ROM for dcons
Address of mod->name(if_tun): 0xc3eed5ec, phys: 0x985ec
kbd1 at kbdmux0
ath_hal: 0.9.20.3 (AR5210, AR5211, AR5212, RF5111, RF5112, RF2413, RF5413)
acpi0: <A M I OEMRSDT> on motherboard
Lost if_tun at line /usr/src/sys/modules/acpi/acpi/../../../dev/acpica/Osd/OsdHardware.c:178
tun_mod_p->name = if_t
max number of modules: 403, number of modules now: 403
panic: hunt: lost if_tun
cpuid = 0
KDB: enter: panic
[thread pid 0 tid 0 ]
Stopped at kdb_enter+0x32: leave
db>
db> bt
Tracing pid 0 tid 0 td 0xc0b1a980
kdb_enter(c0a194b5,0,c0a180c4,c1020b68,0,...) at kdb_enter+0x32
panic(c0a180c4,193,193) at panic+0x124
hunt(c0cdb944,b2,c4004c00,c1020ba8,0,...) at hunt+0x106
AcpiOsWritePort(b2,e1,8,f,0,...) at AcpiOsWritePort+0xbb
AcpiHwSetMode(1,f,c1020bd0,0,c1020bf4,...) at AcpiHwSetMode+0x110
AcpiEnable(c0713c67,c0b1add4,0,0,c4004b80,...) at AcpiEnable+0x38
AcpiEnableSubsystem(0,2,0,0,0,...) at AcpiEnableSubsystem+0x67
acpi_attach(c4004c00,c3fd304c,c0abf87c,c0a1b7a4,80000000,...) at acpi_attach+0x268
device_attach(c4004c00,c4004c00,c0a1b706,917,c4004c00,...) at device_attach+0x36f
device_probe_and_attach(c4004c00,c3f4ba80,c1020ce8,c097ceaa,c3f4ba80,...) at device_probe_and_attach+0x100
bus_generic_attach(c3f4ba80,c3f8a445,c1020d2c,c075975f,c3f4ba80,...) at bus_generic_attach+0x19
nexus_attach(c3f4ba80,c3fc204c,c0abf87c,c0a1b7a4,80000000,...) at nexus_attach+0x1a
device_attach(c3f4ba80,c3f4ba80,c0a1b706,917,c3f4ba80,...) at device_attach+0x36f
device_probe_and_attach(c3f4ba80,c3ee6cec,c1020d6c,c096558c,c0b21d64,...) at device_probe_and_attach+0x100
root_bus_configure(c0b21d64,c1020d88,c0706f66,0,101ec00,...) at root_bus_configure+0x1b
configure(0,101ec00,101ec00,101e000,1028000,...) at configure+0xc
mi_startup() at mi_startup+0x96
begin() at begin+0x2c
db>
(gdb) l *AcpiOsWritePort+0xbb
0x44acb is in AcpiOsWritePort (/usr/src/sys/modules/acpi/acpi/../../../dev/acpica/Osd/OsdHardware.c:178).
173 }
174
175 switch (Width) {
176 case 8:
177 bus_space_write_1(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
178 hunt(__FILE__, __LINE__); /* PHO */
179 break;
180 case 16:
181 bus_space_write_2(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
182 hunt(__FILE__, __LINE__); /* PHO */
(gdb) l *AcpiHwSetMode+0x110
0x21de0 is in AcpiHwSetMode (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/hwacpi.c:180).
175 {
176 case ACPI_SYS_MODE_ACPI:
177
178 /* BIOS should have disabled ALL fixed and GP events */
179
180 Status = AcpiOsWritePort (AcpiGbl_FADT.SmiCommand,
181 (UINT32) AcpiGbl_FADT.AcpiEnable, 8);
182 ACPI_DEBUG_PRINT ((ACPI_DB_INFO, "Attempting to enable ACPI mode\n"));
183 break;
184
(gdb) l *AcpiEnable+0x38
0x1a968 is in AcpiEnable (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/evxfevnt.c:169).
164 else
165 {
166 /* Transition to ACPI mode */
167
168 Status = AcpiHwSetMode (ACPI_SYS_MODE_ACPI);
169 if (ACPI_FAILURE (Status))
170 {
171 ACPI_ERROR ((AE_INFO, "Could not transition to ACPI mode"));
172 return_ACPI_STATUS (Status);
173 }
(gdb) l *AcpiEnableSubsystem+0x67
0x2f447 is in AcpiEnableSubsystem (/usr/src/sys/modules/acpi/acpi/../../../contrib/dev/acpica/utxface.c:229).
224 ACPI_DEBUG_PRINT ((ACPI_DB_EXEC, "[Init] Going into ACPI mode\n"));
225
226 AcpiGbl_OriginalMode = AcpiHwGetMode();
227
228 Status = AcpiEnable ();
229 if (ACPI_FAILURE (Status))
230 {
231 ACPI_WARNING ((AE_INFO, "AcpiEnable failed"));
232 return_ACPI_STATUS (Status);
233 }
(gdb) l *acpi_attach+0x268
0x32548 is in acpi_attach (/usr/src/sys/modules/acpi/acpi/../../../dev/acpica/acpi.c:495).
490 flags = 0;
491 if (testenv("debug.acpi.avoid"))
492 flags = ACPI_NO_DEVICE_INIT | ACPI_NO_OBJECT_INIT;
493
494 /* Bring the hardware and basic handlers online. */
495 if (ACPI_FAILURE(status = AcpiEnableSubsystem(flags))) {
496 device_printf(dev, "Could not enable ACPI: %s\n",
497 AcpiFormatException(status));
498 goto out;
499 }
(kgdb) l *device_attach+0x36f
0xc075977f is in device_attach (device_if.h:178).
173
174 static __inline int DEVICE_ATTACH(device_t dev)
175 {
176 kobjop_t _m;
177 KOBJOPLOOKUP(((kobj_t)dev)->ops,device_attach);
178 return ((device_attach_t *) _m)(dev);
179 }
180
181 /** @brief Unique descriptor for the DEVICE_DETACH() method */
182 extern struct kobjop_desc device_detach_desc;
(kgdb) l *device_probe_and_attach+0x100
0xc075a2b0 is in device_probe_and_attach (../../../kern/subr_bus.c:2347).
2342 devnomatch(dev);
2343 dev->flags |= DF_DONENOMATCH;
2344 }
2345 return (error);
2346 }
2347 error = device_attach(dev);
2348
2349 return (error);
2350 }
2351
(kgdb) l *bus_generic_attach+0x19
0xc075a379 is in bus_generic_attach (../../../kern/subr_bus.c:2883).
2878 int
2879 bus_generic_attach(device_t dev)
2880 {
2881 device_t child;
2882
2883 TAILQ_FOREACH(child, &dev->children, link) {
2884 device_probe_and_attach(child);
2885 }
2886
2887 return (0);
(kgdb) l *nexus_attach+0x1a
0xc097ceaa is in nexus_attach (../../../i386/i386/nexus.c:257).
252 {
253
254 bus_generic_probe(dev);
255 bus_generic_attach(dev);
256 return 0;
257 }
258
259 static int
260 nexus_print_all_resources(device_t dev)
261 {
(kgdb) l *root_bus_configure+0x1b
0xc075a2eb is in root_bus_configure (../../../kern/subr_bus.c:3766).
3761 {
3762 device_t dev;
3763
3764 PDEBUG(("."));
3765
3766 TAILQ_FOREACH(dev, &root_bus->children, link) {
3767 device_probe_and_attach(dev);
3768 }
3769 }
3770
(kgdb) l *configure+0xc
0xc09655ac is in configure (../../../i386/i386/autoconf.c:119).
114 #ifdef DEV_ISA
115 /*
116 * Explicitly probe and attach ISA last. The isa bus saves
117 * it's device node at attach time for us here.
118 */
119 if (isa_bus_device)
120 isa_probe_children(isa_bus_device);
121 #endif
122 }
123
acpidump -t -d -o Tyan_S2720.dsdt > Tyan_S2720.asl
Tyan_S2720.asl
Tyan_S2720.dsdt
dmesg
cvs diff -u sys
Index: sys/dev/acpica/Osd/OsdHardware.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/Osd/OsdHardware.c,v
retrieving revision 1.22
diff -u -r1.22 OsdHardware.c
--- sys/dev/acpica/Osd/OsdHardware.c 31 May 2007 00:52:32 -0000 1.22
+++ sys/dev/acpica/Osd/OsdHardware.c 3 Jun 2007 06:43:46 -0000
@@ -161,7 +161,9 @@
{
int error;
+ hunt(__FILE__, __LINE__); /* PHO */
error = acpi_os_check_port(OutPort, Width);
+ hunt(__FILE__, __LINE__); /* PHO */
if (error != 0) {
if (bootverbose)
printf("acpi: bad write to port 0x%03x (%d), val %#x\n",
@@ -173,12 +175,15 @@
switch (Width) {
case 8:
bus_space_write_1(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+ hunt(__FILE__, __LINE__); /* PHO */
break;
case 16:
bus_space_write_2(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+ hunt(__FILE__, __LINE__); /* PHO */
break;
case 32:
bus_space_write_4(ACPI_BUS_SPACE_IO, ACPI_BUS_HANDLE, OutPort, Value);
+ hunt(__FILE__, __LINE__); /* PHO */
break;
default:
/* debug trap goes here */
@@ -303,6 +308,7 @@
ACPI_STATUS status;
int bus;
+ hunt(__FILE__, __LINE__); /* PHO */
if (pci_cfgregopen() == 0)
panic("AcpiOsDerivePciId unable to initialize pci bus");
@@ -328,4 +334,5 @@
acpi_name(chandle), (*PciId)->Bus, (*PciId)->Device,
(*PciId)->Function);
}
+ hunt(__FILE__, __LINE__); /* PHO */
}
Index: sys/kern/kern_module.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_module.c,v
retrieving revision 1.52
diff -u -r1.52 kern_module.c
--- sys/kern/kern_module.c 4 Mar 2007 22:36:46 -0000 1.52
+++ sys/kern/kern_module.c 3 Jun 2007 06:44:00 -0000
@@ -43,6 +43,7 @@
#include <sys/sx.h>
#include <sys/module.h>
#include <sys/linker.h>
+#include <sys/kdb.h>
static MALLOC_DEFINE(M_MODULE, "module", "module data structures");
@@ -65,6 +66,42 @@
struct sx modules_sx;
static int nextid = 1;
static void module_shutdown(void *, int);
+static int found_if_tun = 0;
+static int found_if_tun_print = 0;
+static module_t tun_mod_p = NULL;
+static int num_modules = 0;
+
+void
+hunt(const char *file, int line)
+{
+ module_t mod;
+ int i = 0;
+
+ TAILQ_FOREACH(mod, &modules, link) {
+ i++;
+ }
+ if (i > num_modules) num_modules = i;
+
+ TAILQ_FOREACH(mod, &modules, link) {
+ if (strcmp(mod->name, "if_tun") == 0) {
+ if (found_if_tun == 0) {
+ found_if_tun = 1;
+ if (found_if_tun_print ++ == 0) {
+ printf("Address: %p\n", mod->name);
+ tun_mod_p = mod;
+// kdb_enter("set hwatch");
+ }
+ }
+ return;
+ }
+ }
+ if (found_if_tun == 1) {
+ printf("Lost if_tun at line %s:%d\n", file, line);
+ printf("tun_mod_p->name = %s\n", tun_mod_p->name);
+ printf("max number of modules: %d, number of modules now: %d\n", num_modules, i);
+ panic("hunt: lost if_tun");
+ }
+}
static int
modevent_nop(module_t mod, int what, void *arg)
@@ -114,6 +151,9 @@
int error;
module_t mod;
+// printf("module_register_init(%s)\n", data->name); /* PHO */
+ hunt(__FILE__, __LINE__); /* PHO */
+
mtx_lock(&Giant);
MOD_SLOCK;
mod = module_lookupbyname(data->name);