Index: sys/kern/subr_witness.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/kern/subr_witness.c,v retrieving revision 1.5.4.2.4.2 diff -u -r1.5.4.2.4.2 subr_witness.c --- sys/kern/subr_witness.c 19 Jul 2008 00:35:23 -0000 1.5.4.2.4.2 +++ sys/kern/subr_witness.c 20 Aug 2008 13:54:27 -0000 @@ -1,5 +1,8 @@ /*- - * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. + * Copyright (c) 2008 Isilon Systems, Inc. + * Copyright (c) 2008 Ilya Maykov + * Copyright (c) 1998 Berkeley Software Design, Inc. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -99,17 +102,16 @@ #include #include #include +#include +#include #include #include -#include -#include +#ifdef DDB #include +#endif #include -#include - -MALLOC_DEFINE(M_WITNESS, "Witness", "Witness"); /* Note that these traces do not work with KTR_ALQ. */ #if 0 @@ -125,17 +127,54 @@ /* Define this to check for blessed mutexes */ #undef BLESSING -#define WITNESS_COUNT (512) -#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) +#define WITNESS_COUNT 1024 +#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) +#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ +#define WITNESS_PENDLIST 512 + +/* Allocate 256 KB of stack data space */ +#define WITNESS_LO_DATA_COUNT 2048 + +/* Prime, gives load factor of ~2 at full load */ +#define WITNESS_LO_HASH_SIZE 1021 + +#define LOCK_CHILDCOUNT 2048 + +#define MAX_W_NAME 64 + +#define BADSTACK_SBUF_SIZE (256 * WITNESS_COUNT) +#define CYCLEGRAPH_SBUF_SIZE 8192 +#define FULLGRAPH_SBUF_SIZE 32768 /* - * XXX: This is somewhat bogus, as we assume here that at most 2048 threads - * holding at most LOCK_NCHILDREN locks. We handle failure ok, - * and we should probably be safe for the most part, but it's still a SWAG. 
+ * These flags go in the witness relationship matrix and describe the + * relationship between any two struct witness objects. */ -#define LOCK_CHILDCOUNT 2048 +#define WITNESS_UNRELATED 0x00 /* No lock order relation. */ +#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ +#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ +#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ +#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ +#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) +#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) +#define WITNESS_RELATED_MASK \ + (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) +#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been + * observed. */ +#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ +#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ +#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ + +/* Descendant to ancestor flags */ +#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) + +/* Ancestor to descendant flags */ +#define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2) + +#define WITNESS_INDEX_ASSERT(i) \ + MPASS((i) > 0 && (i) <= w_max_used_index && (i) < WITNESS_COUNT) -#define MAX_W_NAME 64 +MALLOC_DEFINE(M_WITNESS, "Witness", "Witness"); /* * The main witness structure. One of these per named lock type in the system @@ -143,185 +182,158 @@ */ struct witness { char w_name[MAX_W_NAME]; - u_int32_t w_index; /* Index in the relationship matrix */ - struct lock_class * w_class; + uint32_t w_index; /* Index in the relationship matrix */ + struct lock_class *w_class; STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ - struct witness * w_hash_next; /* Linked list in hash buckets. 
*/ - const char * w_file; /* File where last acquired */ - u_int32_t w_line; /* Line where last acquired */ - u_int32_t w_refcount; - u_int16_t w_num_ancestors; /* direct/indirect + struct witness *w_hash_next; /* Linked list in hash buckets. */ + const char *w_file; /* File where last acquired */ + uint32_t w_line; /* Line where last acquired */ + uint32_t w_refcount; + uint16_t w_num_ancestors; /* direct/indirect * ancestor count */ - u_int16_t w_num_descendants; /* direct/indirect + uint16_t w_num_descendants; /* direct/indirect * descendant count */ int16_t w_ddb_level; int w_displayed:1; - int w_badmalloc:1; int w_reversed:1; }; STAILQ_HEAD(witness_list, witness); /* - * WITNESS hash table definitions. Hash tables are used to look up witnesses by - * name, and to look up known lock order reversals by witness index pairs. - */ - -static u_int32_t witness_hash_djb2(const u_int8_t *key, u_int32_t size); -static void witness_init_hash_tables(void); - -/* * The witness hash table. Keys are witness names (const char *), elements are * witness objects (struct witness *). */ -#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ struct witness_hash { - u_int32_t wh_size; - u_int32_t wh_count; - struct witness *wh_array[WITNESS_HASH_SIZE]; + struct witness *wh_array[WITNESS_HASH_SIZE]; + uint32_t wh_size; + uint32_t wh_count; }; -static struct witness *witness_hash_get(const char *key); -static void witness_hash_put(struct witness *w); - /* * Key type for the lock order data hash table. 
*/ struct witness_lock_order_key { - uint16_t from; - uint16_t to; + uint16_t from; + uint16_t to; }; -static inline int -witness_lock_order_key_empty(struct witness_lock_order_key *key) -{ - return key->from == 0 && key->to == 0; -} - -static inline int -witness_lock_order_key_equal(struct witness_lock_order_key *a, - struct witness_lock_order_key *b) -{ - return a->from == b->from && a->to == b->to; -} - struct witness_lock_order_data { - struct witness_lock_order_key wlod_key; - struct witness_lock_order_data *wlod_next; - struct stack wlod_stack; + struct stack wlod_stack; + struct witness_lock_order_key wlod_key; + struct witness_lock_order_data *wlod_next; }; -/* Allocate 256 KB of stack data space */ -#define WITNESS_LOCK_ORDER_DATA_COUNT 2048 -/* Prime, gives load factor of ~2 at full load */ -#define WITNESS_LOCK_ORDER_HASH_SIZE 1021 - /* * The witness lock order data hash table. Keys are witness index tuples * (struct witness_lock_order_key), elements are lock order data objects * (struct witness_lock_order_data). */ struct witness_lock_order_hash { - unsigned int wloh_size; - unsigned int wloh_count; - struct witness_lock_order_data *wloh_array[WITNESS_LOCK_ORDER_HASH_SIZE]; + struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE]; + u_int wloh_size; + u_int wloh_count; +}; + +#ifdef BLESSING +struct witness_blessed { + const char *b_lock1; + const char *b_lock2; }; +#endif -static struct witness_lock_order_data *witness_lock_order_get( - struct witness *parent, struct witness *child); -static int witness_lock_order_add(struct witness *parent, - struct witness *child); -static int witness_lock_order_check(struct witness *parent, - struct witness *child); +struct witness_order_list_entry { + const char *w_name; + struct lock_class *w_class; +}; /* - * These flags go in the witness relationship matrix and describe the - * relationship between any two struct witness objects. + * Returns 0 if one of the locks is a spin lock and the other is not. 
+ * Returns 1 otherwise. */ -#define WITNESS_UNRELATED 0x00 /* No lock order relation. */ -#define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ -#define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ -#define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ -#define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ -#define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) -#define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) -#define WITNESS_RELATED_MASK \ - (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) -#define WITNESS_REVERSAL 0x10 /* A lock order reversal has been - * observed. */ -#define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ -#define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ -#define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ -/* Descendant to ancestor flags */ -#define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) -/* Ancestor to descendant flags */ -#define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2) +static __inline int +witness_lock_type_equal(struct witness *w1, struct witness *w2) +{ -#define WITNESS_INDEX_ASSERT(i) \ - MPASS((i) > 0 && (i) <= w_max_used_index && (i) < WITNESS_COUNT) + return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == + (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); +} -#ifdef BLESSING -struct witness_blessed { - const char *b_lock1; - const char *b_lock2; -}; -#endif +static __inline int +witness_lock_order_key_empty(const struct witness_lock_order_key *key) +{ + + return (key->from == 0 && key->to == 0); +} + +static __inline int +witness_lock_order_key_equal(const struct witness_lock_order_key *a, + const struct witness_lock_order_key *b) +{ + + return (a->from == b->from && a->to == b->to); +} +static int _isitmyx(struct witness *w1, struct witness *w2, int rmask, + const char *fname); +#ifdef KDB +static void _witness_debugger(int cond, const char *msg); +#endif +static void 
adopt(struct witness *parent, struct witness *child); #ifdef BLESSING static int blessed(struct witness *, struct witness *); #endif -static struct witness *enroll(const char *description, - struct lock_class *lock_class); -static int adopt(struct witness *parent, struct witness *child); +static void depart(struct witness *w); +static struct witness *enroll(const char *description, + struct lock_class *lock_class); +static struct lock_instance *find_instance(struct lock_list_entry *list, + struct lock_object *lock); static int isitmychild(struct witness *parent, struct witness *child); static int isitmydescendant(struct witness *parent, struct witness *child); -static int itismychild(struct witness *parent, struct witness *child); +static void itismychild(struct witness *parent, struct witness *child); +static int sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS); static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); -static const char *fixup_filename(const char *file); -static struct witness *witness_get(void); -static struct lock_list_entry *witness_lock_list_get(void); -static void witness_lock_list_free(struct lock_list_entry *lle); -static struct lock_instance *find_instance(struct lock_list_entry *lock_list, - struct lock_object *lock); -static void witness_list_lock(struct lock_instance *instance); +static int sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS); +static void witness_add_fullgraph(struct sbuf *sb, struct witness *parent); #ifdef DDB -static void witness_ddb_level_descendants(struct witness *parent, int level); static void witness_ddb_compute_levels(void); +static void witness_ddb_display(void(*)(const char *fmt, ...)); static void witness_ddb_display_descendants(void(*)(const char *fmt, ...), - struct witness *, int indent); + struct witness *, int indent); static void witness_ddb_display_list(void(*prnt)(const char *fmt, ...), - struct witness_list *list); -static void witness_ddb_display(void(*)(const char *fmt, ...)); + struct 
witness_list *list); +static void witness_ddb_level_descendants(struct witness *parent, int l); static void witness_ddb_list(struct thread *td); #endif -static void witness_increment_graph_generation(void); - -/* - * Returns 0 if one of the locks is a spin lock and the other is not. - * Returns 1 otherwise. - */ -static __inline int -witness_lock_type_equal(struct witness *w1, struct witness *w2) -{ - return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == - (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); -} +static void witness_free(struct witness *m); +static struct witness *witness_get(void); +static uint32_t witness_hash_djb2(const uint8_t *key, uint32_t size); +static struct witness *witness_hash_get(const char *key); +static void witness_hash_put(struct witness *w); +static void witness_init_hash_tables(void); +static void witness_increment_graph_generation(void); +static void witness_lock_list_free(struct lock_list_entry *lle); +static struct lock_list_entry *witness_lock_list_get(void); +static int witness_lock_order_add(struct witness *parent, + struct witness *child); +static int witness_lock_order_check(struct witness *parent, + struct witness *child); +static struct witness_lock_order_data *witness_lock_order_get( + struct witness *parent, + struct witness *child); +static void witness_list_lock(struct lock_instance *instance); -/* - * Assert that the lock class has exactly one of LC_SLEEPLOCK and LC_SPINLOCK - * flags set. - */ -#define WITNESS_LOCK_CLASS_ASSERT(class) \ -do { \ - MPASS(((class)->lc_flags & LC_SLEEPLOCK) != \ - ((class)->lc_flags & LC_SPINLOCK)); \ -} while(0) +#ifdef KDB +#define witness_debugger(c) _witness_debugger(c, __func__) +#else +#define witness_debugger(c) +#endif SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking"); /* - * If 0, witness is disabled. If 1, witness performs full lock order + * If set to 0, witness is disabled. 
Otherwise witness performs full lock order * checking for all locks. At runtime, witness is allowed to be turned off. * witness is not allowed be turned on once it is turned off, however. */ @@ -337,7 +349,6 @@ * - a lock hierarchy violation occurs * - locks are held when going to sleep. */ -static void witness_debugger(int cond, const char *msg); #ifdef WITNESS_KDB int witness_kdb = 1; #else @@ -363,40 +374,33 @@ int witness_skipspin = 0; #endif TUNABLE_INT("debug.witness.skipspin", &witness_skipspin); -SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, - &witness_skipspin, 0, ""); +SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin, + 0, ""); -static int sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS); /* - * Call this to print out the internal witness structure as a dot graph. + * Call this to print out the relations between locks. */ -SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph, CTLTYPE_STRING|CTLFLAG_RD, - NULL, 0, sysctl_debug_witness_fullgraph, "A", "Dot graph of witness info"); +SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph, CTLTYPE_STRING | CTLFLAG_RD, + NULL, 0, sysctl_debug_witness_fullgraph, "A", "Show locks relation graphs"); -static int sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS); /* - * Call this to print out the internal witness structure as a dot graph. + * Call this to print out the witness faulty stacks. */ -SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks, CTLTYPE_STRING|CTLFLAG_RD, - NULL, 0, sysctl_debug_witness_badstacks, "A", "Print bad witness stacks"); - -static int sysctl_debug_witness_cyclegraph(SYSCTL_HANDLER_ARGS); -/* - * Call this to print out the internal witness structure as a dot graph. 
- */ -SYSCTL_PROC(_debug_witness, OID_AUTO, cyclegraph, CTLTYPE_STRING|CTLFLAG_RD, - NULL, 0, sysctl_debug_witness_cyclegraph, "A", - "Print bad part of witness graph"); +SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks, CTLTYPE_STRING | CTLFLAG_RD, + NULL, 0, sysctl_debug_witness_badstacks, "A", "Show bad witness stacks"); static struct mtx w_mtx; + /* w_list */ static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); + /* w_typelist */ static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); + /* lock list */ -static struct lock_list_entry *w_lock_list_free; +static struct lock_list_entry *w_lock_list_free = NULL; static int w_free_cnt, w_spin_cnt, w_sleep_cnt; SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, ""); @@ -404,23 +408,20 @@ SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0, ""); -/* Statically allocated memory */ static struct witness *w_data; -static u_int8_t w_rmatrix[WITNESS_COUNT+1][WITNESS_COUNT+1]; +static uint8_t w_rmatrix[WITNESS_COUNT+1][WITNESS_COUNT+1]; static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; -/* The witness hash */ -static struct witness_hash w_hash; +static struct witness_hash w_hash; /* The witness hash table. 
*/ + /* The lock order data hash */ -static struct witness_lock_order_data w_lodata[WITNESS_LOCK_ORDER_DATA_COUNT]; +static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT]; static struct witness_lock_order_data *w_lofree = NULL; static struct witness_lock_order_hash w_lohash; static int w_max_used_index = 0; static unsigned int w_generation = 0; +static const char *w_notrunning = "Witness not running, witness_watch == 0\n"; +static const char *w_stillcold = "Witness is still cold\n"; -struct witness_order_list_entry { - const char *w_name; - struct lock_class *w_class; -}; static struct witness_order_list_entry order_lists[] = { /* @@ -704,8 +705,8 @@ int i; MALLOC(w_data, struct witness *, - sizeof (struct witness) * WITNESS_COUNT, - M_WITNESS, M_NOWAIT | M_ZERO); + sizeof (struct witness) * WITNESS_COUNT, M_WITNESS, + M_NOWAIT | M_ZERO); /* * We have to release Giant before initializing its witness @@ -720,12 +721,12 @@ for (i = WITNESS_COUNT - 1; i >= 0; i--) { w = &w_data[i]; memset(w, 0, sizeof(*w)); - STAILQ_INSERT_HEAD(&w_free, w, w_list); - w_data[i].w_index = i; /* Witness index never changes. */ - w_free_cnt++; + w_data[i].w_index = i; /* Witness index never changes. */ + witness_free(w); } KASSERT(STAILQ_FIRST(&w_free)->w_index == 0, - ("%s: XXX!", __func__)); + ("%s: Invalid list of free witness objects", __func__)); + /* Witness with index 0 is not used to aid in debugging. */ STAILQ_REMOVE_HEAD(&w_free, w_list); w_free_cnt--; @@ -735,31 +736,25 @@ for (i = 0; i < LOCK_CHILDCOUNT; i++) witness_lock_list_free(&w_locklistdata[i]); - witness_init_hash_tables(); - printf("%s: initialized the WITNESS hash table\n", __func__); /* First add in all the specified order lists. 
*/ for (order = order_lists; order->w_name != NULL; order++) { - WITNESS_LOCK_CLASS_ASSERT(order->w_class); w = enroll(order->w_name, order->w_class); if (w == NULL) continue; w->w_file = "order list"; for (order++; order->w_name != NULL; order++) { - WITNESS_LOCK_CLASS_ASSERT(order->w_class); w1 = enroll(order->w_name, order->w_class); if (w1 == NULL) continue; w1->w_file = "order list"; - if (!itismychild(w, w1)) - panic("Not enough memory for static orders!"); + itismychild(w, w1); w = w1; } } witness_spin_warn = 1; - printf("%s: enrolled locks in the order list\n", __func__); /* Iterate through all locks and add them to witness. */ while (!STAILQ_EMPTY(&pending_locks)) { lock = STAILQ_FIRST(&pending_locks); @@ -773,27 +768,10 @@ /* Mark the witness code as being ready for use. */ witness_cold = 0; - printf("%s: WITNESS initialized!\n", __func__); mtx_lock(&Giant); } -SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) - -static int -sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) -{ - int error, value; - - value = witness_watch; - error = sysctl_handle_int(oidp, &value, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (value == witness_watch) - return (0); - if (value != 0) - return (EINVAL); - witness_watch = 0; - return (0); -} +SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, + NULL); void witness_init(struct lock_object *lock) @@ -802,7 +780,6 @@ /* Various sanity checks. */ class = LOCK_CLASS(lock); - WITNESS_LOCK_CLASS_ASSERT(class); if ((lock->lo_flags & LO_RECURSABLE) != 0 && (class->lc_flags & LC_RECURSABLE) == 0) panic("%s: lock (%s) %s can not be recursable", __func__, @@ -815,6 +792,7 @@ (class->lc_flags & LC_UPGRADABLE) == 0) panic("%s: lock (%s) %s can not be upgradable", __func__, class->lc_name, lock->lo_name); + /* * If we shouldn't watch this lock, then just clear lo_witness. 
* Otherwise, if witness_cold is set, then it is too early to @@ -844,29 +822,18 @@ panic("lock (%s) %s destroyed while witness_cold", class->lc_name, lock->lo_name); - WITNESS_LOCK_CLASS_ASSERT(class); - /* XXX: need to verify that no one holds the lock */ - if ((lock->lo_flags & (LO_WITNESS | LO_ENROLLPEND)) == LO_WITNESS && - lock->lo_witness != NULL) { - w = lock->lo_witness; - - mtx_lock_spin(&w_mtx); - MPASS(w->w_refcount > 0); - w->w_refcount--; + if ((lock->lo_flags & LO_WITNESS) == 0 || lock->lo_witness == NULL) + return; + w = lock->lo_witness; - /* - * Clean out file and line pointers, in case the module - * they belong to is being unloaded. - */ - if (w->w_refcount == 0) { - w->w_file = NULL; - w->w_line = 0; - witness_increment_graph_generation(); - } + mtx_lock_spin(&w_mtx); + MPASS(w->w_refcount > 0); + w->w_refcount--; - mtx_unlock_spin(&w_mtx); - } + if (w->w_refcount == 0) + depart(w); + mtx_unlock_spin(&w_mtx); /* * If this lock is destroyed before witness is up and running, @@ -894,28 +861,28 @@ * Look for locks with no parents and level all their descendants. */ STAILQ_FOREACH(w, &w_all, w_list) { + /* If the witness has ancestors (is not a root), skip it. 
*/ if (w->w_num_ancestors > 0) continue; - witness_ddb_level_descendants(w, 0); } } static void -witness_ddb_level_descendants(struct witness *w, int level) +witness_ddb_level_descendants(struct witness *w, int l) { int i; - if (w->w_ddb_level >= level) + if (w->w_ddb_level >= l) return; - w->w_ddb_level = level; - level++; + w->w_ddb_level = l; + l++; for (i = 1; i <= w_max_used_index; i++) { if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) - witness_ddb_level_descendants(&w_data[i], level); + witness_ddb_level_descendants(&w_data[i], l); } } @@ -951,20 +918,16 @@ static void witness_ddb_display_list(void(*prnt)(const char *fmt, ...), - struct witness_list *list) + struct witness_list *list) { struct witness *w; STAILQ_FOREACH(w, list, w_typelist) { - if (w->w_ddb_level > 0) + if (w->w_file == NULL || w->w_ddb_level > 0) continue; - /* - * This lock has no anscestors, display its descendants. - */ + + /* This lock has no ancestors - display its descendants. */ witness_ddb_display_descendants(prnt, w, 0); - if (db_pager_quit) { - break; - } } } @@ -973,7 +936,7 @@ { struct witness *w; - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); witness_ddb_compute_levels(); /* Clear all the displayed flags. */ @@ -998,7 +961,7 @@ */ prnt("\nLocks which were never acquired:\n"); STAILQ_FOREACH(w, &w_all, w_list) { - if (w->w_file != NULL) + if (w->w_file != NULL || w->w_refcount == 0) continue; prnt("%s (type: %s, depth: %d)\n", w->w_name, w->w_class->lc_name, w->w_ddb_level); @@ -1030,7 +993,7 @@ lock2->lo_witness == NULL) return (EINVAL); - MPASS(!mtx_owned(&w_mtx)); + mtx_assert(&w_mtx, MA_NOTOWNED); mtx_lock_spin(&w_mtx); /* @@ -1044,9 +1007,8 @@ /* Try to add the new order. 
*/ CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, - lock2->lo_type, lock1->lo_type); - if (!itismychild(lock1->lo_witness, lock2->lo_witness)) - return (ENOMEM); + lock2->lo_witness->w_name, lock1->lo_witness->w_name); + itismychild(lock1->lo_witness, lock2->lo_witness); mtx_unlock_spin(&w_mtx); return (0); } @@ -1064,7 +1026,7 @@ if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || panicstr != NULL) - goto out; + return; /* * Try locks do not block if they fail to acquire the lock, thus @@ -1074,7 +1036,7 @@ * that happens. */ if (flags & LOP_TRYLOCK) - panic("%s should not be called for try lock operations", + panic("%s: should not be called for any try lock operations", __func__); w = lock->lo_witness; @@ -1082,9 +1044,8 @@ td = curthread; file = fixup_filename(file); - WITNESS_LOCK_CLASS_ASSERT(class); - if (class->lc_flags & LC_SLEEPLOCK) { + /* * Since spin locks include a critical section, this check * implicitly enforces a lock order of all sleep locks before @@ -1099,9 +1060,10 @@ * no order checking is needed. */ if (td->td_sleeplocks == NULL) - goto out; + return; lock_list = &td->td_sleeplocks; } else { + /* * If this is the first lock, just return as no order * checking is needed. We check this in both if clauses @@ -1112,12 +1074,13 @@ * rest of the check. */ if (PCPU_GET(spinlocks) == NULL) - goto out; + return; lock_list = PCPU_PTR(spinlocks); } + /* Empty list? */ if ((*lock_list)->ll_count == 0) - goto out; + return; /* * Check to see if we are recursing on a lock we already own. If @@ -1142,8 +1105,9 @@ lock1->li_file, lock1->li_line); panic("excl->share"); } - goto out; + return; } + /* * Try to perform most checks without a lock. If this succeeds we * can skip acquiring the lock and return success. 
@@ -1151,7 +1115,8 @@ lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; w1 = lock1->li_lock->lo_witness; if (witness_lock_order_check(w1, w)) - goto out; + return; + /* * Check for duplicate locks of the same type. Note that we only * have to check for this on the last lock we just acquired. Any @@ -1161,31 +1126,30 @@ witness_lock_order_add(w1, w); if (w1 == w) { i = w->w_index; - if (!(lock->lo_flags & LO_DUPOK) && + if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) && !(w_rmatrix[i][i] & WITNESS_REVERSAL)) { w_rmatrix[i][i] |= WITNESS_REVERSAL; w->w_reversed = 1; mtx_unlock_spin(&w_mtx); - printf("acquiring duplicate lock of same type: \"%s\"\n", - lock->lo_type); + printf("acquiring duplicate lock of same type: \"%s\"\n", + w->w_name); printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, lock1->li_file, lock1->li_line); printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); -#ifdef KDB - witness_debugger(1, __func__); -#endif + witness_debugger(1); } else mtx_unlock_spin(&w_mtx); - goto out; + return; } - MPASS(mtx_owned(&w_mtx)); + mtx_assert(&w_mtx, MA_OWNED); + /* * If we know that the the lock we are acquiring comes after * the lock we most recently acquired in the lock order tree, * then there is no need for any further checks. */ if (isitmychild(w1, w)) - goto out_unlock; + goto out; for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { for (i = lle->ll_count - 1; i >= 0; i--, j++) { @@ -1203,6 +1167,7 @@ ("lock missing witness structure")); continue; } + /* * If we are locking Giant and this is a sleepable * lock, then skip it. @@ -1210,6 +1175,7 @@ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && lock == &Giant.lock_object) continue; + /* * If we are locking a sleepable lock and this lock * is Giant, then skip it. 
@@ -1217,6 +1183,7 @@ if ((lock->lo_flags & LO_SLEEPABLE) != 0 && lock1->li_lock == &Giant.lock_object) continue; + /* * If we are locking a sleepable lock and this lock * isn't sleepable, we want to treat it as a lock @@ -1226,6 +1193,7 @@ if (((lock->lo_flags & LO_SLEEPABLE) != 0 && (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) goto reversal; + /* * If we are locking Giant and this is a non-sleepable * lock, then treat it as a reversal. @@ -1233,29 +1201,32 @@ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && lock == &Giant.lock_object) goto reversal; + /* * Check the lock order hierarchy for a reveresal. */ if (!isitmydescendant(w, w1)) continue; reversal: + /* * We have a lock order violation, check to see if it * is allowed or has already been yelled about. */ - #ifdef BLESSING + /* * If the lock order is blessed, just bail. We don't * look for other lock order violations though, which * may be a bug. */ if (blessed(w, w1)) - goto out_unlock; + goto out; #endif + /* Bail if this violation is known */ if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL) - goto out_unlock; + goto out; /* Record this as a violation */ w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL; @@ -1277,6 +1248,7 @@ "lock order reversal: (Giant after non-sleepable)\n"); else printf("lock order reversal:\n"); + /* * Try to locate an earlier lock with * witness w in our list. 
@@ -1296,29 +1268,26 @@ if (i < 0) { printf(" 1st %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, - lock1->li_lock->lo_type, lock1->li_file, - lock1->li_line); + w1->w_name, lock1->li_file, lock1->li_line); printf(" 2nd %p %s (%s) @ %s:%d\n", lock, - lock->lo_name, lock->lo_type, file, line); + lock->lo_name, w->w_name, file, line); } else { printf(" 1st %p %s (%s) @ %s:%d\n", lock2->li_lock, lock2->li_lock->lo_name, - lock2->li_lock->lo_type, lock2->li_file, - lock2->li_line); + lock2->li_lock->lo_witness->w_name, + lock2->li_file, lock2->li_line); printf(" 2nd %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, - lock1->li_lock->lo_type, lock1->li_file, - lock1->li_line); + w1->w_name, lock1->li_file, lock1->li_line); printf(" 3rd %p %s (%s) @ %s:%d\n", lock, - lock->lo_name, lock->lo_type, file, line); + lock->lo_name, w->w_name, file, line); } -#ifdef KDB - witness_debugger(1, __func__); -#endif - goto out; + witness_debugger(1); + return; } } lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; + /* * If requested, build a new lock order. However, don't build a new * relationship between a sleepable lock and Giant if it is in the @@ -1330,14 +1299,10 @@ (lock->lo_flags & LO_SLEEPABLE) != 0)) { CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, lock->lo_type, lock1->li_lock->lo_type); - if (!itismychild(lock1->li_lock->lo_witness, w)) - /* Witness is dead. */ - goto out; + itismychild(lock1->li_lock->lo_witness, w); } -out_unlock: - mtx_unlock_spin(&w_mtx); out: - return; + mtx_unlock_spin(&w_mtx); } void @@ -1355,8 +1320,6 @@ td = curthread; file = fixup_filename(file); - WITNESS_LOCK_CLASS_ASSERT(LOCK_CLASS(lock)); - /* Determine lock list for this lock. 
*/ if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; @@ -1408,13 +1371,10 @@ struct lock_instance *instance; struct lock_class *class; - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); - - WITNESS_LOCK_CLASS_ASSERT(class); - file = fixup_filename(file); if ((lock->lo_flags & LO_UPGRADABLE) == 0) panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", @@ -1446,13 +1406,10 @@ struct lock_instance *instance; struct lock_class *class; - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); - - WITNESS_LOCK_CLASS_ASSERT(class); - file = fixup_filename(file); if ((lock->lo_flags & LO_UPGRADABLE) == 0) panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", @@ -1491,8 +1448,6 @@ class = LOCK_CLASS(lock); file = fixup_filename(file); - WITNESS_LOCK_CLASS_ASSERT(class); - /* Find lock instance associated with this lock. 
*/ if (class->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; @@ -1563,12 +1518,23 @@ witness_thread_exit(struct thread *td) { struct lock_list_entry *lle; + int i, n; lle = td->td_sleeplocks; - if (lle == NULL) + if (lle == NULL || panicstr != NULL) return; - if (lle->ll_count != 0) - panic("Thread %p: lock list entry not empty", td); + if (lle->ll_count != 0) { + for (n = 0; lle != NULL; lle = lle->ll_next) + for (i = lle->ll_count - 1; i >= 0; i--) { + if (n == 0) + printf("Thread %p exiting with the following locks held:\n", + td); + n++; + witness_list_lock(&lle->ll_children[i]); + + } + panic("Thread %p cannot exit while holding sleeplocks\n", td); + } witness_lock_list_free(lle); } @@ -1582,7 +1548,7 @@ int witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) { - struct lock_list_entry *lle; + struct lock_list_entry **lock_list, *lle; struct lock_instance *lock1; struct thread *td; va_list ap; @@ -1616,6 +1582,12 @@ witness_list_lock(lock1); } if (PCPU_GET(spinlocks) != NULL) { + lock_list = PCPU_PTR(spinlocks); + + /* Empty list? */ + if ((*lock_list)->ll_count == 0) + return (n); + /* * Since we already hold a spinlock preemption is * already blocked. 
@@ -1633,10 +1605,8 @@ } if (flags & WARN_PANIC && n) panic("%s", __func__); -#ifdef KDB else - witness_debugger(n, __func__); -#endif + witness_debugger(n); return (n); } @@ -1671,10 +1641,10 @@ MPASS(description != NULL); if (witness_watch == 0 || panicstr != NULL) - return NULL; + return (NULL); if ((lock_class->lc_flags & LC_SPINLOCK)) { if (witness_skipspin) - return NULL; + return (NULL); else typelist = &w_spin; } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) @@ -1687,9 +1657,8 @@ w = witness_hash_get(description); if (w) goto found; - if ((w = witness_get()) == NULL) - return NULL; + return (NULL); MPASS(strlen(description) < MAX_W_NAME); strcpy(w->w_name, description); w->w_class = lock_class; @@ -1702,11 +1671,12 @@ STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); w_sleep_cnt++; } + /* Insert new witness into the hash */ witness_hash_put(w); witness_increment_graph_generation(); mtx_unlock_spin(&w_mtx); - return w; + return (w); found: w->w_refcount++; mtx_unlock_spin(&w_mtx); @@ -1718,16 +1688,39 @@ return (w); } -static int +static void +depart(struct witness *w) +{ + struct witness_list *list; + + MPASS(w->w_refcount == 0); + if (w->w_class->lc_flags & LC_SLEEPLOCK) { + list = &w_sleep; + w_sleep_cnt--; + } else { + list = &w_spin; + w_spin_cnt--; + } + /* + * Set file to NULL as it may point into a loadable module. + */ + w->w_file = NULL; + w->w_line = 0; + witness_increment_graph_generation(); +} + + +static void adopt(struct witness *parent, struct witness *child) { int pi, ci, i, j; - MPASS(mtx_owned(&w_mtx) || witness_cold); + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); /* If the relationship is already known, there's no work to be done. */ if (isitmychild(parent, child)) - return 1; + return; /* When the structure of the graph changes, bump up the generation. 
*/ witness_increment_graph_generation(); @@ -1743,25 +1736,28 @@ MPASS(pi != ci); w_rmatrix[pi][ci] |= WITNESS_PARENT; w_rmatrix[ci][pi] |= WITNESS_CHILD; + /* * If parent was not already an ancestor of child, * then we increment the descendant and ancestor counters. */ - if (!(w_rmatrix[pi][ci] & WITNESS_ANCESTOR)) { + if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { parent->w_num_descendants++; child->w_num_ancestors++; } + /* * Find each ancestor of 'pi'. Note that 'pi' itself is counted as * an ancestor of 'pi' during this loop. */ for (i = 1; i <= w_max_used_index; i++) { - if (!(w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) && + if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && (i != pi)) continue; /* Find each descendant of 'i' and mark it as a descendant. */ for (j = 1; j <= w_max_used_index; j++) { + /* * Skip children that are already marked as * descendants of 'i'. @@ -1773,10 +1769,9 @@ * We are only interested in descendants of 'ci'. Note * that 'ci' itself is counted as a descendant of 'ci'. 
*/ - if (!(w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) && + if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && (j != ci)) continue; - w_rmatrix[i][j] |= WITNESS_ANCESTOR; w_rmatrix[j][i] |= WITNESS_DESCENDANT; w_data[i].w_num_descendants++; @@ -1807,23 +1802,25 @@ } } } - return 1; } -static int +static void itismychild(struct witness *parent, struct witness *child) { - MPASS(witness_cold || mtx_owned(&w_mtx)); + MPASS(child != NULL && parent != NULL); + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); + if (!witness_lock_type_equal(parent, child)) { - if (mtx_owned(&w_mtx)) + if (witness_cold == 0) mtx_unlock_spin(&w_mtx); panic("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " "the same lock type", __func__, parent->w_name, parent->w_class->lc_name, child->w_name, child->w_class->lc_name); } - return adopt(parent, child); + adopt(parent, child); } /* @@ -1842,6 +1839,7 @@ WITNESS_INDEX_ASSERT(i2); r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK; r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK; + /* The flags on one better be the inverse of the flags on the other */ if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) || (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) { @@ -1863,7 +1861,8 @@ static int isitmychild(struct witness *parent, struct witness *child) { - return _isitmyx(parent, child, WITNESS_PARENT, __func__); + + return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); } /* @@ -1872,7 +1871,9 @@ static int isitmydescendant(struct witness *ancestor, struct witness *descendant) { - return _isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, __func__); + + return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, + __func__)); } #ifdef BLESSING @@ -1903,7 +1904,8 @@ struct witness *w; int index; - MPASS(mtx_owned(&w_mtx) || witness_cold); + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); if (witness_watch == 0) { mtx_unlock_spin(&w_mtx); @@ -1912,7 +1914,7 @@ if (STAILQ_EMPTY(&w_free)) { witness_watch = 0; 
mtx_unlock_spin(&w_mtx); - printf("%s: witness exhausted!\n", __func__); + printf("WITNESS: unable to allocate a new witness object\n"); return (NULL); } w = STAILQ_FIRST(&w_free); @@ -1923,10 +1925,19 @@ index < WITNESS_COUNT); bzero(w, sizeof(*w)); w->w_index = index; - w_max_used_index = index; + if (index > w_max_used_index) + w_max_used_index = index; return (w); } +static void +witness_free(struct witness *w) +{ + + STAILQ_INSERT_HEAD(&w_free, w, w_list); + w_free_cnt++; +} + static struct lock_list_entry * witness_lock_list_get(void) { @@ -1959,13 +1970,13 @@ } static struct lock_instance * -find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) +find_instance(struct lock_list_entry *list, struct lock_object *lock) { struct lock_list_entry *lle; struct lock_instance *instance; int i; - for (lle = lock_list; lle != NULL; lle = lle->ll_next) + for (lle = list; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { instance = &lle->ll_children[i]; if (instance->li_lock == lock) @@ -1982,8 +1993,8 @@ lock = instance->li_lock; printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 
"exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); - if (lock->lo_type != lock->lo_name) - printf(" (%s)", lock->lo_type); + if (lock->lo_witness->w_name != lock->lo_name) + printf(" (%s)", lock->lo_witness->w_name); printf(" r = %d (%p) locked @ %s:%d\n", instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, instance->li_line); @@ -2053,7 +2064,7 @@ struct lock_instance *instance; struct lock_class *class; - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); @@ -2079,7 +2090,7 @@ struct lock_instance *instance; struct lock_class *class; - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); @@ -2107,13 +2118,9 @@ struct lock_instance *instance; struct lock_class *class; - class = LOCK_CLASS(lock); - if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; - - WITNESS_LOCK_CLASS_ASSERT(class); - + class = LOCK_CLASS(lock); if ((class->lc_flags & LC_SLEEPLOCK) != 0) instance = find_instance(curthread->td_sleeplocks, lock); else if ((class->lc_flags & LC_SPINLOCK) != 0) @@ -2174,16 +2181,15 @@ struct witness *w; w = lock->lo_witness; - if (w) { + if (w != NULL) db_printf(" LOCKED @ %s:%d\n", w->w_file, w->w_line); - } } static void witness_ddb_list(struct thread *td) { - KASSERT(!witness_cold, ("%s: witness_cold", __func__)); + KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); KASSERT(kdb_active, ("%s: not in the debugger", __func__)); if (witness_watch == 0) @@ -2236,7 +2242,7 @@ if (!witness_thread_has_locks(td)) continue; db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid, - p->p_comm, td, td->td_tid); + td->td_name, td, td->td_tid); witness_ddb_list(td); } } @@ 
-2244,146 +2250,24 @@ DB_SHOW_COMMAND(witness, db_witness_display) { + witness_ddb_display(db_printf); } #endif -/* - * sysctl debug.witness.dotgraph and friends. - */ - -#if 1 //ndef INVARIANT_SUPPORT -#define WITNESS_SANE() (1) -#else -#define WITNESS_SANE() witness_sane() - -static int witness_sane(void); - -#define _LC_TYPEMASK (LC_SLEEPLOCK | LC_SPINLOCK) - -/* - * Check if a node in the witness graph got all of its' pointers right. - */ -static int -witness_sane() -{ - int i, j, error = 0; - unsigned char r1, r2; - struct witness *w1, *w2; - - mtx_assert(&w_mtx, MA_OWNED | MA_NOTRECURSED); - - for (i = 0; i <= WITNESS_COUNT; i++) { - for (j = 0; j <= WITNESS_COUNT; j++) { - if ((i == 0 || j == 0 || - i > w_max_used_index || j > w_max_used_index)) { - if (w_rmatrix[i][j] != WITNESS_UNRELATED) { - mtx_unlock_spin(&w_mtx); - printf("%s: rmatrix[%d][%d] is not 0!\n", - __func__, i, j); - error = 1; - goto out; - } - if (w_rmatrix[j][i] != WITNESS_UNRELATED) { - mtx_unlock_spin(&w_mtx); - printf("%s: rmatrix[%d][%d] is not 0!\n", - __func__, j, i); - error = 1; - goto out; - } - } - r1 = w_rmatrix[i][j] & WITNESS_RELATED_MASK; - r2 = w_rmatrix[j][i] & WITNESS_RELATED_MASK; - if ((WITNESS_ATOD(r1) != r2 && - WITNESS_DTOA(r1) != r2) || - (WITNESS_DTOA(r2) != r1 && - WITNESS_ATOD(r2) != r1)) { - mtx_unlock_spin(&w_mtx); - printf("%s: rmatrix[%d][%d] == %hhx and [%d][%d] == %hhx dont match!", - __func__, i, j, r1, j, i, r2); - error = 1; - goto out; - } - if (r1 == WITNESS_UNRELATED && r2 == WITNESS_UNRELATED) - continue; - - w1 = &w_data[i]; - w2 = &w_data[j]; - if ((w1->w_class->lc_flags & _LC_TYPEMASK) != - (w2->w_class->lc_flags & _LC_TYPEMASK)) { - mtx_unlock_spin(&w_mtx); - printf("%s: %s and %s have different lock classes!\n", - __func__, w1->w_name, w2->w_name); - error = 1; - goto out; - } - } - } - -out: - return !error; -} - -#undef _LC_TYPEMASK - -#endif /* INVARIANT_SUPPORT */ - -static void witness_dotty_fixname(const char *in, char *out, size_t len); 
-static void witness_fixup_string(const char *in, char *out, size_t len, - const char *before, size_t blen, const char *after, size_t alen); - -/* - * Iterates through the string "in". For every character in "in" that matches a - * character in the string "before", replace it with the corresponding character - * in the string "after" (i.e. before[1] -> after[1]). Store the result in the - * string "out". - */ -static void -witness_fixup_string(const char *in, char *out, size_t len, const char *before, - size_t blen, const char *after, size_t alen) -{ - size_t size; - unsigned int i, j; - if (!in || !out) - return; - MPASS(blen == alen); - size = min(strlen(in), len-1); - memcpy(out, in, size); - for (i = 0; i < size; i++) { - for (j = 0; j < blen; j++) { - if (out[i] == before[j]) - out[i] = after[j]; - } - } - out[i] = '\0'; -} - -/* fix up the witness names so they're acceptable to dotty */ -static void -witness_dotty_fixname(const char *in, char *out, size_t len) -{ - const char before[] = "-./ >#"; - const char after[] = "______"; - - witness_fixup_string(in, out, len, before, strlen(before), after, - strlen(after)); -} - -static const char *w_notrunning = "Witness not running, witness_watch == 0\n"; -static const char *w_stillcold = "Witness is still cold\n"; - -#define BADSTACK_SBUF_SIZE (256 * WITNESS_COUNT) - -static int -sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS) -{ - int error, generation, i, j; - struct witness *w1, *w2, *tmp_w1 = NULL, *tmp_w2 = NULL; - struct witness_lock_order_data *data1, *data2; - struct witness_lock_order_data *tmp_data1 = NULL, *tmp_data2 = NULL; - struct sbuf *sb; - unsigned int w_rmatrix1, w_rmatrix2; +static int +sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS) +{ + struct witness_lock_order_data *data1, *data2, *tmp_data1, *tmp_data2; + struct witness *tmp_w1, *tmp_w2, *w1, *w2; + struct sbuf *sb; + u_int w_rmatrix1, w_rmatrix2; + int error, generation, i, j; + tmp_data1 = NULL; + tmp_data2 = NULL; + tmp_w1 = NULL; 
+ tmp_w2 = NULL; if (witness_watch == 0) { error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning)); return (error); @@ -2392,18 +2276,20 @@ error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold)); return (error); } + error = 0; + sb = sbuf_new(NULL, NULL, BADSTACK_SBUF_SIZE, SBUF_AUTOEXTEND); + if (sb == NULL) + return (ENOMEM); /* Allocate and init temporary storage space. */ - tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK|M_ZERO); - tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK|M_ZERO); + tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); + tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, - M_WAITOK|M_ZERO); + M_WAITOK | M_ZERO); tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, - M_WAITOK|M_ZERO); + M_WAITOK | M_ZERO); stack_zero(&tmp_data1->wlod_stack); stack_zero(&tmp_data2->wlod_stack); - error = 0; - sb = sbuf_new(NULL, NULL, BADSTACK_SBUF_SIZE, SBUF_AUTOEXTEND); restart: mtx_lock_spin(&w_mtx); @@ -2415,6 +2301,7 @@ mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); + /* The graph has changed, try again. */ req->oldidx = 0; sbuf_clear(sb); @@ -2422,7 +2309,7 @@ } w1 = &w_data[i]; - if (!w1->w_badmalloc && !w1->w_reversed) { + if (w1->w_reversed == 0) { mtx_unlock_spin(&w_mtx); continue; } @@ -2431,20 +2318,16 @@ *tmp_w1 = *w1; mtx_unlock_spin(&w_mtx); - if (tmp_w1->w_badmalloc) { - sbuf_printf(sb, - "\nLock \"%s\"(%s) was held during malloc(M_WAITOK)\n", - tmp_w1->w_name, tmp_w1->w_class->lc_name); - } - if (!tmp_w1->w_reversed) + if (tmp_w1->w_reversed == 0) continue; for (j = 1; j < w_max_used_index; j++) { - if (!(w_rmatrix[i][j] & WITNESS_REVERSAL) || i > j) + if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j) continue; mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); + /* The graph has changed, try again. 
*/ req->oldidx = 0; sbuf_clear(sb); @@ -2476,18 +2359,18 @@ mtx_unlock_spin(&w_mtx); sbuf_printf(sb, - "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n", + "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); #if 0 sbuf_printf(sb, - "w_rmatrix[%s][%s] == %x, w_rmatrix[%s][%s] == %x\n", + "w_rmatrix[%s][%s] == %x, w_rmatrix[%s][%s] == %x\n", tmp_w1->name, tmp_w2->w_name, w_rmatrix1, tmp_w2->name, tmp_w1->w_name, w_rmatrix2); #endif if (data1) { sbuf_printf(sb, - "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", + "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); stack_sbuf_print(sb, &tmp_data1->wlod_stack); @@ -2495,7 +2378,7 @@ } if (data2 && data2 != data1) { sbuf_printf(sb, - "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", + "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w2->w_name, tmp_w2->w_class->lc_name, tmp_w1->w_name, tmp_w1->w_class->lc_name); stack_sbuf_print(sb, &tmp_data2->wlod_stack); @@ -2506,6 +2389,7 @@ mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); + /* * The graph changed while we were printing stack data, * try again. @@ -2516,583 +2400,23 @@ } mtx_unlock_spin(&w_mtx); - sbuf_finish(sb); - error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); - sbuf_delete(sb); - /* Free temporary storage space. */ free(tmp_data1, M_TEMP); free(tmp_data2, M_TEMP); free(tmp_w1, M_TEMP); free(tmp_w2, M_TEMP); - return error; -} - -/* - * Witness display graph code. When a graph display is requested, the internal - * 2D array is converted into a more programmer-friendly graph representation, - * transformed according to the kind of graph requested (full graph / cycles - * only) and printed out using dot notation. - */ - -struct witness_dotnode { - LIST_ENTRY(witness_dotnode) wn_list; /* linked list of nodes. 
*/ - const char *wn_name; - struct lock_class *wn_class; - const char *wn_file; - u_int32_t wn_line; - u_int32_t wn_index; - u_int32_t wn_refcount; - u_int32_t wn_badmalloc:1; - u_int32_t wn_reversed:1; - short wn_level; /* Depth from root along longest path */ - short wn_vgen; /* Visit generation */ - int wn_flags; - int wn_indegree; /* number of incoming edges. */ - int wn_outdegree; /* number of outgoing edges. */ - struct witness_dotnode **wn_inlist; /* Adjacent incoming edges. */ - struct witness_dotnode **wn_outlist; /* Adjacent outgoing edges. */ -}; - -#define WITNESS_NODE_BAD 0x01 -LIST_HEAD(witness_dotnode_list, witness_dotnode); - -static void witness_dotnode_level_descendants(struct witness_dotnode *node, - int level, int vgen); - -struct witness_dotgraph { - struct witness_lock_order_data wg_lodata[WITNESS_LOCK_ORDER_DATA_COUNT]; - struct witness_dotnode_list wg_nodes; - u_int8_t wg_matrix[WITNESS_COUNT+1][WITNESS_COUNT+1]; - int wg_generation; - int wg_nodecount; - int wg_max_used_index; - int wg_mcount; /* Count of allocated objects. */ - int wg_fcount; /* Count of freed objects. 
*/ - int wg_refcount; -}; - -static struct witness_dotgraph *w_last_full_graph = NULL; -static struct witness_dotgraph *w_last_cycle_graph = NULL; - -static struct witness_dotgraph *witness_fullgraph(void); -static struct witness_dotgraph *witness_cyclegraph(void); -//static void witness_cyclegraph(struct sbuf *sb); - -static void -witness_dotnode_free(struct witness_dotnode *node, struct malloc_type *type, int *fcount) -{ - int fc = 0; - - if (node == NULL) - return; - - if (node->wn_outlist) { - free(node->wn_outlist, type); - node->wn_outlist = NULL; - fc++; - } - if (node->wn_inlist) { - free(node->wn_inlist, type); - node->wn_inlist = NULL; - fc++; - } - free(node, type); - fc++; - if (fcount) - *fcount += fc; -} - -static struct witness_dotgraph * -witness_dotgraph_init(struct malloc_type *type, int *mcount) -{ - struct witness_dotgraph *graph; - - MPASS(!mtx_owned(&w_mtx)); - graph = malloc(sizeof(*graph), type, M_WAITOK|M_ZERO); - graph->wg_nodes.lh_first = NULL; - - if (mcount) - *mcount++; - return graph; -} - -static void -witness_dotgraph_destroy(struct witness_dotgraph *graph, struct malloc_type *type, int *fcount) -{ - struct witness_dotnode *node, *node2; - int fc = 0; - - if (!graph) - goto out; - node = LIST_FIRST(&graph->wg_nodes); - while (node != NULL) { - node2 = LIST_NEXT(node, wn_list); - witness_dotnode_free(node, type, &fc); - node = node2; - } - free(graph, type); - fc++; - if (fcount) - *fcount+= fc; -out: - return; -} - -static void -witness_dotgraph_print(struct sbuf *sb, const char *title, - struct witness_dotgraph *graph) -{ - struct witness_dotnode *node, *node2; - const char *color; - char buf[64], buf2[64]; - int i; - int bad_malloc = 0; - - MPASS(!mtx_owned(&w_mtx)); - - sbuf_printf(sb, "digraph \"%s\" {\n", title); - node = LIST_FIRST(&graph->wg_nodes); - while (node != NULL) { - witness_dotty_fixname(node->wn_name, buf, sizeof(buf)); - if (node->wn_reversed) - color = "red"; - else if (node->wn_badmalloc) { - color = "orange"; - 
bad_malloc = 1; - } else if (node->wn_refcount == 0) - color = "gray"; - else - color = "green"; - sbuf_printf(sb, "\"%s\" [shape=record, color=\"%s\", label=\"%s|" - "{type=%s|refcount=%u|level=%hd}\"];\n", buf, color, - buf, node->wn_class->lc_name, - node->wn_refcount, node->wn_level); - if (node->wn_badmalloc) - sbuf_printf(sb, "%s -> _bad_malloc_ [color=orange];\n", - buf); - for (i = 0; i < node->wn_outdegree; i++) { - node2 = node->wn_outlist[i]; - witness_dotty_fixname(node2->wn_name, buf2, - sizeof(buf2)); - if (graph->wg_matrix[node->wn_index][node2->wn_index] & - WITNESS_REVERSAL) - color = "red"; - else - color = "black"; - sbuf_printf(sb, "\"%s\" -> \"%s\" [color=\"%s\"];\n", buf, - buf2, color); - } - node = LIST_NEXT(node, wn_list); - } - if (bad_malloc) - sbuf_printf(sb, "_bad_malloc_ [shape=record, color=orange, label=\"" - "malloc(M_WAITOK)\"];\n"); - sbuf_printf(sb, "}\n"); -} - -static void -witness_dotnode_level_descendants(struct witness_dotnode *node, int level, int vgen) -{ - int i; - - if (node->wn_vgen == vgen) - return; - - node->wn_vgen = vgen; - if (node->wn_level < level) - node->wn_level = level; - for (i = 0; i < node->wn_outdegree; i++) { - witness_dotnode_level_descendants(node->wn_outlist[i], level + 1, - vgen); - } - node->wn_vgen--; /* For detecting cycles. */ -} - -/* - * Helper function used by witness_fullgraph() and witness_cyclegraph() to - * generate the initial struct witness_dotgraph. 
- */ -static struct witness_dotgraph * -witness_make_dotgraph(void) -{ - int i, n; - int mcount; /* count the number of mallocs */ - int fcount; /* count the number of frees */ - struct witness_dotnode **nodes = NULL; - struct witness_dotgraph *graph = NULL; - struct witness_dotnode *node; - - fcount = mcount = 0; - graph = witness_dotgraph_init(M_TEMP, &mcount); - - mcount++; - graph->wg_nodes.lh_first = NULL; - - mtx_lock_spin(&w_mtx); -restart: - graph->wg_generation = w_generation; - n = w_max_used_index; - mtx_unlock_spin(&w_mtx); - - if (!nodes) - mcount++; - - nodes = realloc(nodes, sizeof(struct witness_dotnode*) * (n + 1), M_TEMP, - M_WAITOK | M_ZERO); - for (i = 1; i <= n; i++) { - if (!nodes[i]) { - mcount++; - nodes[i] = malloc(sizeof(struct witness_dotnode), M_TEMP, - M_WAITOK|M_ZERO); - } - } - - mtx_lock_spin(&w_mtx); - /* - * If a new witness was added while we were allocating memory, we have - * to restart and allocate more. - */ - if (n != w_max_used_index || graph->wg_generation != w_generation) - goto restart; - for (i = 1; i <= n; i++) { - /* Copy the relevant fields into the struct witness_dotnode. */ - nodes[i]->wn_name = w_data[i].w_name; - nodes[i]->wn_index = w_data[i].w_index; - nodes[i]->wn_class = w_data[i].w_class; - nodes[i]->wn_file = w_data[i].w_file; - nodes[i]->wn_line = w_data[i].w_line; - nodes[i]->wn_refcount = w_data[i].w_refcount; - nodes[i]->wn_badmalloc = w_data[i].w_badmalloc; - nodes[i]->wn_reversed = w_data[i].w_reversed; - LIST_INSERT_HEAD(&graph->wg_nodes, nodes[i], wn_list); - graph->wg_nodecount++; - } - memcpy(graph->wg_lodata, w_lodata, sizeof(w_lodata)); - memcpy(graph->wg_matrix, w_rmatrix, sizeof(w_rmatrix)); - mtx_unlock_spin(&w_mtx); - - /* - * Ok, now we have a copy of the in-memory witness structures, and from - * them we can generate the dotty graph. - */ - - /* Examine all known edges, build sparse graph. 
*/ - for (i = 0; i < WITNESS_LOCK_ORDER_DATA_COUNT; i++) { - int from, to; - struct witness_dotnode *from_node, *to_node; - - if (witness_lock_order_key_empty(&graph->wg_lodata[i].wlod_key)) - continue; - - from = graph->wg_lodata[i].wlod_key.from; - to = graph->wg_lodata[i].wlod_key.to; - - WITNESS_INDEX_ASSERT(from); - WITNESS_INDEX_ASSERT(to); - - from_node = nodes[from]; - to_node = nodes[to]; - - /* Don't add the edge (X,X) if DUPOK was set */ - if (from == to && - !(graph->wg_matrix[from][to] & WITNESS_REVERSAL)) - continue; - - /* Don't add the edge (Giant, X) if X is sleepable. The correct - * order is always (X, Giant) in this case. */ - if (strcmp("Giant", from_node->wn_name) == 0 && - (to_node->wn_class->lc_flags & LO_SLEEPABLE)) - continue; - - /* Add the edge (from, to) to the graph. */ - from_node->wn_outdegree++; - if (!from_node->wn_outlist) - mcount++; - from_node->wn_outlist = realloc(from_node->wn_outlist, - from_node->wn_outdegree * sizeof(struct witness_dotnode*), - M_TEMP, M_WAITOK); - from_node->wn_outlist[from_node->wn_outdegree - 1] = to_node; - to_node->wn_indegree++; - if (!to_node->wn_inlist) - mcount++; - to_node->wn_inlist = realloc(to_node->wn_inlist, - to_node->wn_indegree * sizeof(struct witness_dotnode*), - M_TEMP, M_WAITOK); - to_node->wn_inlist[to_node->wn_indegree - 1] = from_node; - } - - /* Compute the level for all nodes */ - node = LIST_FIRST(&graph->wg_nodes); - while (node != NULL) { - while (node && node->wn_indegree != 0) { - node = LIST_NEXT(node, wn_list); - } - /* There are none left */ - if (node == NULL) - break; - witness_dotnode_level_descendants(node, 1, 1); - node = LIST_NEXT(node, wn_list); - } - - - fcount++; - free(nodes, M_TEMP); - - graph->wg_mcount = mcount; - graph->wg_fcount = fcount; - graph->wg_refcount = 1; - return graph; -} - -struct witness_dotgraph * -witness_fullgraph(void) -{ - int mcount, fcount; - struct witness_dotgraph *graph = NULL; - - mcount = fcount = 0; - mtx_lock_spin(&w_mtx); - if 
(w_last_full_graph != NULL) { - /* Return the last up-to-date full graph if there is one. */ - if (w_generation == w_last_full_graph->wg_generation) { - graph = w_last_full_graph; - atomic_add_int(&graph->wg_refcount, 1); - mtx_unlock_spin(&w_mtx); - goto out; - } else { - graph = w_last_full_graph; - w_last_full_graph = NULL; - mcount = graph->wg_mcount; - fcount = graph->wg_fcount; - atomic_add_int(&graph->wg_refcount, -1); - mtx_unlock_spin(&w_mtx); - if (graph->wg_refcount == 0) - witness_dotgraph_destroy(graph, M_TEMP, - &fcount); - MPASS(mcount == fcount); - mcount = fcount = 0; - graph = NULL; - } - } else - mtx_unlock_spin(&w_mtx); - - /* Have to make a new graph. */ - graph = witness_make_dotgraph(); - - /* Store this graph if it's up to date ... */ - mtx_lock_spin(&w_mtx); - if (w_generation != graph->wg_generation) { - mtx_unlock_spin(&w_mtx); - goto out; - } - - /* .. and there isn't one stored already. */ - if (w_last_full_graph != NULL) { - mtx_unlock_spin(&w_mtx); - goto out; - } - - w_last_full_graph = graph; - atomic_add_int(&graph->wg_refcount, 1); - mtx_unlock_spin(&w_mtx); - -out: - return graph; -} - -struct witness_dotgraph * -witness_cyclegraph(void) -{ - int i, n, mcount, fcount; - struct witness_dotgraph *graph = NULL; - struct witness_dotnode_list doomed = LIST_HEAD_INITIALIZER(doomed); - struct witness_dotnode_list gone = LIST_HEAD_INITIALIZER(gone); - struct witness_dotnode *node, *node2; - - mcount = fcount = 0; - mtx_lock_spin(&w_mtx); - if (w_last_cycle_graph != NULL) { - /* Return the last up-to-date cycle graph if there is one. 
*/ - if (w_generation == w_last_cycle_graph->wg_generation) { - graph = w_last_cycle_graph; - atomic_add_int(&graph->wg_refcount, 1); - mtx_unlock_spin(&w_mtx); - goto out; - } else { - graph = w_last_cycle_graph; - w_last_cycle_graph = NULL; - mcount = graph->wg_mcount; - fcount = graph->wg_fcount; - atomic_add_int(&graph->wg_refcount, -1); - mtx_unlock_spin(&w_mtx); - if (graph->wg_refcount == 0) - witness_dotgraph_destroy(graph, M_TEMP, - &fcount); - MPASS(mcount == fcount); - mcount = fcount = 0; - graph = NULL; - } - } else - mtx_unlock_spin(&w_mtx); - - /* Have to make a new graph. */ - graph = witness_make_dotgraph(); - - /* TODO: Reduce the graph to its' cycles */ - - /* - * Step 1: Build initial list of "doomed" nodes with in-degree or - * out-degree of 0. These are definitely not in the cycle graph. - */ - node = LIST_FIRST(&graph->wg_nodes); - while (node != NULL) { - while (node && node->wn_indegree != 0 && - node->wn_outdegree != 0) { - node = LIST_NEXT(node, wn_list); - } - /* There are none left */ - if (node == NULL) - break; - /* - * Found one w/ in-degree 0 and/or out-degree 0, - * move to doomed list. - */ - node2 = LIST_NEXT(node, wn_list); - LIST_REMOVE(node, wn_list); - LIST_INSERT_HEAD(&doomed, node, wn_list); - node = node2; - } - - /* - * Step 2: Remove nodes of in-degree or out-degree 0 until we no longer - * can. As we remove these doomed nodes, new nodes may become doomed if - * their in-degree or out-degree becomes 0. - */ - while ((node = LIST_FIRST(&doomed)) != NULL) { - struct witness_dotnode *to, *from; - int k; - - /* The out-degree and/or the in-degree better be 0 */ - MPASS(node->wn_outdegree == 0 || node->wn_indegree == 0); - /* Found a node that can be removed, change lists ... */ - LIST_REMOVE(node, wn_list); - LIST_INSERT_HEAD(&gone, node, wn_list); - graph->wg_nodecount--; - /* ... 
and remove the edges */ - if (node->wn_outdegree == 0) - goto next; - - MPASS(node->wn_indegree == 0); - for (i = 0, n = node->wn_outdegree; i < n; i++) { - to = node->wn_outlist[i]; - for (k = 0; k < to->wn_indegree; k++) { - if (to->wn_inlist[k] != node) - continue; - to->wn_inlist[k] = - to->wn_inlist[to->wn_indegree-1]; - to->wn_inlist[to->wn_indegree-1] = NULL; - } - to->wn_indegree--; - node->wn_outdegree--; - if (to->wn_indegree == 0) { - if (to->wn_inlist) { - graph->wg_fcount++; - free(to->wn_inlist, M_TEMP); - to->wn_inlist = NULL; - } - /* Move it to the doomed list */ - if (!(node->wn_flags & WITNESS_NODE_BAD)) { - LIST_REMOVE(to, wn_list); - LIST_INSERT_HEAD(&doomed, to, wn_list); - } - } - } - if (node->wn_outlist) { - graph->wg_fcount++; - free(node->wn_outlist, M_TEMP); - node->wn_outlist = NULL; - } - - next: - if (node->wn_indegree == 0) - continue; - - for (i = 0, n = node->wn_indegree; i < n; i++) { - from = node->wn_inlist[i]; - for (k = 0; k < from->wn_outdegree; k++) { - if (from->wn_outlist[k] != node) - continue; - from->wn_outlist[k] = - from->wn_outlist[from->wn_outdegree - 1]; - from->wn_outlist[from->wn_outdegree - 1] = NULL; - } - from->wn_outdegree--; - node->wn_indegree--; - if (from->wn_outdegree == 0) { - if (from->wn_outlist) { - graph->wg_fcount++; - free(from->wn_outlist, M_TEMP); - from->wn_outlist = NULL; - } - /* Move it to the doomed list */ - if (!(node->wn_flags & WITNESS_NODE_BAD)) { - LIST_REMOVE(from, wn_list); - LIST_INSERT_HEAD(&doomed, from, - wn_list); - } - } - } - if (node->wn_inlist) { - graph->wg_fcount++; - free(node->wn_inlist, M_TEMP); - node->wn_inlist = NULL; - } - } - - /* At this point, all nodes should be on the gone or graph lists */ - MPASS(LIST_EMPTY(&doomed)); - - /* - * Free all the nodes on the gone list, they are no longer a part - * of the graph. 
- */ - node = LIST_FIRST(&gone); - while (node != NULL) { - MPASS(node->wn_indegree == 0 && node->wn_inlist == NULL); - MPASS(node->wn_outdegree == 0 && node->wn_outlist == NULL); - node2 = LIST_NEXT(node, wn_list); - graph->wg_fcount++; - free(node, M_TEMP); - node = node2; - } - - /* Store this graph if it's up to date ... */ - mtx_lock_spin(&w_mtx); - if (w_generation != graph->wg_generation) { - mtx_unlock_spin(&w_mtx); - goto out; - } - - /* .. and there isn't one stored already. */ - if (w_last_cycle_graph != NULL) { - mtx_unlock_spin(&w_mtx); - goto out; - } - - w_last_cycle_graph = graph; - atomic_add_int(&graph->wg_refcount, 1); - mtx_unlock_spin(&w_mtx); + sbuf_finish(sb); + error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); -out: - return graph; + return (error); } static int sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS) { - struct witness_dotgraph *wg; + struct witness *w; struct sbuf *sb; int error; @@ -3104,77 +2428,82 @@ error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold)); return (error); } - sb = sbuf_new(NULL, NULL, 8 * 1024, SBUF_AUTOEXTEND); - wg = witness_fullgraph(); - witness_dotgraph_print(sb, "WITNESS graph", wg); - - atomic_add_int(&wg->wg_refcount, -1); - if (wg->wg_refcount == 0) { - int mcount, fcount; - mcount = wg->wg_mcount; - fcount = wg->wg_fcount; + error = 0; + sb = sbuf_new(NULL, NULL, FULLGRAPH_SBUF_SIZE, SBUF_FIXEDLEN); + if (sb == NULL) + return (ENOMEM); + sbuf_printf(sb, "\n"); + + mtx_lock_spin(&w_mtx); + STAILQ_FOREACH(w, &w_all, w_list) + w->w_displayed = 0; + STAILQ_FOREACH(w, &w_all, w_list) + witness_add_fullgraph(sb, w); + mtx_unlock_spin(&w_mtx); - witness_dotgraph_destroy(wg, M_TEMP, &fcount); - MPASS(mcount == fcount); + /* + * While using SBUF_FIXEDLEN, check if the sbuf overflowed. + */ + if (sbuf_overflowed(sb)) { + sbuf_delete(sb); + panic("%s: sbuf overflowed, bump FULLGRAPH_SBUF_SIZE value\n", + __func__); } + /* + * Close the sbuf and return to userland. 
+ */ sbuf_finish(sb); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); - return error; + return (error); } - static int -sysctl_debug_witness_cyclegraph(SYSCTL_HANDLER_ARGS) +sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) { - struct witness_dotgraph *wg; - struct sbuf *sb; - int error; + int error, value; - if (witness_watch == 0) { - error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning)); - return (error); - } - if (witness_cold) { - error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold)); + value = witness_watch; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error != 0 || req->newptr == NULL) return (error); - } - - sb = sbuf_new(NULL, NULL, 8 * 1024, SBUF_AUTOEXTEND); - wg = witness_cyclegraph(); - witness_dotgraph_print(sb, "WITNESS cycle graph", wg); - - atomic_add_int(&wg->wg_refcount, -1); - if (wg->wg_refcount == 0) { - int mcount, fcount; - mcount = wg->wg_mcount; - fcount = wg->wg_fcount; + if (value == witness_watch) + return (0); + if (value != 0) + return (EINVAL); + witness_watch = 0; + return (0); +} - witness_dotgraph_destroy(wg, M_TEMP, &fcount); - MPASS(mcount == fcount); - } +static void +witness_add_fullgraph(struct sbuf *sb, struct witness *w) +{ + int i; - sbuf_finish(sb); - error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); - sbuf_delete(sb); + if (w->w_displayed != 0 || (w->w_file == NULL && w->w_line == 0)) + return; + w->w_displayed = 1; - return error; + WITNESS_INDEX_ASSERT(w->w_index); + for (i = 1; i <= w_max_used_index; i++) { + if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) { + sbuf_printf(sb, "\"%s\",\"%s\"\n", w->w_name, + w_data[i].w_name); + witness_add_fullgraph(sb, &w_data[i]); + } + } } -/******************* - * HASH TABLE CODE * - *******************/ - /* * A simple hash function. Takes a key pointer and a key size. If size == 0, * interprets the key as a string and reads until the null * terminator. Otherwise, reads the first size bytes. 
Returns an unsigned 32-bit * hash value computed from the key. */ -static u_int32_t -witness_hash_djb2(const u_int8_t *key, u_int32_t size) +static uint32_t +witness_hash_djb2(const uint8_t *key, uint32_t size) { unsigned int hash = 5381; int i; @@ -3187,7 +2516,7 @@ for (i = 0; key[i] != 0; i++) hash = ((hash << 5) + hash) + (unsigned int)key[i]; - return hash; + return (hash); } @@ -3201,23 +2530,24 @@ int i; MPASS(witness_cold); - /* Init the hash tables. First the witness hash ... */ + + /* Initialize the hash tables. */ for (i = 0; i < WITNESS_HASH_SIZE; i++) w_hash.wh_array[i] = NULL; w_hash.wh_size = WITNESS_HASH_SIZE; w_hash.wh_count = 0; - /* ... then the lock order data hash */ + /* Initialize the lock order data hash. */ w_lofree = NULL; - for (i = 0; i < WITNESS_LOCK_ORDER_DATA_COUNT; i++) { + for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) { memset(&w_lodata[i], 0, sizeof(w_lodata[i])); w_lodata[i].wlod_next = w_lofree; w_lofree = &w_lodata[i]; } - w_lohash.wloh_size = WITNESS_LOCK_ORDER_HASH_SIZE; + w_lohash.wloh_size = WITNESS_LO_HASH_SIZE; w_lohash.wloh_count = 0; - for (i = 0; i < WITNESS_LOCK_ORDER_HASH_SIZE; i++) + for (i = 0; i < WITNESS_LO_HASH_SIZE; i++) w_lohash.wloh_array[i] = NULL; } @@ -3225,11 +2555,11 @@ witness_hash_get(const char *key) { struct witness *w; - u_int32_t hash; - - MPASS(mtx_owned(&w_mtx) || witness_cold); - MPASS(key != NULL); + uint32_t hash; + MPASS(key != NULL); + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); hash = witness_hash_djb2(key, 0) % w_hash.wh_size; w = w_hash.wh_array[hash]; while (w != NULL) { @@ -3239,23 +2569,24 @@ } out: - return w; + return (w); } static void witness_hash_put(struct witness *w) { - u_int32_t hash; + uint32_t hash; - MPASS(mtx_owned(&w_mtx) || witness_cold); MPASS(w != NULL); MPASS(w->w_name != NULL); + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); KASSERT(witness_hash_get(w->w_name) == NULL, ("%s: trying to add a hash entry that already exists!", __func__)); - 
KASSERT(w->w_hash_next == NULL, ("%s: w->w_hash_next != NULL", __func__)); + KASSERT(w->w_hash_next == NULL, + ("%s: w->w_hash_next != NULL", __func__)); hash = witness_hash_djb2(w->w_name, 0) % w_hash.wh_size; - w->w_hash_next = w_hash.wh_array[hash]; w_hash.wh_array[hash] = w; w_hash.wh_count++; @@ -3274,8 +2605,8 @@ key.to = child->w_index; WITNESS_INDEX_ASSERT(key.from); WITNESS_INDEX_ASSERT(key.to); - if (!(w_rmatrix[parent->w_index][child->w_index] - & WITNESS_LOCK_ORDER_KNOWN)) + if ((w_rmatrix[parent->w_index][child->w_index] + & WITNESS_LOCK_ORDER_KNOWN) == 0) goto out; hash = witness_hash_djb2((const char*)&key, @@ -3288,7 +2619,7 @@ } out: - return data; + return (data); } /* @@ -3304,7 +2635,7 @@ w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN && isitmychild(parent, child)) - return 1; + return (1); return (0); } @@ -3323,14 +2654,14 @@ WITNESS_INDEX_ASSERT(key.to); if (w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN) - return 1; + return (1); hash = witness_hash_djb2((const char*)&key, sizeof(key)) % w_lohash.wloh_size; w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN; data = w_lofree; if (data == NULL) - return 0; + return (0); w_lofree = data->wlod_next; data->wlod_next = w_lohash.wloh_array[hash]; data->wlod_key = key; @@ -3338,25 +2669,27 @@ w_lohash.wloh_count++; stack_zero(&data->wlod_stack); stack_save(&data->wlod_stack); - return 1; + return (1); +} + +/* Call this whenever the structure of the witness graph changes. */ +static void +witness_increment_graph_generation(void) +{ + + if (witness_cold == 0) + mtx_assert(&w_mtx, MA_OWNED); + w_generation++; } #ifdef KDB static void -witness_debugger(int cond, const char *msg) +_witness_debugger(int cond, const char *msg) { + if (witness_trace && cond) kdb_backtrace(); if (witness_kdb && cond) kdb_enter(msg); } #endif - -/* Call this whenver the structure of the witness graph changes. 
*/ -static void -witness_increment_graph_generation(void) -{ - MPASS(mtx_owned(&w_mtx) || witness_cold); - w_generation++; -} -