commit ff33fc5747e8cf5524571f268f0831d9dfd8979d Author: Andrew Gallatin Date: Wed Oct 2 14:04:02 2019 -0400 Initial support for KUtrace KUtrace is an efficient tracing tool that traces kernel/user transitions. It makes it possible to know exactly what is happening on the system with very low (<1%) overhead. https://queue.acm.org/detail.cfm?id=3291278 diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index ffe7a4fcbe9c..8973cf72e610 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -103,6 +103,8 @@ PMC_SOFT_DEFINE( , , page_fault, write); #include #endif +#include + extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); @@ -343,8 +345,9 @@ trap(struct trapframe *frame) if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; - + kutrace1(KUTRACE_TRAP + KUTRACE_PAGEFAULT, 0); pf = trap_pfault(frame, true, &signo, &ucode); + kutrace1(KUTRACE_TRAPRET + KUTRACE_PAGEFAULT, 0); if (pf == -1) return; if (pf == 0) @@ -404,7 +407,9 @@ trap(struct trapframe *frame) ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ + kutrace1(KUTRACE_TRAP + KUTRACE_PAGEFAULT, 0); (void)trap_pfault(frame, false, NULL, NULL); + kutrace1(KUTRACE_TRAPRET + KUTRACE_PAGEFAULT, 0); return; case T_DNA: @@ -1063,6 +1068,8 @@ cpu_fetch_syscall_args(struct thread *td) return (0); } +#define KUTRACE_TABLE + #include "../../kern/subr_syscall.c" static void (*syscall_ret_l1d_flush)(void); diff --git a/sys/conf/options.amd64 b/sys/conf/options.amd64 index 8939ddaf6246..2d71ee974d65 100644 --- a/sys/conf/options.amd64 +++ b/sys/conf/options.amd64 @@ -68,3 +68,4 @@ ISCI_LOGGING opt_isci.h # EFI Runtime services support EFIRT opt_efirt.h +KUTRACE opt_global.h diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index e33f12e40382..285c2b276fbc 100644 --- a/sys/kern/sched_ule.c +++ 
b/sys/kern/sched_ule.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef HWPMC_HOOKS #include @@ -2067,6 +2068,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(newtd == NULL, ("sched_switch: Unsupported newtd argument")); + kutrace1(KUTRACE_SYSCALL64 + KUTRACE_SCHEDSYSCALL, 0); cpuid = PCPU_GET(cpuid); tdq = TDQ_SELF(); ts = td_get_sched(td); @@ -2155,7 +2157,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (dtrace_vtime_active) (*dtrace_vtime_switch_func)(newtd); #endif - + kutrace_pidname(newtd); + kutrace1(KUTRACE_USERPID, newtd->td_tid); cpu_switch(td, newtd, mtx); /* * We may return from cpu_switch on a different cpu. However, @@ -2186,6 +2189,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) TDQ_LOCK_ASSERT(tdq, MA_OWNED|MA_NOTRECURSED); MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); td->td_oncpu = cpuid; + kutrace1(KUTRACE_SYSRET64 + KUTRACE_SCHEDSYSCALL, 0); } /* @@ -2237,6 +2241,7 @@ sched_wakeup(struct thread *td) struct td_sched *ts; int slptick; + kutrace1(KUTRACE_RUNNABLE, td->td_tid); THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td_get_sched(td); td->td_flags &= ~TDF_CANSWAP; @@ -2398,6 +2403,7 @@ sched_preempt(struct thread *td) else mi_switch(flags | SWT_REMOTEPREEMPT, NULL); } + kutrace1(KUTRACE_IRQRET + RESCHEDULE_VECTOR, 0); thread_unlock(td); } diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c index 951e7b682623..e8d7efb3b447 100644 --- a/sys/kern/subr_syscall.c +++ b/sys/kern/subr_syscall.c @@ -53,6 +53,589 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#include + +#ifdef KUTRACE +#ifdef KUTRACE_TABLE +/* + * FreeBSD has ~410 syscalls spread over a range from 0..569 + * Implement a LUT to make the space dense + */ +uint16_t kutrace_syscall_map[SYS_MAXSYSCALL] = { + 1, /* 0 syscall */ + 2, /* 1 exit */ + 3, /* 2 fork */ + 4, /* 3 read */ + 5, /* 4 write */ + 6, /* 5 open */ + 7, /* 6 close */ + 8, /* 
7 wait4 */ + 0, /* 9 nosys */ + 9, /* 9 link */ + 10, /* 10 unlink */ + 0, /* 12 nosys */ + 11, /* 12 chdir */ + 12, /* 13 fchdir */ + 13, /* 14 freebsd11_mknod */ + 14, /* 15 chmod */ + 15, /* 16 chown */ + 16, /* 17 break */ + 0, /* 20 nosys */ + 0, /* 20 nosys */ + 17, /* 20 getpid */ + 18, /* 21 mount */ + 19, /* 22 unmount */ + 20, /* 23 setuid */ + 21, /* 24 getuid */ + 22, /* 25 geteuid */ + 23, /* 26 ptrace */ + 24, /* 27 recvmsg */ + 25, /* 28 sendmsg */ + 26, /* 29 recvfrom */ + 27, /* 30 accept */ + 28, /* 31 getpeername */ + 29, /* 32 getsockname */ + 30, /* 33 access */ + 31, /* 34 chflags */ + 32, /* 35 fchflags */ + 33, /* 36 sync */ + 34, /* 37 kill */ + 0, /* 39 nosys */ + 35, /* 39 getppid */ + 0, /* 41 nosys */ + 36, /* 41 dup */ + 37, /* 42 freebsd10_pipe */ + 38, /* 43 getegid */ + 39, /* 44 profil */ + 40, /* 45 ktrace */ + 0, /* 47 nosys */ + 41, /* 47 getgid */ + 0, /* 49 nosys */ + 42, /* 49 getlogin */ + 43, /* 50 setlogin */ + 44, /* 51 acct */ + 0, /* 53 nosys */ + 45, /* 53 sigaltstack */ + 46, /* 54 ioctl */ + 47, /* 55 reboot */ + 48, /* 56 revoke */ + 49, /* 57 symlink */ + 50, /* 58 readlink */ + 51, /* 59 execve */ + 52, /* 60 umask */ + 53, /* 61 chroot */ + 0, /* 65 nosys */ + 0, /* 65 nosys */ + 0, /* 65 nosys */ + 54, /* 65 msync */ + 55, /* 66 vfork */ + 0, /* 69 nosys */ + 0, /* 69 nosys */ + 56, /* 69 sbrk */ + 57, /* 70 sstk */ + 0, /* 72 nosys */ + 58, /* 72 freebsd11_vadvise */ + 59, /* 73 munmap */ + 60, /* 74 mprotect */ + 61, /* 75 madvise */ + 0, /* 78 nosys */ + 0, /* 78 nosys */ + 62, /* 78 mincore */ + 63, /* 79 getgroups */ + 64, /* 80 setgroups */ + 65, /* 81 getpgrp */ + 66, /* 82 setpgid */ + 67, /* 83 setitimer */ + 0, /* 85 nosys */ + 68, /* 85 swapon */ + 69, /* 86 getitimer */ + 0, /* 89 nosys */ + 0, /* 89 nosys */ + 70, /* 89 getdtablesize */ + 71, /* 90 dup2 */ + 0, /* 92 nosys */ + 72, /* 92 fcntl */ + 73, /* 93 select */ + 0, /* 95 nosys */ + 74, /* 95 fsync */ + 75, /* 96 setpriority */ + 76, /* 97 
socket */ + 77, /* 98 connect */ + 0, /* 100 nosys */ + 78, /* 100 getpriority */ + 0, /* 104 nosys */ + 0, /* 104 nosys */ + 0, /* 104 nosys */ + 79, /* 104 bind */ + 80, /* 105 setsockopt */ + 81, /* 106 listen */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 0, /* 116 nosys */ + 82, /* 116 gettimeofday */ + 83, /* 117 getrusage */ + 84, /* 118 getsockopt */ + 0, /* 120 nosys */ + 85, /* 120 readv */ + 86, /* 121 writev */ + 87, /* 122 settimeofday */ + 88, /* 123 fchown */ + 89, /* 124 fchmod */ + 0, /* 126 nosys */ + 90, /* 126 setreuid */ + 91, /* 127 setregid */ + 92, /* 128 rename */ + 0, /* 131 nosys */ + 0, /* 131 nosys */ + 93, /* 131 flock */ + 94, /* 132 mkfifo */ + 95, /* 133 sendto */ + 96, /* 134 shutdown */ + 97, /* 135 socketpair */ + 98, /* 136 mkdir */ + 99, /* 137 rmdir */ + 100, /* 138 utimes */ + 0, /* 140 nosys */ + 101, /* 140 adjtime */ + 0, /* 147 nosys */ + 0, /* 147 nosys */ + 0, /* 147 nosys */ + 0, /* 147 nosys */ + 0, /* 147 nosys */ + 0, /* 147 nosys */ + 102, /* 147 setsid */ + 103, /* 148 quotactl */ + 0, /* 154 nosys */ + 0, /* 154 nosys */ + 0, /* 154 nosys */ + 0, /* 154 nosys */ + 0, /* 154 nosys */ + 104, /* 154 nlm_syscall */ + 105, /* 155 nfssvc */ + 0, /* 160 nosys */ + 0, /* 160 nosys */ + 0, /* 160 nosys */ + 0, /* 160 nosys */ + 106, /* 160 lgetfh */ + 107, /* 161 getfh */ + 0, /* 165 nosys */ + 0, /* 165 nosys */ + 0, /* 165 nosys */ + 108, /* 165 sysarch */ + 109, /* 166 rtprio */ + 0, /* 169 nosys */ + 0, /* 169 nosys */ + 110, /* 169 semsys */ + 111, /* 170 msgsys */ + 112, /* 171 shmsys */ + 0, /* 175 nosys */ + 0, /* 175 nosys */ + 0, /* 175 nosys */ + 113, /* 175 setfib */ + 114, /* 176 ntp_adjtime */ + 0, /* 181 nosys */ + 0, /* 181 nosys */ + 0, /* 181 nosys */ + 0, /* 181 nosys */ + 115, /* 181 setgid */ + 116, /* 182 setegid */ + 117, /* 183 seteuid */ + 0, /* 188 nosys */ + 0, /* 188 
nosys */ + 0, /* 188 nosys */ + 0, /* 188 nosys */ + 118, /* 188 freebsd11_stat */ + 119, /* 189 freebsd11_fstat */ + 120, /* 190 freebsd11_lstat */ + 121, /* 191 pathconf */ + 122, /* 192 fpathconf */ + 0, /* 194 nosys */ + 123, /* 194 getrlimit */ + 124, /* 195 setrlimit */ + 125, /* 196 freebsd11_getdirentries */ + 0, /* 198 nosys */ + 126, /* 198 __syscall */ + 0, /* 202 nosys */ + 0, /* 202 nosys */ + 0, /* 202 nosys */ + 127, /* 202 __sysctl */ + 128, /* 203 mlock */ + 129, /* 204 munlock */ + 130, /* 205 undelete */ + 131, /* 206 futimes */ + 132, /* 207 getpgid */ + 0, /* 209 nosys */ + 133, /* 209 poll */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 0, /* 220 nosys */ + 134, /* 220 freebsd7___semctl */ + 135, /* 221 semget */ + 136, /* 222 semop */ + 0, /* 224 nosys */ + 137, /* 224 freebsd7_msgctl */ + 138, /* 225 msgget */ + 139, /* 226 msgsnd */ + 140, /* 227 msgrcv */ + 141, /* 228 shmat */ + 142, /* 229 freebsd7_shmctl */ + 143, /* 230 shmdt */ + 144, /* 231 shmget */ + 145, /* 232 clock_gettime */ + 146, /* 233 clock_settime */ + 147, /* 234 clock_getres */ + 148, /* 235 ktimer_create */ + 149, /* 236 ktimer_delete */ + 150, /* 237 ktimer_settime */ + 151, /* 238 ktimer_gettime */ + 152, /* 239 ktimer_getoverrun */ + 153, /* 240 nanosleep */ + 154, /* 241 ffclock_getcounter */ + 155, /* 242 ffclock_setestimate */ + 156, /* 243 ffclock_getestimate */ + 157, /* 244 clock_nanosleep */ + 0, /* 247 nosys */ + 0, /* 247 nosys */ + 158, /* 247 clock_getcpuclockid2 */ + 159, /* 248 ntp_gettime */ + 0, /* 250 nosys */ + 160, /* 250 minherit */ + 161, /* 251 rfork */ + 0, /* 253 nosys */ + 162, /* 253 issetugid */ + 163, /* 254 lchown */ + 164, /* 255 aio_read */ + 165, /* 256 aio_write */ + 166, /* 257 lio_listio */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys 
*/ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 0, /* 272 nosys */ + 167, /* 272 freebsd11_getdents */ + 0, /* 274 nosys */ + 168, /* 274 lchmod */ + 0, /* 276 nosys */ + 169, /* 276 lutimes */ + 0, /* 278 nosys */ + 170, /* 278 freebsd11_nstat */ + 171, /* 279 freebsd11_nfstat */ + 172, /* 280 freebsd11_nlstat */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 0, /* 289 nosys */ + 173, /* 289 preadv */ + 174, /* 290 pwritev */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 0, /* 298 nosys */ + 175, /* 298 fhopen */ + 176, /* 299 freebsd11_fhstat */ + 177, /* 300 modnext */ + 178, /* 301 modstat */ + 179, /* 302 modfnext */ + 180, /* 303 modfind */ + 181, /* 304 kldload */ + 182, /* 305 kldunload */ + 183, /* 306 kldfind */ + 184, /* 307 kldnext */ + 185, /* 308 kldstat */ + 186, /* 309 kldfirstmod */ + 187, /* 310 getsid */ + 188, /* 311 setresuid */ + 189, /* 312 setresgid */ + 0, /* 314 nosys */ + 190, /* 314 aio_return */ + 191, /* 315 aio_suspend */ + 192, /* 316 aio_cancel */ + 193, /* 317 aio_error */ + 0, /* 321 nosys */ + 0, /* 321 nosys */ + 0, /* 321 nosys */ + 194, /* 321 yield */ + 0, /* 324 nosys */ + 0, /* 324 nosys */ + 195, /* 324 mlockall */ + 196, /* 325 munlockall */ + 197, /* 326 __getcwd */ + 198, /* 327 sched_setparam */ + 199, /* 328 sched_getparam */ + 200, /* 329 sched_setscheduler */ + 201, /* 330 sched_getscheduler */ + 202, /* 331 sched_yield */ + 203, /* 332 sched_get_priority_max */ + 204, /* 333 sched_get_priority_min */ + 205, /* 334 sched_rr_get_interval */ + 206, /* 335 utrace */ + 0, /* 337 nosys */ + 207, /* 337 kldsym */ + 208, /* 338 jail */ + 209, /* 339 nnpfs_syscall */ + 210, /* 340 sigprocmask */ + 211, /* 341 sigsuspend 
*/ + 0, /* 343 nosys */ + 212, /* 343 sigpending */ + 0, /* 345 nosys */ + 213, /* 345 sigtimedwait */ + 214, /* 346 sigwaitinfo */ + 215, /* 347 __acl_get_file */ + 216, /* 348 __acl_set_file */ + 217, /* 349 __acl_get_fd */ + 218, /* 350 __acl_set_fd */ + 219, /* 351 __acl_delete_file */ + 220, /* 352 __acl_delete_fd */ + 221, /* 353 __acl_aclcheck_file */ + 222, /* 354 __acl_aclcheck_fd */ + 223, /* 355 extattrctl */ + 224, /* 356 extattr_set_file */ + 225, /* 357 extattr_get_file */ + 226, /* 358 extattr_delete_file */ + 227, /* 359 aio_waitcomplete */ + 228, /* 360 getresuid */ + 229, /* 361 getresgid */ + 230, /* 362 kqueue */ + 231, /* 363 freebsd11_kevent */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 0, /* 371 nosys */ + 232, /* 371 extattr_set_fd */ + 233, /* 372 extattr_get_fd */ + 234, /* 373 extattr_delete_fd */ + 235, /* 374 __setugid */ + 0, /* 376 nosys */ + 236, /* 376 eaccess */ + 237, /* 377 afs3_syscall */ + 238, /* 378 nmount */ + 0, /* 384 nosys */ + 0, /* 384 nosys */ + 0, /* 384 nosys */ + 0, /* 384 nosys */ + 0, /* 384 nosys */ + 239, /* 384 __mac_get_proc */ + 240, /* 385 __mac_set_proc */ + 241, /* 386 __mac_get_fd */ + 242, /* 387 __mac_get_file */ + 243, /* 388 __mac_set_fd */ + 244, /* 389 __mac_set_file */ + 245, /* 390 kenv */ + 246, /* 391 lchflags */ + 247, /* 392 uuidgen */ + 248, /* 393 sendfile */ + 249, /* 394 mac_syscall */ + 250, /* 395 freebsd11_getfsstat */ + 251, /* 396 freebsd11_statfs */ + 252, /* 397 freebsd11_fstatfs */ + 253, /* 398 freebsd11_fhstatfs */ + 0, /* 400 nosys */ + 254, /* 400 ksem_close */ + 255, /* 401 ksem_post */ + 256, /* 402 ksem_wait */ + 257, /* 403 ksem_trywait */ + 258, /* 404 ksem_init */ + 259, /* 405 ksem_open */ + 260, /* 406 ksem_unlink */ + 261, /* 407 ksem_getvalue */ + 262, /* 408 ksem_destroy */ + 263, /* 409 __mac_get_pid */ + 264, /* 410 __mac_get_link */ + 265, /* 411 __mac_set_link */ + 266, /* 412 
extattr_set_link */ + 267, /* 413 extattr_get_link */ + 268, /* 414 extattr_delete_link */ + 269, /* 415 __mac_execve */ + 270, /* 416 sigaction */ + 271, /* 417 sigreturn */ + 0, /* 421 nosys */ + 0, /* 421 nosys */ + 0, /* 421 nosys */ + 272, /* 421 getcontext */ + 273, /* 422 setcontext */ + 274, /* 423 swapcontext */ + 275, /* 424 swapoff */ + 276, /* 425 __acl_get_link */ + 277, /* 426 __acl_set_link */ + 278, /* 427 __acl_delete_link */ + 279, /* 428 __acl_aclcheck_link */ + 280, /* 429 sigwait */ + 281, /* 430 thr_create */ + 282, /* 431 thr_exit */ + 283, /* 432 thr_self */ + 284, /* 433 thr_kill */ + 0, /* 436 nosys */ + 0, /* 436 nosys */ + 285, /* 436 jail_attach */ + 286, /* 437 extattr_list_fd */ + 287, /* 438 extattr_list_file */ + 288, /* 439 extattr_list_link */ + 0, /* 441 nosys */ + 289, /* 441 ksem_timedwait */ + 290, /* 442 thr_suspend */ + 291, /* 443 thr_wake */ + 292, /* 444 kldunloadf */ + 293, /* 445 audit */ + 294, /* 446 auditon */ + 295, /* 447 getauid */ + 296, /* 448 setauid */ + 297, /* 449 getaudit */ + 298, /* 450 setaudit */ + 299, /* 451 getaudit_addr */ + 300, /* 452 setaudit_addr */ + 301, /* 453 auditctl */ + 302, /* 454 _umtx_op */ + 303, /* 455 thr_new */ + 304, /* 456 sigqueue */ + 305, /* 457 kmq_open */ + 306, /* 458 kmq_setattr */ + 307, /* 459 kmq_timedreceive */ + 308, /* 460 kmq_timedsend */ + 309, /* 461 kmq_notify */ + 310, /* 462 kmq_unlink */ + 311, /* 463 abort2 */ + 312, /* 464 thr_set_name */ + 313, /* 465 aio_fsync */ + 314, /* 466 rtprio_thread */ + 0, /* 471 nosys */ + 0, /* 471 nosys */ + 0, /* 471 nosys */ + 0, /* 471 nosys */ + 315, /* 471 sctp_peeloff */ + 316, /* 472 sctp_generic_sendmsg */ + 317, /* 473 sctp_generic_sendmsg_iov */ + 318, /* 474 sctp_generic_recvmsg */ + 319, /* 475 pread */ + 320, /* 476 pwrite */ + 321, /* 477 mmap */ + 322, /* 478 lseek */ + 323, /* 479 truncate */ + 324, /* 480 ftruncate */ + 325, /* 481 thr_kill2 */ + 326, /* 482 shm_open */ + 327, /* 483 shm_unlink */ + 328, /* 484 
cpuset */ + 329, /* 485 cpuset_setid */ + 330, /* 486 cpuset_getid */ + 331, /* 487 cpuset_getaffinity */ + 332, /* 488 cpuset_setaffinity */ + 333, /* 489 faccessat */ + 334, /* 490 fchmodat */ + 335, /* 491 fchownat */ + 336, /* 492 fexecve */ + 337, /* 493 freebsd11_fstatat */ + 338, /* 494 futimesat */ + 339, /* 495 linkat */ + 340, /* 496 mkdirat */ + 341, /* 497 mkfifoat */ + 342, /* 498 freebsd11_mknodat */ + 343, /* 499 openat */ + 344, /* 500 readlinkat */ + 345, /* 501 renameat */ + 346, /* 502 symlinkat */ + 347, /* 503 unlinkat */ + 348, /* 504 posix_openpt */ + 349, /* 505 gssd_syscall */ + 350, /* 506 jail_get */ + 351, /* 507 jail_set */ + 352, /* 508 jail_remove */ + 353, /* 509 closefrom */ + 354, /* 510 __semctl */ + 355, /* 511 msgctl */ + 356, /* 512 shmctl */ + 357, /* 513 lpathconf */ + 0, /* 515 nosys */ + 358, /* 515 __cap_rights_get */ + 359, /* 516 cap_enter */ + 360, /* 517 cap_getmode */ + 361, /* 518 pdfork */ + 362, /* 519 pdkill */ + 363, /* 520 pdgetpid */ + 0, /* 522 nosys */ + 364, /* 522 pselect */ + 365, /* 523 getloginclass */ + 366, /* 524 setloginclass */ + 367, /* 525 rctl_get_racct */ + 368, /* 526 rctl_get_rules */ + 369, /* 527 rctl_get_limits */ + 370, /* 528 rctl_add_rule */ + 371, /* 529 rctl_remove_rule */ + 372, /* 530 posix_fallocate */ + 373, /* 531 posix_fadvise */ + 374, /* 532 wait6 */ + 375, /* 533 cap_rights_limit */ + 376, /* 534 cap_ioctls_limit */ + 377, /* 535 cap_ioctls_get */ + 378, /* 536 cap_fcntls_limit */ + 379, /* 537 cap_fcntls_get */ + 380, /* 538 bindat */ + 381, /* 539 connectat */ + 382, /* 540 chflagsat */ + 383, /* 541 accept4 */ + 384, /* 542 pipe2 */ + 385, /* 543 aio_mlock */ + 386, /* 544 procctl */ + 387, /* 545 ppoll */ + 388, /* 546 futimens */ + 389, /* 547 utimensat */ + 0, /* 550 nosys */ + 0, /* 550 nosys */ + 390, /* 550 fdatasync */ + 391, /* 551 fstat */ + 392, /* 552 fstatat */ + 393, /* 553 fhstat */ + 394, /* 554 getdirentries */ + 395, /* 555 statfs */ + 396, /* 556 fstatfs 
*/ + 397, /* 557 getfsstat */ + 398, /* 558 fhstatfs */ + 399, /* 559 mknodat */ + 400, /* 560 kevent */ + 401, /* 561 cpuset_getdomain */ + 402, /* 562 cpuset_setdomain */ + 403, /* 563 getrandom */ + 404, /* 564 getfhat */ + 405, /* 565 fhlink */ + 406, /* 566 fhlinkat */ + 407, /* 567 fhreadlink */ + 408, /* 568 funlinkat */ + 409, /* 569 copy_file_range */ + 410, /* 570 MAXSYSCALL */ +}; +#endif +#endif static inline void syscallenter(struct thread *td) @@ -89,6 +672,8 @@ syscallenter(struct thread *td) td->td_errno = error; goto retval; } + kutrace1(KUTRACE_SYSCALL64 + kutrace_map_nr(sa->code), + sa->args[0] & 0xffffUL); STOPEVENT(p, S_SCE, sa->narg); if ((p->p_flag & P_TRACED) != 0) { @@ -167,6 +752,8 @@ syscallenter(struct thread *td) PROC_UNLOCK(p); } (p->p_sysent->sv_set_syscall_retval)(td, error); + kutrace1(KUTRACE_SYSRET64 + kutrace_map_nr(sa->code), + error & 0xffffUL); } static inline void diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 7c162ee2d7c2..68100de61182 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -86,6 +86,13 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef KUTRACE +#include +struct kutrace_ops kutrace_global_ops; +bool kutrace_tracing; +uint64_t* kutrace_pid_filter; +#endif + #include void (*softdep_ast_cleanup)(struct thread *); diff --git a/sys/sys/kutrace.h b/sys/sys/kutrace.h new file mode 100644 index 000000000000..f6f457ffe9f6 --- /dev/null +++ b/sys/sys/kutrace.h @@ -0,0 +1,170 @@ +/* + * kutrace.h + * + * Copyright (C) 2019 Richard L. Sites + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/. + */ + +/* + * A module that implements kernel/user tracing + * dsites 2019.02.19 + * + * See below in this header for struct definitions + * + * Most patches will be something like + * kutrace1(event, arg) + * + */ + +/* + * hooks for kernel/user tracing + * dsites 2019.02.14 + * + */ + +#ifndef KUTRACE_H +#define KUTRACE_H + +#include + +/* Updated 2019.03.03 to allow 64-bit syscalls 0..510 and */ +/* 32-bit syscalls 512..1022 */ + +/* Take over last syscall number for controlling kutrace */ +#define __NR_kutrace_control 1023 + +/* Take over last syscall64 number for tracing scheduler call/return */ +#define KUTRACE_SCHEDSYSCALL 511 + +/* kutrace_control() commands */ +#define KUTRACE_CMD_OFF 0 +#define KUTRACE_CMD_ON 1 +#define KUTRACE_CMD_FLUSH 2 +#define KUTRACE_CMD_RESET 3 +#define KUTRACE_CMD_STAT 4 +#define KUTRACE_CMD_GETCOUNT 5 +#define KUTRACE_CMD_GETWORD 6 +#define KUTRACE_CMD_INSERT1 7 +#define KUTRACE_CMD_INSERTN 8 +#define KUTRACE_CMD_GETIPCWORD 9 +#define KUTRACE_CMD_TEST 10 +#define KUTRACE_CMD_VERSION 11 + + +/* This is a shortened list of kernel-mode raw trace 12-bit event numbers */ +/* See user-mode kutrace_lib.h for the full set */ + +/* Entry to provide names for PIDs */ +#define KUTRACE_PIDNAME 0x002 + +// Specials are point events +#define KUTRACE_USERPID 0x200 /* Context switch: new PID */ +#define KUTRACE_RPCIDREQ 0x201 +#define KUTRACE_RPCIDRESP 0x202 +#define KUTRACE_RPCIDMID 0x203 +#define KUTRACE_RPCIDRXPKT 0x204 +#define KUTRACE_RPCIDTXPKT 0x205 +#define KUTRACE_RUNNABLE 0x206 /* Set process runnable: PID */ +#define KUTRACE_IPI 0x207 /* Send IPI; receive is interrupt */ +#define KUTRACE_MWAIT 0x208 /* C-states */ +#define KUTRACE_PSTATE 0x209 /* P-states */ + + +/* These are in blocks of 256 numbers */ +#define KUTRACE_TRAP 0x0400 /* AKA fault */ +#define 
KUTRACE_IRQ 0x0500 +#define KUTRACE_TRAPRET 0x0600 +#define KUTRACE_IRQRET 0x0700 + + +/* These are in blocks of 512 numbers */ +#define KUTRACE_SYSCALL64 0x0800 +#define KUTRACE_SYSRET64 0x0A00 +#define KUTRACE_SYSCALL32 0x0C00 +#define KUTRACE_SYSRET32 0x0E00 + +/* Specific trap number for page fault */ +#define KUTRACE_PAGEFAULT 14 + +/* Specific IRQ numbers. See arch/x86/include/asm/irq_vectors.h */ +#define KUTRACE_LOCAL_TIMER_VECTOR 0xec + +/* Reuse the spurious_apic vector to show bottom halves executing */ +#define KUTRACE_BOTTOM_HALF 255 + +#define RESCHEDULE_VECTOR IPI_PREEMPT + + +/* Procedure interface to loadable module or compiled-in kutrace.c */ +struct kutrace_ops { + void (*kutrace_trace_1)(uint64_t num, uint64_t arg); + void (*kutrace_trace_2)(uint64_t num, uint64_t arg1, uint64_t arg2); + void (*kutrace_trace_many)(uint64_t num, uint64_t len, const char *arg); + uint64_t (*kutrace_trace_control)(uint64_t command, uint64_t arg); +}; + +/* Per-cpu struct */ +struct kutrace_traceblock { + uint64_t next; /* Next uint64_t in current pcpu trace block */ + uint64_t *limit; /* Off-the-end uint64_t in current pcpu block */ + uint64_t prior_cycles; /* IPC tracking */ + uint64_t prior_inst_retired; /* IPC tracking */ +}; + + +#ifdef KUTRACE +/* Global variables used by kutrace. Defined in sys/kern/subr_trap.c */ +extern bool kutrace_tracing; +extern struct kutrace_ops kutrace_global_ops; +extern uint64_t *kutrace_pid_filter; + +/* Insert pid name if first time seen. Races don't matter here. 
*/ +#define kutrace_pidname(next) \ + if (kutrace_tracing) { \ + uint32_t pid16 = (next)->td_tid & 0xffff; \ + uint32_t pid_hi = pid16 >> 6; \ + uint64_t pid_bit = (uint64_t)1 << (pid16 & 0x3f); \ + if ((kutrace_pid_filter[pid_hi] & pid_bit) == 0) { \ + uint64_t name_entry[3]; \ + name_entry[0] = (next)->td_tid; \ + memcpy(&name_entry[1], (next)->td_name, 16); \ + (*kutrace_global_ops.kutrace_trace_many)( \ + KUTRACE_PIDNAME, 3l, (const char*)&name_entry[0]); \ + kutrace_pid_filter[pid_hi] |= pid_bit; \ + } \ + } + +#define kutrace1(event, arg) \ + if (kutrace_tracing) { \ + (*kutrace_global_ops.kutrace_trace_1)(event, arg); \ + } + +/* map_nr: syscall number -> dense id via LUT; out-of-range maps to 0 */ +extern uint16_t kutrace_syscall_map[]; +#define kutrace_map_nr(nr) \ + ((unsigned)(nr) < SYS_MAXSYSCALL ? kutrace_syscall_map[(unsigned)(nr)] : 0) + +#else + +#define kutrace_pidname(next) +#define kutrace1(event, arg) +#define kutrace_map_nr(nr) (nr) + +#endif + + +#endif /* KUTRACE_H */ + + diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c index 607d725fbcb9..45296c7028d0 100644 --- a/sys/x86/x86/cpu_machdep.c +++ b/sys/x86/x86/cpu_machdep.c @@ -102,6 +102,8 @@ __FBSDID("$FreeBSD$"); #include +#include + #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 @@ -172,6 +174,7 @@ acpi_cpu_idle_mwait(uint32_t mwait_hint) * but all Intel CPUs provide hardware coordination. 
*/ + kutrace1(KUTRACE_MWAIT, mwait_hint); state = &PCPU_PTR(monitorbuf)->idle_state; KASSERT(atomic_load_int(state) == STATE_SLEEPING, ("cpu_mwait_cx: wrong monitorbuf state")); @@ -496,6 +499,7 @@ cpu_idle_mwait(sbintime_t sbt) return; } + kutrace1(KUTRACE_MWAIT, 0); cpu_monitor(state, 0, 0); if (atomic_load_int(state) == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index 86dde2d25db2..6199595cd26a 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -63,6 +63,7 @@ #ifdef DDB #include #endif +#include #ifndef DEV_ATPIC #include @@ -350,6 +351,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) if (vector == 0) clkintr_pending = 1; + kutrace1(KUTRACE_IRQ + (vector & 0xFF), 0); /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. @@ -364,6 +366,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) "too many stray irq %d's: not logging anymore\n", vector); } + kutrace1(KUTRACE_IRQRET + (vector & 0xFF), 0); } void diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index e0fc90566c96..1ab6b611cedb 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$"); #include #endif +#include + #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define GSEL_APIC 0 @@ -1318,11 +1320,11 @@ lapic_handle_timer(struct trapframe *frame) if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif - /* Look up our local APIC structure for the tick counters. 
*/ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); + kutrace1(KUTRACE_IRQ + KUTRACE_LOCAL_TIMER_VECTOR, 0); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; @@ -1332,6 +1334,7 @@ lapic_handle_timer(struct trapframe *frame) td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } + kutrace1(KUTRACE_IRQRET + KUTRACE_LOCAL_TIMER_VECTOR, 0); critical_exit(); } diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index a25d6291a996..d3589eb74c3a 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items"); /* lock region used by kernel profiling */ @@ -1226,6 +1228,7 @@ ipi_send_cpu(int cpu, u_int ipi) KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + kutrace1(KUTRACE_IPI, cpu); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; @@ -1253,6 +1256,7 @@ ipi_bitmap_handler(struct trapframe frame) u_int ipi_bitmap; critical_enter(); + kutrace1(KUTRACE_IRQ + IPI_BITMAP_VECTOR, 0); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; @@ -1278,6 +1282,7 @@ ipi_bitmap_handler(struct trapframe frame) } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; + kutrace1(KUTRACE_IRQRET + IPI_BITMAP_VECTOR, 0); critical_exit(); } @@ -1406,6 +1411,7 @@ cpustop_handler(void) u_int cpu; bool use_mwait; + kutrace1(KUTRACE_IRQ + IPI_STOP, 0); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); @@ -1443,6 +1449,7 @@ cpustop_handler(void) } cpustop_handler_post(cpu); + kutrace1(KUTRACE_IRQRET + IPI_STOP, 0); } static void