1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2013 by Delphix. All rights reserved.
  29  */
  30 
  31 #define ELF_TARGET_ALL
  32 #include <elf.h>
  33 
  34 #include <sys/types.h>
  35 #include <sys/sysmacros.h>
  36 
  37 #include <unistd.h>
  38 #include <strings.h>
  39 #include <alloca.h>
  40 #include <limits.h>
  41 #include <stddef.h>
  42 #include <stdlib.h>
  43 #include <stdio.h>
  44 #include <fcntl.h>
  45 #include <errno.h>
  46 #include <wait.h>
  47 #include <assert.h>
  48 #include <sys/ipc.h>
  49 
  50 #include <dt_impl.h>
  51 #include <dt_provider.h>
  52 #include <dt_program.h>
  53 #include <dt_string.h>
  54 
/*
 * Indices into the section header array of the ELF object generated by
 * dump_elf32() and dump_elf64().  ESHDR_NULL is never filled in and stays
 * zeroed; ESHDR_REL is only counted in e_shnum when the DOF has relocations.
 * ESHDR_NUM is the number of entries in the array.
 */
#define ESHDR_NULL      0
#define ESHDR_SHSTRTAB  1
#define ESHDR_DOF       2
#define ESHDR_STRTAB    3
#define ESHDR_SYMTAB    4
#define ESHDR_REL       5
#define ESHDR_NUM       6
  62 
/*
 * Write the contents of one section: seek to the sh_offset recorded in
 * elf_file.shdr[index] and write sh_size bytes from 'data'.  The expression
 * evaluates to non-zero if either the seek or the write fails.
 *
 * This macro is not self-contained: it expects 'fd', 'dtp', and 'elf_file'
 * to be in scope where it is expanded.  Note that it repositions the file
 * offset of 'fd' (lseek64 followed by dt_write).
 */
#define PWRITE_SCN(index, data) \
        (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \
        (off64_t)elf_file.shdr[(index)].sh_offset || \
        dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \
        elf_file.shdr[(index)].sh_size)
  68 
/*
 * Section header string table for 32-bit objects.  The number in each
 * trailing comment is the byte offset of that name within the table; these
 * are the values dump_elf32() stores in sh_name.  The relocation section is
 * named ".rela.SUNW_dof" on sparc and ".rel.SUNW_dof" otherwise.
 */
static const char DTRACE_SHSTRTAB32[] = "\0"
".shstrtab\0"           /* 1 */
".SUNW_dof\0"           /* 11 */
".strtab\0"             /* 21 */
".symtab\0"             /* 29 */
#ifdef __sparc
".rela.SUNW_dof";       /* 37 */
#else
".rel.SUNW_dof";        /* 37 */
#endif
  79 
/*
 * Section header string table for 64-bit objects.  The number in each
 * trailing comment is the byte offset of that name within the table; these
 * are the values dump_elf64() stores in sh_name.  The relocation section is
 * always named ".rela.SUNW_dof" here.
 */
static const char DTRACE_SHSTRTAB64[] = "\0"
".shstrtab\0"           /* 1 */
".SUNW_dof\0"           /* 11 */
".strtab\0"             /* 21 */
".symtab\0"             /* 29 */
".rela.SUNW_dof";       /* 37 */
  86 
/*
 * Names of the global symbol that marks the DOF in the generated object.
 * DOFLAZYSTR is used when dtp->dt_lazyload is set, DOFSTR otherwise.
 */
static const char DOFSTR[] = "__SUNW_dof";
static const char DOFLAZYSTR[] = "___SUNW_dof";
  89 
/*
 * Node of a singly linked list that pairs a string table buffer with a
 * symbol table buffer.
 * NOTE(review): the code that builds and consumes this list is not in this
 * part of the file; confirm ownership of the two buffers there.
 */
typedef struct dt_link_pair {
        struct dt_link_pair *dlp_next;  /* next pair in linked list */
        void *dlp_str;                  /* buffer for string table */
        void *dlp_sym;                  /* buffer for symbol table */
} dt_link_pair_t;
  95 
/*
 * Relocation, symbol, and string tables built by prepare_elf32() for a
 * 32-bit object.  The three buffers are allocated by prepare_elf32() and
 * released by dump_elf32().
 */
typedef struct dof_elf32 {
        uint32_t de_nrel;               /* relocation count */
#ifdef __sparc
        Elf32_Rela *de_rel;             /* array of relocations for sparc */
#else
        Elf32_Rel *de_rel;              /* array of relocations for x86 */
#endif
        uint32_t de_nsym;               /* symbol count */
        Elf32_Sym *de_sym;              /* array of symbols */
        uint32_t de_strlen;             /* size of string table */
        char *de_strtab;                /* string table */
        uint32_t de_global;             /* index of the first global symbol */
} dof_elf32_t;
 109 
 110 static int
 111 prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep)
 112 {
 113         dof_sec_t *dofs, *s;
 114         dof_relohdr_t *dofrh;
 115         dof_relodesc_t *dofr;
 116         char *strtab;
 117         int i, j, nrel;
 118         size_t strtabsz = 1;
 119         uint32_t count = 0;
 120         size_t base;
 121         Elf32_Sym *sym;
 122 #ifdef __sparc
 123         Elf32_Rela *rel;
 124 #else
 125         Elf32_Rel *rel;
 126 #endif
 127 
 128         /*LINTED*/
 129         dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
 130 
 131         /*
 132          * First compute the size of the string table and the number of
 133          * relocations present in the DOF.
 134          */
 135         for (i = 0; i < dof->dofh_secnum; i++) {
 136                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 137                         continue;
 138 
 139                 /*LINTED*/
 140                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 141 
 142                 s = &dofs[dofrh->dofr_strtab];
 143                 strtab = (char *)dof + s->dofs_offset;
 144                 assert(strtab[0] == '\0');
 145                 strtabsz += s->dofs_size - 1;
 146 
 147                 s = &dofs[dofrh->dofr_relsec];
 148                 /*LINTED*/
 149                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 150                 count += s->dofs_size / s->dofs_entsize;
 151         }
 152 
 153         dep->de_strlen = strtabsz;
 154         dep->de_nrel = count;
 155         dep->de_nsym = count + 1; /* the first symbol is always null */
 156 
 157         if (dtp->dt_lazyload) {
 158                 dep->de_strlen += sizeof (DOFLAZYSTR);
 159                 dep->de_nsym++;
 160         } else {
 161                 dep->de_strlen += sizeof (DOFSTR);
 162                 dep->de_nsym++;
 163         }
 164 
 165         if ((dep->de_rel = calloc(dep->de_nrel,
 166             sizeof (dep->de_rel[0]))) == NULL) {
 167                 return (dt_set_errno(dtp, EDT_NOMEM));
 168         }
 169 
 170         if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) {
 171                 free(dep->de_rel);
 172                 return (dt_set_errno(dtp, EDT_NOMEM));
 173         }
 174 
 175         if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
 176                 free(dep->de_rel);
 177                 free(dep->de_sym);
 178                 return (dt_set_errno(dtp, EDT_NOMEM));
 179         }
 180 
 181         count = 0;
 182         strtabsz = 1;
 183         dep->de_strtab[0] = '\0';
 184         rel = dep->de_rel;
 185         sym = dep->de_sym;
 186         dep->de_global = 1;
 187 
 188         /*
 189          * The first symbol table entry must be zeroed and is always ignored.
 190          */
 191         bzero(sym, sizeof (Elf32_Sym));
 192         sym++;
 193 
 194         /*
 195          * Take a second pass through the DOF sections filling in the
 196          * memory we allocated.
 197          */
 198         for (i = 0; i < dof->dofh_secnum; i++) {
 199                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 200                         continue;
 201 
 202                 /*LINTED*/
 203                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 204 
 205                 s = &dofs[dofrh->dofr_strtab];
 206                 strtab = (char *)dof + s->dofs_offset;
 207                 bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
 208                 base = strtabsz;
 209                 strtabsz += s->dofs_size - 1;
 210 
 211                 s = &dofs[dofrh->dofr_relsec];
 212                 /*LINTED*/
 213                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 214                 nrel = s->dofs_size / s->dofs_entsize;
 215 
 216                 s = &dofs[dofrh->dofr_tgtsec];
 217 
 218                 for (j = 0; j < nrel; j++) {
 219 #if defined(__i386) || defined(__amd64)
 220                         rel->r_offset = s->dofs_offset +
 221                             dofr[j].dofr_offset;
 222                         rel->r_info = ELF32_R_INFO(count + dep->de_global,
 223                             R_386_32);
 224 #elif defined(__sparc)
 225                         /*
 226                          * Add 4 bytes to hit the low half of this 64-bit
 227                          * big-endian address.
 228                          */
 229                         rel->r_offset = s->dofs_offset +
 230                             dofr[j].dofr_offset + 4;
 231                         rel->r_info = ELF32_R_INFO(count + dep->de_global,
 232                             R_SPARC_32);
 233 #else
 234 #error unknown ISA
 235 #endif
 236 
 237                         sym->st_name = base + dofr[j].dofr_name - 1;
 238                         sym->st_value = 0;
 239                         sym->st_size = 0;
 240                         sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);
 241                         sym->st_other = 0;
 242                         sym->st_shndx = SHN_UNDEF;
 243 
 244                         rel++;
 245                         sym++;
 246                         count++;
 247                 }
 248         }
 249 
 250         /*
 251          * Add a symbol for the DOF itself. We use a different symbol for
 252          * lazily and actively loaded DOF to make them easy to distinguish.
 253          */
 254         sym->st_name = strtabsz;
 255         sym->st_value = 0;
 256         sym->st_size = dof->dofh_filesz;
 257         sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT);
 258         sym->st_other = 0;
 259         sym->st_shndx = ESHDR_DOF;
 260         sym++;
 261 
 262         if (dtp->dt_lazyload) {
 263                 bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
 264                     sizeof (DOFLAZYSTR));
 265                 strtabsz += sizeof (DOFLAZYSTR);
 266         } else {
 267                 bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
 268                 strtabsz += sizeof (DOFSTR);
 269         }
 270 
 271         assert(count == dep->de_nrel);
 272         assert(strtabsz == dep->de_strlen);
 273 
 274         return (0);
 275 }
 276 
 277 
/*
 * Relocation, symbol, and string tables built by prepare_elf64() for a
 * 64-bit object.  The three buffers are allocated by prepare_elf64() and
 * released by dump_elf64().
 */
typedef struct dof_elf64 {
        uint32_t de_nrel;               /* relocation count */
        Elf64_Rela *de_rel;             /* array of relocations */
        uint32_t de_nsym;               /* symbol count */
        Elf64_Sym *de_sym;              /* array of symbols */

        uint32_t de_strlen;             /* size of string table */
        char *de_strtab;                /* string table */

        uint32_t de_global;             /* index of the first global symbol */
} dof_elf64_t;
 289 
 290 static int
 291 prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep)
 292 {
 293         dof_sec_t *dofs, *s;
 294         dof_relohdr_t *dofrh;
 295         dof_relodesc_t *dofr;
 296         char *strtab;
 297         int i, j, nrel;
 298         size_t strtabsz = 1;
 299         uint32_t count = 0;
 300         size_t base;
 301         Elf64_Sym *sym;
 302         Elf64_Rela *rel;
 303 
 304         /*LINTED*/
 305         dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
 306 
 307         /*
 308          * First compute the size of the string table and the number of
 309          * relocations present in the DOF.
 310          */
 311         for (i = 0; i < dof->dofh_secnum; i++) {
 312                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 313                         continue;
 314 
 315                 /*LINTED*/
 316                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 317 
 318                 s = &dofs[dofrh->dofr_strtab];
 319                 strtab = (char *)dof + s->dofs_offset;
 320                 assert(strtab[0] == '\0');
 321                 strtabsz += s->dofs_size - 1;
 322 
 323                 s = &dofs[dofrh->dofr_relsec];
 324                 /*LINTED*/
 325                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 326                 count += s->dofs_size / s->dofs_entsize;
 327         }
 328 
 329         dep->de_strlen = strtabsz;
 330         dep->de_nrel = count;
 331         dep->de_nsym = count + 1; /* the first symbol is always null */
 332 
 333         if (dtp->dt_lazyload) {
 334                 dep->de_strlen += sizeof (DOFLAZYSTR);
 335                 dep->de_nsym++;
 336         } else {
 337                 dep->de_strlen += sizeof (DOFSTR);
 338                 dep->de_nsym++;
 339         }
 340 
 341         if ((dep->de_rel = calloc(dep->de_nrel,
 342             sizeof (dep->de_rel[0]))) == NULL) {
 343                 return (dt_set_errno(dtp, EDT_NOMEM));
 344         }
 345 
 346         if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) {
 347                 free(dep->de_rel);
 348                 return (dt_set_errno(dtp, EDT_NOMEM));
 349         }
 350 
 351         if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
 352                 free(dep->de_rel);
 353                 free(dep->de_sym);
 354                 return (dt_set_errno(dtp, EDT_NOMEM));
 355         }
 356 
 357         count = 0;
 358         strtabsz = 1;
 359         dep->de_strtab[0] = '\0';
 360         rel = dep->de_rel;
 361         sym = dep->de_sym;
 362         dep->de_global = 1;
 363 
 364         /*
 365          * The first symbol table entry must be zeroed and is always ignored.
 366          */
 367         bzero(sym, sizeof (Elf64_Sym));
 368         sym++;
 369 
 370         /*
 371          * Take a second pass through the DOF sections filling in the
 372          * memory we allocated.
 373          */
 374         for (i = 0; i < dof->dofh_secnum; i++) {
 375                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 376                         continue;
 377 
 378                 /*LINTED*/
 379                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 380 
 381                 s = &dofs[dofrh->dofr_strtab];
 382                 strtab = (char *)dof + s->dofs_offset;
 383                 bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
 384                 base = strtabsz;
 385                 strtabsz += s->dofs_size - 1;
 386 
 387                 s = &dofs[dofrh->dofr_relsec];
 388                 /*LINTED*/
 389                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 390                 nrel = s->dofs_size / s->dofs_entsize;
 391 
 392                 s = &dofs[dofrh->dofr_tgtsec];
 393 
 394                 for (j = 0; j < nrel; j++) {
 395 #if defined(__i386) || defined(__amd64)
 396                         rel->r_offset = s->dofs_offset +
 397                             dofr[j].dofr_offset;
 398                         rel->r_info = ELF64_R_INFO(count + dep->de_global,
 399                             R_AMD64_64);
 400 #elif defined(__sparc)
 401                         rel->r_offset = s->dofs_offset +
 402                             dofr[j].dofr_offset;
 403                         rel->r_info = ELF64_R_INFO(count + dep->de_global,
 404                             R_SPARC_64);
 405 #else
 406 #error unknown ISA
 407 #endif
 408 
 409                         sym->st_name = base + dofr[j].dofr_name - 1;
 410                         sym->st_value = 0;
 411                         sym->st_size = 0;
 412                         sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
 413                         sym->st_other = 0;
 414                         sym->st_shndx = SHN_UNDEF;
 415 
 416                         rel++;
 417                         sym++;
 418                         count++;
 419                 }
 420         }
 421 
 422         /*
 423          * Add a symbol for the DOF itself. We use a different symbol for
 424          * lazily and actively loaded DOF to make them easy to distinguish.
 425          */
 426         sym->st_name = strtabsz;
 427         sym->st_value = 0;
 428         sym->st_size = dof->dofh_filesz;
 429         sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT);
 430         sym->st_other = 0;
 431         sym->st_shndx = ESHDR_DOF;
 432         sym++;
 433 
 434         if (dtp->dt_lazyload) {
 435                 bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
 436                     sizeof (DOFLAZYSTR));
 437                 strtabsz += sizeof (DOFLAZYSTR);
 438         } else {
 439                 bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
 440                 strtabsz += sizeof (DOFSTR);
 441         }
 442 
 443         assert(count == dep->de_nrel);
 444         assert(strtabsz == dep->de_strlen);
 445 
 446         return (0);
 447 }
 448 
 449 /*
 450  * Write out an ELF32 file prologue consisting of a header, section headers,
 451  * and a section header string table.  The DOF data will follow this prologue
 452  * and complete the contents of the given ELF file.
 453  */
 454 static int
 455 dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
 456 {
 457         struct {
 458                 Elf32_Ehdr ehdr;
 459                 Elf32_Shdr shdr[ESHDR_NUM];
 460         } elf_file;
 461 
 462         Elf32_Shdr *shp;
 463         Elf32_Off off;
 464         dof_elf32_t de;
 465         int ret = 0;
 466         uint_t nshdr;
 467 
 468         if (prepare_elf32(dtp, dof, &de) != 0)
 469                 return (-1); /* errno is set for us */
 470 
 471         /*
 472          * If there are no relocations, we only need enough sections for
 473          * the shstrtab and the DOF.
 474          */
 475         nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
 476 
 477         bzero(&elf_file, sizeof (elf_file));
 478 
 479         elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
 480         elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
 481         elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
 482         elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
 483         elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
 484         elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32;
 485 #if defined(_BIG_ENDIAN)
 486         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
 487 #elif defined(_LITTLE_ENDIAN)
 488         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
 489 #endif
 490         elf_file.ehdr.e_type = ET_REL;
 491 #if defined(__sparc)
 492         elf_file.ehdr.e_machine = EM_SPARC;
 493 #elif defined(__i386) || defined(__amd64)
 494         elf_file.ehdr.e_machine = EM_386;
 495 #endif
 496         elf_file.ehdr.e_version = EV_CURRENT;
 497         elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr);
 498         elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr);
 499         elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr);
 500         elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr);
 501         elf_file.ehdr.e_shnum = nshdr;
 502         elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
 503         off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr);
 504 
 505         shp = &elf_file.shdr[ESHDR_SHSTRTAB];
 506         shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */
 507         shp->sh_type = SHT_STRTAB;
 508         shp->sh_offset = off;
 509         shp->sh_size = sizeof (DTRACE_SHSTRTAB32);
 510         shp->sh_addralign = sizeof (char);
 511         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 512 
 513         shp = &elf_file.shdr[ESHDR_DOF];
 514         shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */
 515         shp->sh_flags = SHF_ALLOC;
 516         shp->sh_type = SHT_SUNW_dof;
 517         shp->sh_offset = off;
 518         shp->sh_size = dof->dofh_filesz;
 519         shp->sh_addralign = 8;
 520         off = shp->sh_offset + shp->sh_size;
 521 
 522         shp = &elf_file.shdr[ESHDR_STRTAB];
 523         shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */
 524         shp->sh_flags = SHF_ALLOC;
 525         shp->sh_type = SHT_STRTAB;
 526         shp->sh_offset = off;
 527         shp->sh_size = de.de_strlen;
 528         shp->sh_addralign = sizeof (char);
 529         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
 530 
 531         shp = &elf_file.shdr[ESHDR_SYMTAB];
 532         shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */
 533         shp->sh_flags = SHF_ALLOC;
 534         shp->sh_type = SHT_SYMTAB;
 535         shp->sh_entsize = sizeof (Elf32_Sym);
 536         shp->sh_link = ESHDR_STRTAB;
 537         shp->sh_offset = off;
 538         shp->sh_info = de.de_global;
 539         shp->sh_size = de.de_nsym * sizeof (Elf32_Sym);
 540         shp->sh_addralign = 4;
 541         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
 542 
 543         if (de.de_nrel == 0) {
 544                 if (dt_write(dtp, fd, &elf_file,
 545                     sizeof (elf_file)) != sizeof (elf_file) ||
 546                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
 547                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 548                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 549                     PWRITE_SCN(ESHDR_DOF, dof)) {
 550                         ret = dt_set_errno(dtp, errno);
 551                 }
 552         } else {
 553                 shp = &elf_file.shdr[ESHDR_REL];
 554                 shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */
 555                 shp->sh_flags = SHF_ALLOC;
 556 #ifdef __sparc
 557                 shp->sh_type = SHT_RELA;
 558 #else
 559                 shp->sh_type = SHT_REL;
 560 #endif
 561                 shp->sh_entsize = sizeof (de.de_rel[0]);
 562                 shp->sh_link = ESHDR_SYMTAB;
 563                 shp->sh_info = ESHDR_DOF;
 564                 shp->sh_offset = off;
 565                 shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
 566                 shp->sh_addralign = 4;
 567 
 568                 if (dt_write(dtp, fd, &elf_file,
 569                     sizeof (elf_file)) != sizeof (elf_file) ||
 570                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
 571                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 572                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 573                     PWRITE_SCN(ESHDR_REL, de.de_rel) ||
 574                     PWRITE_SCN(ESHDR_DOF, dof)) {
 575                         ret = dt_set_errno(dtp, errno);
 576                 }
 577         }
 578 
 579         free(de.de_strtab);
 580         free(de.de_sym);
 581         free(de.de_rel);
 582 
 583         return (ret);
 584 }
 585 
 586 /*
 587  * Write out an ELF64 file prologue consisting of a header, section headers,
 588  * and a section header string table.  The DOF data will follow this prologue
 589  * and complete the contents of the given ELF file.
 590  */
 591 static int
 592 dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
 593 {
 594         struct {
 595                 Elf64_Ehdr ehdr;
 596                 Elf64_Shdr shdr[ESHDR_NUM];
 597         } elf_file;
 598 
 599         Elf64_Shdr *shp;
 600         Elf64_Off off;
 601         dof_elf64_t de;
 602         int ret = 0;
 603         uint_t nshdr;
 604 
 605         if (prepare_elf64(dtp, dof, &de) != 0)
 606                 return (-1); /* errno is set for us */
 607 
 608         /*
 609          * If there are no relocations, we only need enough sections for
 610          * the shstrtab and the DOF.
 611          */
 612         nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
 613 
 614         bzero(&elf_file, sizeof (elf_file));
 615 
 616         elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
 617         elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
 618         elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
 619         elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
 620         elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
 621         elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64;
 622 #if defined(_BIG_ENDIAN)
 623         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
 624 #elif defined(_LITTLE_ENDIAN)
 625         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
 626 #endif
 627         elf_file.ehdr.e_type = ET_REL;
 628 #if defined(__sparc)
 629         elf_file.ehdr.e_machine = EM_SPARCV9;
 630 #elif defined(__i386) || defined(__amd64)
 631         elf_file.ehdr.e_machine = EM_AMD64;
 632 #endif
 633         elf_file.ehdr.e_version = EV_CURRENT;
 634         elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr);
 635         elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr);
 636         elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr);
 637         elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr);
 638         elf_file.ehdr.e_shnum = nshdr;
 639         elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
 640         off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr);
 641 
 642         shp = &elf_file.shdr[ESHDR_SHSTRTAB];
 643         shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */
 644         shp->sh_type = SHT_STRTAB;
 645         shp->sh_offset = off;
 646         shp->sh_size = sizeof (DTRACE_SHSTRTAB64);
 647         shp->sh_addralign = sizeof (char);
 648         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 649 
 650         shp = &elf_file.shdr[ESHDR_DOF];
 651         shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */
 652         shp->sh_flags = SHF_ALLOC;
 653         shp->sh_type = SHT_SUNW_dof;
 654         shp->sh_offset = off;
 655         shp->sh_size = dof->dofh_filesz;
 656         shp->sh_addralign = 8;
 657         off = shp->sh_offset + shp->sh_size;
 658 
 659         shp = &elf_file.shdr[ESHDR_STRTAB];
 660         shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */
 661         shp->sh_flags = SHF_ALLOC;
 662         shp->sh_type = SHT_STRTAB;
 663         shp->sh_offset = off;
 664         shp->sh_size = de.de_strlen;
 665         shp->sh_addralign = sizeof (char);
 666         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 667 
 668         shp = &elf_file.shdr[ESHDR_SYMTAB];
 669         shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */
 670         shp->sh_flags = SHF_ALLOC;
 671         shp->sh_type = SHT_SYMTAB;
 672         shp->sh_entsize = sizeof (Elf64_Sym);
 673         shp->sh_link = ESHDR_STRTAB;
 674         shp->sh_offset = off;
 675         shp->sh_info = de.de_global;
 676         shp->sh_size = de.de_nsym * sizeof (Elf64_Sym);
 677         shp->sh_addralign = 8;
 678         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 679 
 680         if (de.de_nrel == 0) {
 681                 if (dt_write(dtp, fd, &elf_file,
 682                     sizeof (elf_file)) != sizeof (elf_file) ||
 683                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
 684                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 685                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 686                     PWRITE_SCN(ESHDR_DOF, dof)) {
 687                         ret = dt_set_errno(dtp, errno);
 688                 }
 689         } else {
 690                 shp = &elf_file.shdr[ESHDR_REL];
 691                 shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rel.SUNW_dof" */
 692                 shp->sh_flags = SHF_ALLOC;
 693                 shp->sh_type = SHT_RELA;
 694                 shp->sh_entsize = sizeof (de.de_rel[0]);
 695                 shp->sh_link = ESHDR_SYMTAB;
 696                 shp->sh_info = ESHDR_DOF;
 697                 shp->sh_offset = off;
 698                 shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
 699                 shp->sh_addralign = 8;
 700 
 701                 if (dt_write(dtp, fd, &elf_file,
 702                     sizeof (elf_file)) != sizeof (elf_file) ||
 703                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
 704                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 705                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 706                     PWRITE_SCN(ESHDR_REL, de.de_rel) ||
 707                     PWRITE_SCN(ESHDR_DOF, dof)) {
 708                         ret = dt_set_errno(dtp, errno);
 709                 }
 710         }
 711 
 712         free(de.de_strtab);
 713         free(de.de_sym);
 714         free(de.de_rel);
 715 
 716         return (ret);
 717 }
 718 
 719 static int
 720 dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t shn,
 721     GElf_Sym *sym)
 722 {
 723         int i, ret = -1;
 724         GElf_Sym s;
 725 
 726         for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) {
 727                 if (GELF_ST_TYPE(sym->st_info) == STT_FUNC &&
 728                     shn == sym->st_shndx &&
 729                     sym->st_value <= addr &&
 730                     addr < sym->st_value + sym->st_size) {
 731                         if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
 732                                 return (0);
 733 
 734                         ret = 0;
 735                         s = *sym;
 736                 }
 737         }
 738 
 739         if (ret == 0)
 740                 *sym = s;
 741         return (ret);
 742 }
 743 
 744 #if defined(__sparc)
 745 
/*
 * SPARC instruction words and matchers used by dt_modtext() when rewriting
 * probe call sites.
 *
 * DT_OP_RET, DT_OP_NOP:  words that dt_modtext() recognizes as replacements
 *                        for the original call (ret for a tail call followed
 *                        by a restore, nop otherwise).
 * DT_OP_CALL:            a call instruction with a displacement of 0, which
 *                        is the only form dt_modtext() expects to find.
 * DT_OP_CLR_O0:          NOTE(review): presumably "clr %o0"; its use is not
 *                        in this part of the file -- confirm.
 */
#define DT_OP_RET               0x81c7e008
#define DT_OP_NOP               0x01000000
#define DT_OP_CALL              0x40000000
#define DT_OP_CLR_O0            0x90102000

/*
 * Instruction matchers: a mov of a register into %o7, a restore, and a
 * retl-like return.  DT_IS_RETL() masks out bits 14-18, the same field that
 * DT_MAKE_RETL() fills in, so it matches a return through any register.
 */
#define DT_IS_MOV_O7(inst)      (((inst) & 0xffffe000) == 0x9e100000)
#define DT_IS_RESTORE(inst)     (((inst) & 0xc1f80000) == 0x81e80000)
#define DT_IS_RETL(inst)        (((inst) & 0xfff83fff) == 0x81c02008)

/*
 * DT_RS2() extracts the low five bits of an instruction word;
 * DT_MAKE_RETL() builds a retl-like instruction with 'reg' placed in
 * bits 14 and up.
 */
#define DT_RS2(inst)            ((inst) & 0x1f)
#define DT_MAKE_RETL(reg)       (0x81c02008 | ((reg) << 14))
 757 
 758 /*ARGSUSED*/
 759 static int
 760 dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
 761     uint32_t *off)
 762 {
 763         uint32_t *ip;
 764 
 765         if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0)
 766                 return (-1);
 767 
 768         /*LINTED*/
 769         ip = (uint32_t *)(p + rela->r_offset);
 770 
 771         /*
 772          * We only know about some specific relocation types.
 773          */
 774         if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 &&
 775             GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30)
 776                 return (-1);
 777 
 778         /*
 779          * We may have already processed this object file in an earlier linker
 780          * invocation. Check to see if the present instruction sequence matches
 781          * the one we would install below.
 782          */
 783         if (isenabled) {
 784                 if (ip[0] == DT_OP_NOP) {
 785                         (*off) += sizeof (ip[0]);
 786                         return (0);
 787                 }
 788         } else {
 789                 if (DT_IS_RESTORE(ip[1])) {
 790                         if (ip[0] == DT_OP_RET) {
 791                                 (*off) += sizeof (ip[0]);
 792                                 return (0);
 793                         }
 794                 } else if (DT_IS_MOV_O7(ip[1])) {
 795                         if (DT_IS_RETL(ip[0]))
 796                                 return (0);
 797                 } else {
 798                         if (ip[0] == DT_OP_NOP) {
 799                                 (*off) += sizeof (ip[0]);
 800                                 return (0);
 801                         }
 802                 }
 803         }
 804 
 805         /*
 806          * We only expect call instructions with a displacement of 0.
 807          */
 808         if (ip[0] != DT_OP_CALL) {
 809                 dt_dprintf("found %x instead of a call instruction at %llx\n",
 810                     ip[0], (u_longlong_t)rela->r_offset);
 811                 return (-1);
 812         }
 813 
 814         if (isenabled) {
 815                 /*
 816                  * It would necessarily indicate incorrect usage if an is-
 817                  * enabled probe were tail-called so flag that as an error.
 818                  * It's also potentially (very) tricky to handle gracefully,
 819                  * but could be done if this were a desired use scenario.
 820                  */
 821                 if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) {
 822                         dt_dprintf("tail call to is-enabled probe at %llx\n",
 823                             (u_longlong_t)rela->r_offset);
 824                         return (-1);
 825                 }
 826 
 827 
 828                 /*
 829                  * On SPARC, we take advantage of the fact that the first
 830                  * argument shares the same register as for the return value.
 831                  * The macro handles the work of zeroing that register so we
 832                  * don't need to do anything special here. We instrument the
 833                  * instruction in the delay slot as we'll need to modify the
 834                  * return register after that instruction has been emulated.
 835                  */
 836                 ip[0] = DT_OP_NOP;
 837                 (*off) += sizeof (ip[0]);
 838         } else {
 839                 /*
 840                  * If the call is followed by a restore, it's a tail call so
                 * change the call to a ret. If the call is followed by a mov
 842                  * of a register into %o7, it's a tail call in leaf context
 843                  * so change the call to a retl-like instruction that returns
 844                  * to that register value + 8 (rather than the typical %o7 +
 845                  * 8); the delay slot instruction is left, but should have no
 846                  * effect. Otherwise we change the call to be a nop. We
 847                  * identify the subsequent instruction as the probe point in
 848                  * all but the leaf tail-call case to ensure that arguments to
 849                  * the probe are complete and consistent. An astute, though
 850                  * largely hypothetical, observer would note that there is the
 851                  * possibility of a false-positive probe firing if the function
 852                  * contained a branch to the instruction in the delay slot of
 853                  * the call. Fixing this would require significant in-kernel
 854                  * modifications, and isn't worth doing until we see it in the
 855                  * wild.
 856                  */
 857                 if (DT_IS_RESTORE(ip[1])) {
 858                         ip[0] = DT_OP_RET;
 859                         (*off) += sizeof (ip[0]);
 860                 } else if (DT_IS_MOV_O7(ip[1])) {
 861                         ip[0] = DT_MAKE_RETL(DT_RS2(ip[1]));
 862                 } else {
 863                         ip[0] = DT_OP_NOP;
 864                         (*off) += sizeof (ip[0]);
 865                 }
 866         }
 867 
 868         return (0);
 869 }
 870 
 871 #elif defined(__i386) || defined(__amd64)
 872 
 873 #define DT_OP_NOP               0x90
 874 #define DT_OP_RET               0xc3
 875 #define DT_OP_CALL              0xe8
 876 #define DT_OP_JMP32             0xe9
 877 #define DT_OP_REX_RAX           0x48
 878 #define DT_OP_XOR_EAX_0         0x33
 879 #define DT_OP_XOR_EAX_1         0xc0
 880 
 881 static int
 882 dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
 883     uint32_t *off)
 884 {
 885         uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1);
 886         uint8_t ret;
 887 
 888         /*
 889          * On x86, the first byte of the instruction is the call opcode and
 890          * the next four bytes are the 32-bit address; the relocation is for
 891          * the address operand. We back up the offset to the first byte of
 892          * the instruction. For is-enabled probes, we later advance the offset
 893          * so that it hits the first nop in the instruction sequence.
 894          */
 895         (*off) -= 1;
 896 
 897         /*
 898          * We only know about some specific relocation types. Luckily
 899          * these types have the same values on both 32-bit and 64-bit
 900          * x86 architectures.
 901          */
 902         if (GELF_R_TYPE(rela->r_info) != R_386_PC32 &&
 903             GELF_R_TYPE(rela->r_info) != R_386_PLT32)
 904                 return (-1);
 905 
 906         /*
 907          * We may have already processed this object file in an earlier linker
 908          * invocation. Check to see if the present instruction sequence matches
 909          * the one we would install. For is-enabled probes, we advance the
 910          * offset to the first nop instruction in the sequence to match the
 911          * text modification code below.
 912          */
 913         if (!isenabled) {
 914                 if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) &&
 915                     ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP &&
 916                     ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP)
 917                         return (0);
 918         } else if (dtp->dt_oflags & DTRACE_O_LP64) {
 919                 if (ip[0] == DT_OP_REX_RAX &&
 920                     ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 &&
 921                     (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) &&
 922                     ip[4] == DT_OP_NOP) {
 923                         (*off) += 3;
 924                         return (0);
 925                 }
 926         } else {
 927                 if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 &&
 928                     (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) &&
 929                     ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) {
 930                         (*off) += 2;
 931                         return (0);
 932                 }
 933         }
 934 
 935         /*
 936          * We expect either a call instrution with a 32-bit displacement or a
 937          * jmp instruction with a 32-bit displacement acting as a tail-call.
 938          */
 939         if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) {
 940                 dt_dprintf("found %x instead of a call or jmp instruction at "
 941                     "%llx\n", ip[0], (u_longlong_t)rela->r_offset);
 942                 return (-1);
 943         }
 944 
 945         ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP;
 946 
 947         /*
 948          * Establish the instruction sequence -- all nops for probes, and an
 949          * instruction to clear the return value register (%eax/%rax) followed
 950          * by nops for is-enabled probes. For is-enabled probes, we advance
 951          * the offset to the first nop. This isn't stricly necessary but makes
 952          * for more readable disassembly when the probe is enabled.
 953          */
 954         if (!isenabled) {
 955                 ip[0] = ret;
 956                 ip[1] = DT_OP_NOP;
 957                 ip[2] = DT_OP_NOP;
 958                 ip[3] = DT_OP_NOP;
 959                 ip[4] = DT_OP_NOP;
 960         } else if (dtp->dt_oflags & DTRACE_O_LP64) {
 961                 ip[0] = DT_OP_REX_RAX;
 962                 ip[1] = DT_OP_XOR_EAX_0;
 963                 ip[2] = DT_OP_XOR_EAX_1;
 964                 ip[3] = ret;
 965                 ip[4] = DT_OP_NOP;
 966                 (*off) += 3;
 967         } else {
 968                 ip[0] = DT_OP_XOR_EAX_0;
 969                 ip[1] = DT_OP_XOR_EAX_1;
 970                 ip[2] = ret;
 971                 ip[3] = DT_OP_NOP;
 972                 ip[4] = DT_OP_NOP;
 973                 (*off) += 2;
 974         }
 975 
 976         return (0);
 977 }
 978 
 979 #else
 980 #error unknown ISA
 981 #endif
 982 
 983 /*PRINTFLIKE5*/
 984 static int
 985 dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs,
 986     const char *format, ...)
 987 {
 988         va_list ap;
 989         dt_link_pair_t *pair;
 990 
 991         va_start(ap, format);
 992         dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
 993         va_end(ap);
 994 
 995         if (elf != NULL)
 996                 (void) elf_end(elf);
 997 
 998         if (fd >= 0)
 999                 (void) close(fd);
1000 
1001         while ((pair = bufs) != NULL) {
1002                 bufs = pair->dlp_next;
1003                 dt_free(dtp, pair->dlp_str);
1004                 dt_free(dtp, pair->dlp_sym);
1005                 dt_free(dtp, pair);
1006         }
1007 
1008         return (dt_set_errno(dtp, EDT_COMPILER));
1009 }
1010 
1011 static int
1012 process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
1013 {
1014         static const char dt_prefix[] = "__dtrace";
1015         static const char dt_enabled[] = "enabled";
1016         static const char dt_symprefix[] = "$dtrace";
1017         static const char dt_symfmt[] = "%s%d.%s";
1018         int fd, i, ndx, eprobe, mod = 0;
1019         Elf *elf = NULL;
1020         GElf_Ehdr ehdr;
1021         Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt;
1022         Elf_Data *data_rel, *data_sym, *data_str, *data_tgt;
1023         GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt;
1024         GElf_Sym rsym, fsym, dsym;
1025         GElf_Rela rela;
1026         char *s, *p, *r;
1027         char pname[DTRACE_PROVNAMELEN];
1028         dt_provider_t *pvp;
1029         dt_probe_t *prp;
1030         uint32_t off, eclass, emachine1, emachine2;
1031         size_t symsize, nsym, isym, istr, len;
1032         key_t objkey;
1033         dt_link_pair_t *pair, *bufs = NULL;
1034         dt_strtab_t *strtab;
1035         Elf_Data *data_newsym, *data_newstr;
1036         size_t newsym = 0;
1037 
1038         if ((fd = open64(obj, O_RDWR)) == -1) {
1039                 return (dt_link_error(dtp, elf, fd, bufs,
1040                     "failed to open %s: %s", obj, strerror(errno)));
1041         }
1042 
1043         if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) {
1044                 return (dt_link_error(dtp, elf, fd, bufs,
1045                     "failed to process %s: %s", obj, elf_errmsg(elf_errno())));
1046         }
1047 
1048         switch (elf_kind(elf)) {
1049         case ELF_K_ELF:
1050                 break;
1051         case ELF_K_AR:
1052                 return (dt_link_error(dtp, elf, fd, bufs, "archives are not "
1053                     "permitted; use the contents of the archive instead: %s",
1054                     obj));
1055         default:
1056                 return (dt_link_error(dtp, elf, fd, bufs,
1057                     "invalid file type: %s", obj));
1058         }
1059 
1060         if (gelf_getehdr(elf, &ehdr) == NULL) {
1061                 return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s",
1062                     obj));
1063         }
1064 
1065         if (dtp->dt_oflags & DTRACE_O_LP64) {
1066                 eclass = ELFCLASS64;
1067 #if defined(__sparc)
1068                 emachine1 = emachine2 = EM_SPARCV9;
1069 #elif defined(__i386) || defined(__amd64)
1070                 emachine1 = emachine2 = EM_AMD64;
1071 #endif
1072                 symsize = sizeof (Elf64_Sym);
1073         } else {
1074                 eclass = ELFCLASS32;
1075 #if defined(__sparc)
1076                 emachine1 = EM_SPARC;
1077                 emachine2 = EM_SPARC32PLUS;
1078 #elif defined(__i386) || defined(__amd64)
1079                 emachine1 = emachine2 = EM_386;
1080 #endif
1081                 symsize = sizeof (Elf32_Sym);
1082         }
1083 
1084         if (ehdr.e_ident[EI_CLASS] != eclass) {
1085                 return (dt_link_error(dtp, elf, fd, bufs,
1086                     "incorrect ELF class for object file: %s", obj));
1087         }
1088 
1089         if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) {
1090                 return (dt_link_error(dtp, elf, fd, bufs,
1091                     "incorrect ELF machine type for object file: %s", obj));
1092         }
1093 
1094         /*
1095          * We use this token as a relatively unique handle for this file on the
1096          * system in order to disambiguate potential conflicts between files of
1097          * the same name which contain identially named local symbols.
1098          */
1099         if ((objkey = ftok(obj, 0)) == (key_t)-1) {
1100                 return (dt_link_error(dtp, elf, fd, bufs,
1101                     "failed to generate unique key for object file: %s", obj));
1102         }
1103 
1104         scn_rel = NULL;
1105         while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
1106                 if (gelf_getshdr(scn_rel, &shdr_rel) == NULL)
1107                         goto err;
1108 
1109                 /*
1110                  * Skip any non-relocation sections.
1111                  */
1112                 if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
1113                         continue;
1114 
1115                 if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL)
1116                         goto err;
1117 
1118                 /*
1119                  * Grab the section, section header and section data for the
1120                  * symbol table that this relocation section references.
1121                  */
1122                 if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
1123                     gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
1124                     (data_sym = elf_getdata(scn_sym, NULL)) == NULL)
1125                         goto err;
1126 
1127                 /*
1128                  * Ditto for that symbol table's string table.
1129                  */
1130                 if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
1131                     gelf_getshdr(scn_str, &shdr_str) == NULL ||
1132                     (data_str = elf_getdata(scn_str, NULL)) == NULL)
1133                         goto err;
1134 
1135                 /*
1136                  * Grab the section, section header and section data for the
1137                  * target section for the relocations. For the relocations
1138                  * we're looking for -- this will typically be the text of the
1139                  * object file.
1140                  */
1141                 if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL ||
1142                     gelf_getshdr(scn_tgt, &shdr_tgt) == NULL ||
1143                     (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL)
1144                         goto err;
1145 
1146                 /*
1147                  * We're looking for relocations to symbols matching this form:
1148                  *
1149                  *   __dtrace[enabled]_<prov>___<probe>
1150                  *
1151                  * For the generated object, we need to record the location
1152                  * identified by the relocation, and create a new relocation
1153                  * in the generated object that will be resolved at link time
1154                  * to the location of the function in which the probe is
1155                  * embedded. In the target object, we change the matched symbol
1156                  * so that it will be ignored at link time, and we modify the
1157                  * target (text) section to replace the call instruction with
1158                  * one or more nops.
1159                  *
1160                  * If the function containing the probe is locally scoped
1161                  * (static), we create an alias used by the relocation in the
1162                  * generated object. The alias, a new symbol, will be global
1163                  * (so that the relocation from the generated object can be
1164                  * resolved), and hidden (so that it is converted to a local
1165                  * symbol at link time). Such aliases have this form:
1166                  *
1167                  *   $dtrace<key>.<function>
1168                  *
1169                  * We take a first pass through all the relocations to
1170                  * populate our string table and count the number of extra
1171                  * symbols we'll require.
1172                  */
1173                 strtab = dt_strtab_create(1);
1174                 nsym = 0;
1175                 isym = data_sym->d_size / symsize;
1176                 istr = data_str->d_size;
1177 
1178                 for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1179 
1180                         if (shdr_rel.sh_type == SHT_RELA) {
1181                                 if (gelf_getrela(data_rel, i, &rela) == NULL)
1182                                         continue;
1183                         } else {
1184                                 GElf_Rel rel;
1185                                 if (gelf_getrel(data_rel, i, &rel) == NULL)
1186                                         continue;
1187                                 rela.r_offset = rel.r_offset;
1188                                 rela.r_info = rel.r_info;
1189                                 rela.r_addend = 0;
1190                         }
1191 
1192                         if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
1193                             &rsym) == NULL) {
1194                                 dt_strtab_destroy(strtab);
1195                                 goto err;
1196                         }
1197 
1198                         s = (char *)data_str->d_buf + rsym.st_name;
1199 
1200                         if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1201                                 continue;
1202 
1203                         if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
1204                             shdr_rel.sh_info, &fsym) != 0) {
1205                                 dt_strtab_destroy(strtab);
1206                                 goto err;
1207                         }
1208 
1209                         if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL)
1210                                 continue;
1211 
1212                         if (fsym.st_name > data_str->d_size) {
1213                                 dt_strtab_destroy(strtab);
1214                                 goto err;
1215                         }
1216 
1217                         s = (char *)data_str->d_buf + fsym.st_name;
1218 
1219                         /*
1220                          * If this symbol isn't of type function, we've really
1221                          * driven off the rails or the object file is corrupt.
1222                          */
1223                         if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) {
1224                                 dt_strtab_destroy(strtab);
1225                                 return (dt_link_error(dtp, elf, fd, bufs,
1226                                     "expected %s to be of type function", s));
1227                         }
1228 
1229                         len = snprintf(NULL, 0, dt_symfmt, dt_symprefix,
1230                             objkey, s) + 1;
1231                         if ((p = dt_alloc(dtp, len)) == NULL) {
1232                                 dt_strtab_destroy(strtab);
1233                                 goto err;
1234                         }
1235                         (void) snprintf(p, len, dt_symfmt, dt_symprefix,
1236                             objkey, s);
1237 
1238                         if (dt_strtab_index(strtab, p) == -1) {
1239                                 nsym++;
1240                                 (void) dt_strtab_insert(strtab, p);
1241                         }
1242 
1243                         dt_free(dtp, p);
1244                 }
1245 
1246                 /*
1247                  * If needed, allocate the additional space for the symbol
1248                  * table and string table copying the old data into the new
1249                  * buffers, and marking the buffers as dirty. We inject those
1250                  * newly allocated buffers into the libelf data structures, but
1251                  * are still responsible for freeing them once we're done with
1252                  * the elf handle.
1253                  */
1254                 if (nsym > 0) {
1255                         /*
1256                          * The first byte of the string table is reserved for
1257                          * the \0 entry.
1258                          */
1259                         len = dt_strtab_size(strtab) - 1;
1260 
1261                         assert(len > 0);
1262                         assert(dt_strtab_index(strtab, "") == 0);
1263 
1264                         dt_strtab_destroy(strtab);
1265 
1266                         if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL)
1267                                 goto err;
1268 
1269                         if ((pair->dlp_str = dt_alloc(dtp, len)) == NULL) {
1270                                 dt_free(dtp, pair);
1271                                 goto err;
1272                         }
1273 
1274                         if ((pair->dlp_sym =
1275                             dt_alloc(dtp, nsym * symsize)) == NULL) {
1276                                 dt_free(dtp, pair->dlp_str);
1277                                 dt_free(dtp, pair);
1278                                 goto err;
1279                         }
1280 
1281                         pair->dlp_next = bufs;
1282                         bufs = pair;
1283 
1284                         if ((data_newstr = elf_newdata(scn_str)) == NULL)
1285                                 goto err;
1286                         data_newstr->d_size = len;
1287                         data_newstr->d_buf = pair->dlp_str;
1288 
1289                         if ((data_newsym = elf_newdata(scn_sym)) == NULL)
1290                                 goto err;
1291                         data_newsym->d_size = nsym * symsize;
1292                         data_newsym->d_buf = pair->dlp_sym;
1293 
1294                         nsym += isym;
1295                 } else {
1296                         dt_strtab_destroy(strtab);
1297                 }
1298 
1299                 /*
1300                  * Now that the tables have been allocated, perform the
1301                  * modifications described above.
1302                  */
1303                 for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1304 
1305                         if (shdr_rel.sh_type == SHT_RELA) {
1306                                 if (gelf_getrela(data_rel, i, &rela) == NULL)
1307                                         continue;
1308                         } else {
1309                                 GElf_Rel rel;
1310                                 if (gelf_getrel(data_rel, i, &rel) == NULL)
1311                                         continue;
1312                                 rela.r_offset = rel.r_offset;
1313                                 rela.r_info = rel.r_info;
1314                                 rela.r_addend = 0;
1315                         }
1316 
1317                         ndx = GELF_R_SYM(rela.r_info);
1318 
1319                         if (gelf_getsym(data_sym, ndx, &rsym) == NULL ||
1320                             rsym.st_name > data_str->d_size)
1321                                 goto err;
1322 
1323                         s = (char *)data_str->d_buf + rsym.st_name;
1324 
1325                         if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1326                                 continue;
1327 
1328                         s += sizeof (dt_prefix) - 1;
1329 
1330                         /*
1331                          * Check to see if this is an 'is-enabled' check as
1332                          * opposed to a normal probe.
1333                          */
1334                         if (strncmp(s, dt_enabled,
1335                             sizeof (dt_enabled) - 1) == 0) {
1336                                 s += sizeof (dt_enabled) - 1;
1337                                 eprobe = 1;
1338                                 *eprobesp = 1;
1339                                 dt_dprintf("is-enabled probe\n");
1340                         } else {
1341                                 eprobe = 0;
1342                                 dt_dprintf("normal probe\n");
1343                         }
1344 
1345                         if (*s++ != '_')
1346                                 goto err;
1347 
1348                         if ((p = strstr(s, "___")) == NULL ||
1349                             p - s >= sizeof (pname))
1350                                 goto err;
1351 
1352                         bcopy(s, pname, p - s);
1353                         pname[p - s] = '\0';
1354 
1355                         p = strhyphenate(p + 3); /* strlen("___") */
1356 
1357                         if (dt_symtab_lookup(data_newsym, newsym,
1358                             rela.r_offset, shdr_rel.sh_info, &fsym) == 0) {
1359                                 if (fsym.st_name >= data_str->d_size +
1360                                     data_newstr->d_size)
1361                                         goto err;
1362                                 s = (char *)data_newstr->d_buf +
1363                                     fsym.st_name - data_str->d_size;
1364                         } else if (dt_symtab_lookup(data_sym, isym,
1365                             rela.r_offset, shdr_rel.sh_info, &fsym) == 0) {
1366                                 if (fsym.st_name >= data_str->d_size)
1367                                         goto err;
1368                                 s = (char *)data_str->d_buf + fsym.st_name;
1369                         } else {
1370                                 goto err;
1371                         }
1372 
1373                         assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
1374 
1375                         /*
1376                          * If a NULL relocation name is passed to
1377                          * dt_probe_define(), the function name is used for the
1378                          * relocation. The relocation needs to use a mangled
1379                          * name if the symbol is locally scoped; the function
1380                          * name may need to change if we've found the global
1381                          * alias for the locally scoped symbol (we prefer
1382                          * global symbols to locals in dt_symtab_lookup()).
1383                          */
1384                         r = NULL;
1385 
1386                         if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) {
1387                                 dsym = fsym;
1388                                 dsym.st_name = istr;
1389                                 dsym.st_info = GELF_ST_INFO(STB_GLOBAL,
1390                                     STT_FUNC);
1391                                 dsym.st_other =
1392                                     ELF64_ST_VISIBILITY(STV_ELIMINATE);
1393                                 (void)gelf_update_sym(data_newsym, newsym,
1394                                     &dsym);
1395 
1396                                 r = (char *)data_newstr->d_buf +
1397                                     (istr - data_str->d_size);
1398                                 istr += 1 + sprintf(r, dt_symfmt,
1399                                     dt_symprefix, objkey, s);
1400                                 isym++;
1401                                 newsym++;
1402                                 assert(isym <= nsym);
1403 
1404                         } else if (strncmp(s, dt_symprefix,
1405                             strlen(dt_symprefix)) == 0) {
1406                                 r = s;
1407                                 if ((s = strchr(s, '.')) == NULL)
1408                                         goto err;
1409                                 s++;
1410                         }
1411 
1412                         if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) {
1413                                 return (dt_link_error(dtp, elf, fd, bufs,
1414                                     "no such provider %s", pname));
1415                         }
1416 
1417                         if ((prp = dt_probe_lookup(pvp, p)) == NULL) {
1418                                 return (dt_link_error(dtp, elf, fd, bufs,
1419                                     "no such probe %s", p));
1420                         }
1421 
1422                         assert(fsym.st_value <= rela.r_offset);
1423 
1424                         off = rela.r_offset - fsym.st_value;
1425                         if (dt_modtext(dtp, data_tgt->d_buf, eprobe,
1426                             &rela, &off) != 0) {
1427                                 goto err;
1428                         }
1429 
1430                         if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) {
1431                                 return (dt_link_error(dtp, elf, fd, bufs,
1432                                     "failed to allocate space for probe"));
1433                         }
1434 
1435                         mod = 1;
1436                         (void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY);
1437 
1438                         /*
1439                          * This symbol may already have been marked to
1440                          * be ignored by another relocation referencing
1441                          * the same symbol or if this object file has
1442                          * already been processed by an earlier link
1443                          * invocation.
1444                          */
1445                         if (rsym.st_shndx != SHN_SUNW_IGNORE) {
1446                                 rsym.st_shndx = SHN_SUNW_IGNORE;
1447                                 (void) gelf_update_sym(data_sym, ndx, &rsym);
1448                         }
1449                 }
1450         }
1451 
1452         if (mod && elf_update(elf, ELF_C_WRITE) == -1)
1453                 goto err;
1454 
1455         (void) elf_end(elf);
1456         (void) close(fd);
1457 
1458         while ((pair = bufs) != NULL) {
1459                 bufs = pair->dlp_next;
1460                 dt_free(dtp, pair->dlp_str);
1461                 dt_free(dtp, pair->dlp_sym);
1462                 dt_free(dtp, pair);
1463         }
1464 
1465         return (0);
1466 
1467 err:
1468         return (dt_link_error(dtp, elf, fd, bufs,
1469             "an error was encountered while processing %s", obj));
1470 }
1471 
1472 int
1473 dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags,
1474     const char *file, int objc, char *const objv[])
1475 {
1476         char drti[PATH_MAX];
1477         dof_hdr_t *dof;
1478         int fd, status, i, cur;
1479         char *cmd, tmp;
1480         size_t len;
1481         int eprobes = 0, ret = 0;
1482 
1483         /*
1484          * A NULL program indicates a special use in which we just link
1485          * together a bunch of object files specified in objv and then
1486          * unlink(2) those object files.
1487          */
1488         if (pgp == NULL) {
1489                 const char *fmt = "%s -o %s -r";
1490 
1491                 len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1;
1492 
1493                 for (i = 0; i < objc; i++)
1494                         len += strlen(objv[i]) + 1;
1495 
1496                 cmd = alloca(len);
1497 
1498                 cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file);
1499 
1500                 for (i = 0; i < objc; i++)
1501                         cur += snprintf(cmd + cur, len - cur, " %s", objv[i]);
1502 
1503                 if ((status = system(cmd)) == -1) {
1504                         return (dt_link_error(dtp, NULL, -1, NULL,
1505                             "failed to run %s: %s", dtp->dt_ld_path,
1506                             strerror(errno)));
1507                 }
1508 
1509                 if (WIFSIGNALED(status)) {
1510                         return (dt_link_error(dtp, NULL, -1, NULL,
1511                             "failed to link %s: %s failed due to signal %d",
1512                             file, dtp->dt_ld_path, WTERMSIG(status)));
1513                 }
1514 
1515                 if (WEXITSTATUS(status) != 0) {
1516                         return (dt_link_error(dtp, NULL, -1, NULL,
1517                             "failed to link %s: %s exited with status %d\n",
1518                             file, dtp->dt_ld_path, WEXITSTATUS(status)));
1519                 }
1520 
1521                 for (i = 0; i < objc; i++) {
1522                         if (strcmp(objv[i], file) != 0)
1523                                 (void) unlink(objv[i]);
1524                 }
1525 
1526                 return (0);
1527         }
1528 
1529         for (i = 0; i < objc; i++) {
1530                 if (process_obj(dtp, objv[i], &eprobes) != 0)
1531                         return (-1); /* errno is set for us */
1532         }
1533 
1534         /*
1535          * If there are is-enabled probes then we need to force use of DOF
1536          * version 2.
1537          */
1538         if (eprobes && pgp->dp_dofversion < DOF_VERSION_2)
1539                 pgp->dp_dofversion = DOF_VERSION_2;
1540 
1541         if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL)
1542                 return (-1); /* errno is set for us */
1543 
1544         /*
1545          * Create a temporary file and then unlink it if we're going to
1546          * combine it with drti.o later.  We can still refer to it in child
1547          * processes as /dev/fd/<fd>.
1548          */
1549         if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) {
1550                 return (dt_link_error(dtp, NULL, -1, NULL,
1551                     "failed to open %s: %s", file, strerror(errno)));
1552         }
1553 
1554         /*
1555          * If -xlinktype=DOF has been selected, just write out the DOF.
1556          * Otherwise proceed to the default of generating and linking ELF.
1557          */
1558         switch (dtp->dt_linktype) {
1559         case DT_LTYP_DOF:
1560                 if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz)
1561                         ret = errno;
1562 
1563                 if (close(fd) != 0 && ret == 0)
1564                         ret = errno;
1565 
1566                 if (ret != 0) {
1567                         return (dt_link_error(dtp, NULL, -1, NULL,
1568                             "failed to write %s: %s", file, strerror(ret)));
1569                 }
1570 
1571                 return (0);
1572 
1573         case DT_LTYP_ELF:
1574                 break; /* fall through to the rest of dtrace_program_link() */
1575 
1576         default:
1577                 return (dt_link_error(dtp, NULL, -1, NULL,
1578                     "invalid link type %u\n", dtp->dt_linktype));
1579         }
1580 
1581 
1582         if (!dtp->dt_lazyload)
1583                 (void) unlink(file);
1584 
1585         if (dtp->dt_oflags & DTRACE_O_LP64)
1586                 status = dump_elf64(dtp, dof, fd);
1587         else
1588                 status = dump_elf32(dtp, dof, fd);
1589 
1590         if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) {
1591                 return (dt_link_error(dtp, NULL, -1, NULL,
1592                     "failed to write %s: %s", file, strerror(errno)));
1593         }
1594 
1595         if (!dtp->dt_lazyload) {
1596                 const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s";
1597 
1598                 if (dtp->dt_oflags & DTRACE_O_LP64) {
1599                         (void) snprintf(drti, sizeof (drti),
1600                             "%s/64/drti.o", _dtrace_libdir);
1601                 } else {
1602                         (void) snprintf(drti, sizeof (drti),
1603                             "%s/drti.o", _dtrace_libdir);
1604                 }
1605 
1606                 len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd,
1607                     drti) + 1;
1608 
1609                 cmd = alloca(len);
1610 
1611                 (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti);
1612 
1613                 if ((status = system(cmd)) == -1) {
1614                         ret = dt_link_error(dtp, NULL, -1, NULL,
1615                             "failed to run %s: %s", dtp->dt_ld_path,
1616                             strerror(errno));
1617                         goto done;
1618                 }
1619 
1620                 (void) close(fd); /* release temporary file */
1621 
1622                 if (WIFSIGNALED(status)) {
1623                         ret = dt_link_error(dtp, NULL, -1, NULL,
1624                             "failed to link %s: %s failed due to signal %d",
1625                             file, dtp->dt_ld_path, WTERMSIG(status));
1626                         goto done;
1627                 }
1628 
1629                 if (WEXITSTATUS(status) != 0) {
1630                         ret = dt_link_error(dtp, NULL, -1, NULL,
1631                             "failed to link %s: %s exited with status %d\n",
1632                             file, dtp->dt_ld_path, WEXITSTATUS(status));
1633                         goto done;
1634                 }
1635         } else {
1636                 (void) close(fd);
1637         }
1638 
1639 done:
1640         dtrace_dof_destroy(dtp, dof);
1641         return (ret);
1642 }