1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #define ELF_TARGET_ALL
  30 #include <elf.h>
  31 
  32 #include <sys/types.h>
  33 #include <sys/sysmacros.h>
  34 
  35 #include <unistd.h>
  36 #include <strings.h>
  37 #include <alloca.h>
  38 #include <limits.h>
  39 #include <stddef.h>
  40 #include <stdlib.h>
  41 #include <stdio.h>
  42 #include <fcntl.h>
  43 #include <errno.h>
  44 #include <wait.h>
  45 #include <assert.h>
  46 #include <sys/ipc.h>
  47 
  48 #include <dt_impl.h>
  49 #include <dt_provider.h>
  50 #include <dt_program.h>
  51 #include <dt_string.h>
  52 
  53 #define ESHDR_NULL      0
  54 #define ESHDR_SHSTRTAB  1
  55 #define ESHDR_DOF       2
  56 #define ESHDR_STRTAB    3
  57 #define ESHDR_SYMTAB    4
  58 #define ESHDR_REL       5
  59 #define ESHDR_NUM       6
  60 
/*
 * Seek to the file offset recorded in section header `index' and write that
 * section's sh_size bytes from `data'.  The expression is non-zero when
 * lseek64() does not land on the requested offset or when dt_write() does
 * not report exactly sh_size bytes written.
 *
 * The macro is not self-contained: it expects variables named `fd', `dtp'
 * and `elf_file' (with a `shdr' array member) to be in scope where it is
 * expanded.
 */
#define PWRITE_SCN(index, data) \
        (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \
        (off64_t)elf_file.shdr[(index)].sh_offset || \
        dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \
        elf_file.shdr[(index)].sh_size)
  66 
/*
 * Section header string table for 32-bit objects.  The number in each
 * trailing comment is the byte offset of that name within the table; the
 * same numbers are stored as sh_name values by dump_elf32().  The relocation
 * section is named ".rela.SUNW_dof" on SPARC and ".rel.SUNW_dof" otherwise.
 */
static const char DTRACE_SHSTRTAB32[] = "\0"
".shstrtab\0"           /* 1 */
".SUNW_dof\0"           /* 11 */
".strtab\0"             /* 21 */
".symtab\0"             /* 29 */
#ifdef __sparc
".rela.SUNW_dof";       /* 37 */
#else
".rel.SUNW_dof";        /* 37 */
#endif
  77 
/*
 * Section header string table for 64-bit objects.  The number in each
 * trailing comment is the byte offset of that name within the table; the
 * same numbers are stored as sh_name values by dump_elf64().  The relocation
 * section is always named ".rela.SUNW_dof" here.
 */
static const char DTRACE_SHSTRTAB64[] = "\0"
".shstrtab\0"           /* 1 */
".SUNW_dof\0"           /* 11 */
".strtab\0"             /* 21 */
".symtab\0"             /* 29 */
".rela.SUNW_dof";       /* 37 */
  84 
/*
 * Names of the symbol that prepare_elf32()/prepare_elf64() create for the
 * DOF section: DOFLAZYSTR is used when dtp->dt_lazyload is set, DOFSTR
 * otherwise.
 */
static const char DOFSTR[] = "__SUNW_dof";
static const char DOFLAZYSTR[] = "___SUNW_dof";
  87 
/*
 * Node of a singly linked list that pairs a string table buffer with a
 * symbol table buffer.
 * NOTE(review): the code that builds and consumes this list is outside the
 * portion of the file reviewed here -- confirm ownership of the buffers there.
 */
typedef struct dt_link_pair {
        struct dt_link_pair *dlp_next;  /* next pair in linked list */
        void *dlp_str;                  /* buffer for string table */
        void *dlp_sym;                  /* buffer for symbol table */
} dt_link_pair_t;
  93 
/*
 * ELF32 tables derived from a DOF file.  The structure is filled in by
 * prepare_elf32(), which allocates de_rel, de_sym and de_strtab, and is
 * consumed by dump_elf32(), which writes the tables out and frees them.
 */
typedef struct dof_elf32 {
        uint32_t de_nrel;               /* relocation count */
#ifdef __sparc
        Elf32_Rela *de_rel;             /* array of relocations for sparc */
#else
        Elf32_Rel *de_rel;              /* array of relocations for x86 */
#endif
        uint32_t de_nsym;               /* symbol count */
        Elf32_Sym *de_sym;              /* array of symbols */
        uint32_t de_strlen;             /* size of the string table */
        char *de_strtab;                /* string table */
        uint32_t de_global;             /* index of the first global symbol */
} dof_elf32_t;
 107 
 108 static int
 109 prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep)
 110 {
 111         dof_sec_t *dofs, *s;
 112         dof_relohdr_t *dofrh;
 113         dof_relodesc_t *dofr;
 114         char *strtab;
 115         int i, j, nrel;
 116         size_t strtabsz = 1;
 117         uint32_t count = 0;
 118         size_t base;
 119         Elf32_Sym *sym;
 120 #ifdef __sparc
 121         Elf32_Rela *rel;
 122 #else
 123         Elf32_Rel *rel;
 124 #endif
 125 
 126         /*LINTED*/
 127         dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
 128 
 129         /*
 130          * First compute the size of the string table and the number of
 131          * relocations present in the DOF.
 132          */
 133         for (i = 0; i < dof->dofh_secnum; i++) {
 134                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 135                         continue;
 136 
 137                 /*LINTED*/
 138                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 139 
 140                 s = &dofs[dofrh->dofr_strtab];
 141                 strtab = (char *)dof + s->dofs_offset;
 142                 assert(strtab[0] == '\0');
 143                 strtabsz += s->dofs_size - 1;
 144 
 145                 s = &dofs[dofrh->dofr_relsec];
 146                 /*LINTED*/
 147                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 148                 count += s->dofs_size / s->dofs_entsize;
 149         }
 150 
 151         dep->de_strlen = strtabsz;
 152         dep->de_nrel = count;
 153         dep->de_nsym = count + 1; /* the first symbol is always null */
 154 
 155         if (dtp->dt_lazyload) {
 156                 dep->de_strlen += sizeof (DOFLAZYSTR);
 157                 dep->de_nsym++;
 158         } else {
 159                 dep->de_strlen += sizeof (DOFSTR);
 160                 dep->de_nsym++;
 161         }
 162 
 163         if ((dep->de_rel = calloc(dep->de_nrel,
 164             sizeof (dep->de_rel[0]))) == NULL) {
 165                 return (dt_set_errno(dtp, EDT_NOMEM));
 166         }
 167 
 168         if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) {
 169                 free(dep->de_rel);
 170                 return (dt_set_errno(dtp, EDT_NOMEM));
 171         }
 172 
 173         if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
 174                 free(dep->de_rel);
 175                 free(dep->de_sym);
 176                 return (dt_set_errno(dtp, EDT_NOMEM));
 177         }
 178 
 179         count = 0;
 180         strtabsz = 1;
 181         dep->de_strtab[0] = '\0';
 182         rel = dep->de_rel;
 183         sym = dep->de_sym;
 184         dep->de_global = 1;
 185 
 186         /*
 187          * The first symbol table entry must be zeroed and is always ignored.
 188          */
 189         bzero(sym, sizeof (Elf32_Sym));
 190         sym++;
 191 
 192         /*
 193          * Take a second pass through the DOF sections filling in the
 194          * memory we allocated.
 195          */
 196         for (i = 0; i < dof->dofh_secnum; i++) {
 197                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 198                         continue;
 199 
 200                 /*LINTED*/
 201                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 202 
 203                 s = &dofs[dofrh->dofr_strtab];
 204                 strtab = (char *)dof + s->dofs_offset;
 205                 bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
 206                 base = strtabsz;
 207                 strtabsz += s->dofs_size - 1;
 208 
 209                 s = &dofs[dofrh->dofr_relsec];
 210                 /*LINTED*/
 211                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 212                 nrel = s->dofs_size / s->dofs_entsize;
 213 
 214                 s = &dofs[dofrh->dofr_tgtsec];
 215 
 216                 for (j = 0; j < nrel; j++) {
 217 #if defined(__i386) || defined(__amd64)
 218                         rel->r_offset = s->dofs_offset +
 219                             dofr[j].dofr_offset;
 220                         rel->r_info = ELF32_R_INFO(count + dep->de_global,
 221                             R_386_32);
 222 #elif defined(__sparc)
 223                         /*
 224                          * Add 4 bytes to hit the low half of this 64-bit
 225                          * big-endian address.
 226                          */
 227                         rel->r_offset = s->dofs_offset +
 228                             dofr[j].dofr_offset + 4;
 229                         rel->r_info = ELF32_R_INFO(count + dep->de_global,
 230                             R_SPARC_32);
 231 #else
 232 #error unknown ISA
 233 #endif
 234 
 235                         sym->st_name = base + dofr[j].dofr_name - 1;
 236                         sym->st_value = 0;
 237                         sym->st_size = 0;
 238                         sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);
 239                         sym->st_other = 0;
 240                         sym->st_shndx = SHN_UNDEF;
 241 
 242                         rel++;
 243                         sym++;
 244                         count++;
 245                 }
 246         }
 247 
 248         /*
 249          * Add a symbol for the DOF itself. We use a different symbol for
 250          * lazily and actively loaded DOF to make them easy to distinguish.
 251          */
 252         sym->st_name = strtabsz;
 253         sym->st_value = 0;
 254         sym->st_size = dof->dofh_filesz;
 255         sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT);
 256         sym->st_other = 0;
 257         sym->st_shndx = ESHDR_DOF;
 258         sym++;
 259 
 260         if (dtp->dt_lazyload) {
 261                 bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
 262                     sizeof (DOFLAZYSTR));
 263                 strtabsz += sizeof (DOFLAZYSTR);
 264         } else {
 265                 bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
 266                 strtabsz += sizeof (DOFSTR);
 267         }
 268 
 269         assert(count == dep->de_nrel);
 270         assert(strtabsz == dep->de_strlen);
 271 
 272         return (0);
 273 }
 274 
 275 
/*
 * ELF64 tables derived from a DOF file.  The structure is filled in by
 * prepare_elf64(), which allocates de_rel, de_sym and de_strtab, and is
 * consumed by dump_elf64(), which writes the tables out and frees them.
 */
typedef struct dof_elf64 {
        uint32_t de_nrel;               /* relocation count */
        Elf64_Rela *de_rel;             /* array of relocations */
        uint32_t de_nsym;               /* symbol count */
        Elf64_Sym *de_sym;              /* array of symbols */

        uint32_t de_strlen;             /* size of the string table */
        char *de_strtab;                /* string table */

        uint32_t de_global;             /* index of the first global symbol */
} dof_elf64_t;
 287 
 288 static int
 289 prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep)
 290 {
 291         dof_sec_t *dofs, *s;
 292         dof_relohdr_t *dofrh;
 293         dof_relodesc_t *dofr;
 294         char *strtab;
 295         int i, j, nrel;
 296         size_t strtabsz = 1;
 297         uint32_t count = 0;
 298         size_t base;
 299         Elf64_Sym *sym;
 300         Elf64_Rela *rel;
 301 
 302         /*LINTED*/
 303         dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff);
 304 
 305         /*
 306          * First compute the size of the string table and the number of
 307          * relocations present in the DOF.
 308          */
 309         for (i = 0; i < dof->dofh_secnum; i++) {
 310                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 311                         continue;
 312 
 313                 /*LINTED*/
 314                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 315 
 316                 s = &dofs[dofrh->dofr_strtab];
 317                 strtab = (char *)dof + s->dofs_offset;
 318                 assert(strtab[0] == '\0');
 319                 strtabsz += s->dofs_size - 1;
 320 
 321                 s = &dofs[dofrh->dofr_relsec];
 322                 /*LINTED*/
 323                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 324                 count += s->dofs_size / s->dofs_entsize;
 325         }
 326 
 327         dep->de_strlen = strtabsz;
 328         dep->de_nrel = count;
 329         dep->de_nsym = count + 1; /* the first symbol is always null */
 330 
 331         if (dtp->dt_lazyload) {
 332                 dep->de_strlen += sizeof (DOFLAZYSTR);
 333                 dep->de_nsym++;
 334         } else {
 335                 dep->de_strlen += sizeof (DOFSTR);
 336                 dep->de_nsym++;
 337         }
 338 
 339         if ((dep->de_rel = calloc(dep->de_nrel,
 340             sizeof (dep->de_rel[0]))) == NULL) {
 341                 return (dt_set_errno(dtp, EDT_NOMEM));
 342         }
 343 
 344         if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) {
 345                 free(dep->de_rel);
 346                 return (dt_set_errno(dtp, EDT_NOMEM));
 347         }
 348 
 349         if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) {
 350                 free(dep->de_rel);
 351                 free(dep->de_sym);
 352                 return (dt_set_errno(dtp, EDT_NOMEM));
 353         }
 354 
 355         count = 0;
 356         strtabsz = 1;
 357         dep->de_strtab[0] = '\0';
 358         rel = dep->de_rel;
 359         sym = dep->de_sym;
 360         dep->de_global = 1;
 361 
 362         /*
 363          * The first symbol table entry must be zeroed and is always ignored.
 364          */
 365         bzero(sym, sizeof (Elf64_Sym));
 366         sym++;
 367 
 368         /*
 369          * Take a second pass through the DOF sections filling in the
 370          * memory we allocated.
 371          */
 372         for (i = 0; i < dof->dofh_secnum; i++) {
 373                 if (dofs[i].dofs_type != DOF_SECT_URELHDR)
 374                         continue;
 375 
 376                 /*LINTED*/
 377                 dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset);
 378 
 379                 s = &dofs[dofrh->dofr_strtab];
 380                 strtab = (char *)dof + s->dofs_offset;
 381                 bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size);
 382                 base = strtabsz;
 383                 strtabsz += s->dofs_size - 1;
 384 
 385                 s = &dofs[dofrh->dofr_relsec];
 386                 /*LINTED*/
 387                 dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset);
 388                 nrel = s->dofs_size / s->dofs_entsize;
 389 
 390                 s = &dofs[dofrh->dofr_tgtsec];
 391 
 392                 for (j = 0; j < nrel; j++) {
 393 #if defined(__i386) || defined(__amd64)
 394                         rel->r_offset = s->dofs_offset +
 395                             dofr[j].dofr_offset;
 396                         rel->r_info = ELF64_R_INFO(count + dep->de_global,
 397                             R_AMD64_64);
 398 #elif defined(__sparc)
 399                         rel->r_offset = s->dofs_offset +
 400                             dofr[j].dofr_offset;
 401                         rel->r_info = ELF64_R_INFO(count + dep->de_global,
 402                             R_SPARC_64);
 403 #else
 404 #error unknown ISA
 405 #endif
 406 
 407                         sym->st_name = base + dofr[j].dofr_name - 1;
 408                         sym->st_value = 0;
 409                         sym->st_size = 0;
 410                         sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
 411                         sym->st_other = 0;
 412                         sym->st_shndx = SHN_UNDEF;
 413 
 414                         rel++;
 415                         sym++;
 416                         count++;
 417                 }
 418         }
 419 
 420         /*
 421          * Add a symbol for the DOF itself. We use a different symbol for
 422          * lazily and actively loaded DOF to make them easy to distinguish.
 423          */
 424         sym->st_name = strtabsz;
 425         sym->st_value = 0;
 426         sym->st_size = dof->dofh_filesz;
 427         sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT);
 428         sym->st_other = 0;
 429         sym->st_shndx = ESHDR_DOF;
 430         sym++;
 431 
 432         if (dtp->dt_lazyload) {
 433                 bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz,
 434                     sizeof (DOFLAZYSTR));
 435                 strtabsz += sizeof (DOFLAZYSTR);
 436         } else {
 437                 bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR));
 438                 strtabsz += sizeof (DOFSTR);
 439         }
 440 
 441         assert(count == dep->de_nrel);
 442         assert(strtabsz == dep->de_strlen);
 443 
 444         return (0);
 445 }
 446 
 447 /*
 448  * Write out an ELF32 file prologue consisting of a header, section headers,
 449  * and a section header string table.  The DOF data will follow this prologue
 450  * and complete the contents of the given ELF file.
 451  */
 452 static int
 453 dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
 454 {
 455         struct {
 456                 Elf32_Ehdr ehdr;
 457                 Elf32_Shdr shdr[ESHDR_NUM];
 458         } elf_file;
 459 
 460         Elf32_Shdr *shp;
 461         Elf32_Off off;
 462         dof_elf32_t de;
 463         int ret = 0;
 464         uint_t nshdr;
 465 
 466         if (prepare_elf32(dtp, dof, &de) != 0)
 467                 return (-1); /* errno is set for us */
 468 
 469         /*
 470          * If there are no relocations, we only need enough sections for
 471          * the shstrtab and the DOF.
 472          */
 473         nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
 474 
 475         bzero(&elf_file, sizeof (elf_file));
 476 
 477         elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
 478         elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
 479         elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
 480         elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
 481         elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
 482         elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32;
 483 #if defined(_BIG_ENDIAN)
 484         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
 485 #elif defined(_LITTLE_ENDIAN)
 486         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
 487 #endif
 488         elf_file.ehdr.e_type = ET_REL;
 489 #if defined(__sparc)
 490         elf_file.ehdr.e_machine = EM_SPARC;
 491 #elif defined(__i386) || defined(__amd64)
 492         elf_file.ehdr.e_machine = EM_386;
 493 #endif
 494         elf_file.ehdr.e_version = EV_CURRENT;
 495         elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr);
 496         elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr);
 497         elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr);
 498         elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr);
 499         elf_file.ehdr.e_shnum = nshdr;
 500         elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
 501         off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr);
 502 
 503         shp = &elf_file.shdr[ESHDR_SHSTRTAB];
 504         shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */
 505         shp->sh_type = SHT_STRTAB;
 506         shp->sh_offset = off;
 507         shp->sh_size = sizeof (DTRACE_SHSTRTAB32);
 508         shp->sh_addralign = sizeof (char);
 509         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 510 
 511         shp = &elf_file.shdr[ESHDR_DOF];
 512         shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */
 513         shp->sh_flags = SHF_ALLOC;
 514         shp->sh_type = SHT_SUNW_dof;
 515         shp->sh_offset = off;
 516         shp->sh_size = dof->dofh_filesz;
 517         shp->sh_addralign = 8;
 518         off = shp->sh_offset + shp->sh_size;
 519 
 520         shp = &elf_file.shdr[ESHDR_STRTAB];
 521         shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */
 522         shp->sh_flags = SHF_ALLOC;
 523         shp->sh_type = SHT_STRTAB;
 524         shp->sh_offset = off;
 525         shp->sh_size = de.de_strlen;
 526         shp->sh_addralign = sizeof (char);
 527         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
 528 
 529         shp = &elf_file.shdr[ESHDR_SYMTAB];
 530         shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */
 531         shp->sh_flags = SHF_ALLOC;
 532         shp->sh_type = SHT_SYMTAB;
 533         shp->sh_entsize = sizeof (Elf32_Sym);
 534         shp->sh_link = ESHDR_STRTAB;
 535         shp->sh_offset = off;
 536         shp->sh_info = de.de_global;
 537         shp->sh_size = de.de_nsym * sizeof (Elf32_Sym);
 538         shp->sh_addralign = 4;
 539         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4);
 540 
 541         if (de.de_nrel == 0) {
 542                 if (dt_write(dtp, fd, &elf_file,
 543                     sizeof (elf_file)) != sizeof (elf_file) ||
 544                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
 545                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 546                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 547                     PWRITE_SCN(ESHDR_DOF, dof)) {
 548                         ret = dt_set_errno(dtp, errno);
 549                 }
 550         } else {
 551                 shp = &elf_file.shdr[ESHDR_REL];
 552                 shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */
 553                 shp->sh_flags = SHF_ALLOC;
 554 #ifdef __sparc
 555                 shp->sh_type = SHT_RELA;
 556 #else
 557                 shp->sh_type = SHT_REL;
 558 #endif
 559                 shp->sh_entsize = sizeof (de.de_rel[0]);
 560                 shp->sh_link = ESHDR_SYMTAB;
 561                 shp->sh_info = ESHDR_DOF;
 562                 shp->sh_offset = off;
 563                 shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
 564                 shp->sh_addralign = 4;
 565 
 566                 if (dt_write(dtp, fd, &elf_file,
 567                     sizeof (elf_file)) != sizeof (elf_file) ||
 568                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) ||
 569                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 570                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 571                     PWRITE_SCN(ESHDR_REL, de.de_rel) ||
 572                     PWRITE_SCN(ESHDR_DOF, dof)) {
 573                         ret = dt_set_errno(dtp, errno);
 574                 }
 575         }
 576 
 577         free(de.de_strtab);
 578         free(de.de_sym);
 579         free(de.de_rel);
 580 
 581         return (ret);
 582 }
 583 
 584 /*
 585  * Write out an ELF64 file prologue consisting of a header, section headers,
 586  * and a section header string table.  The DOF data will follow this prologue
 587  * and complete the contents of the given ELF file.
 588  */
 589 static int
 590 dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd)
 591 {
 592         struct {
 593                 Elf64_Ehdr ehdr;
 594                 Elf64_Shdr shdr[ESHDR_NUM];
 595         } elf_file;
 596 
 597         Elf64_Shdr *shp;
 598         Elf64_Off off;
 599         dof_elf64_t de;
 600         int ret = 0;
 601         uint_t nshdr;
 602 
 603         if (prepare_elf64(dtp, dof, &de) != 0)
 604                 return (-1); /* errno is set for us */
 605 
 606         /*
 607          * If there are no relocations, we only need enough sections for
 608          * the shstrtab and the DOF.
 609          */
 610         nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM;
 611 
 612         bzero(&elf_file, sizeof (elf_file));
 613 
 614         elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0;
 615         elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1;
 616         elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2;
 617         elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3;
 618         elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
 619         elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64;
 620 #if defined(_BIG_ENDIAN)
 621         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
 622 #elif defined(_LITTLE_ENDIAN)
 623         elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
 624 #endif
 625         elf_file.ehdr.e_type = ET_REL;
 626 #if defined(__sparc)
 627         elf_file.ehdr.e_machine = EM_SPARCV9;
 628 #elif defined(__i386) || defined(__amd64)
 629         elf_file.ehdr.e_machine = EM_AMD64;
 630 #endif
 631         elf_file.ehdr.e_version = EV_CURRENT;
 632         elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr);
 633         elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr);
 634         elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr);
 635         elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr);
 636         elf_file.ehdr.e_shnum = nshdr;
 637         elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB;
 638         off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr);
 639 
 640         shp = &elf_file.shdr[ESHDR_SHSTRTAB];
 641         shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */
 642         shp->sh_type = SHT_STRTAB;
 643         shp->sh_offset = off;
 644         shp->sh_size = sizeof (DTRACE_SHSTRTAB64);
 645         shp->sh_addralign = sizeof (char);
 646         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 647 
 648         shp = &elf_file.shdr[ESHDR_DOF];
 649         shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */
 650         shp->sh_flags = SHF_ALLOC;
 651         shp->sh_type = SHT_SUNW_dof;
 652         shp->sh_offset = off;
 653         shp->sh_size = dof->dofh_filesz;
 654         shp->sh_addralign = 8;
 655         off = shp->sh_offset + shp->sh_size;
 656 
 657         shp = &elf_file.shdr[ESHDR_STRTAB];
 658         shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */
 659         shp->sh_flags = SHF_ALLOC;
 660         shp->sh_type = SHT_STRTAB;
 661         shp->sh_offset = off;
 662         shp->sh_size = de.de_strlen;
 663         shp->sh_addralign = sizeof (char);
 664         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 665 
 666         shp = &elf_file.shdr[ESHDR_SYMTAB];
 667         shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */
 668         shp->sh_flags = SHF_ALLOC;
 669         shp->sh_type = SHT_SYMTAB;
 670         shp->sh_entsize = sizeof (Elf64_Sym);
 671         shp->sh_link = ESHDR_STRTAB;
 672         shp->sh_offset = off;
 673         shp->sh_info = de.de_global;
 674         shp->sh_size = de.de_nsym * sizeof (Elf64_Sym);
 675         shp->sh_addralign = 8;
 676         off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8);
 677 
 678         if (de.de_nrel == 0) {
 679                 if (dt_write(dtp, fd, &elf_file,
 680                     sizeof (elf_file)) != sizeof (elf_file) ||
 681                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
 682                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 683                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 684                     PWRITE_SCN(ESHDR_DOF, dof)) {
 685                         ret = dt_set_errno(dtp, errno);
 686                 }
 687         } else {
 688                 shp = &elf_file.shdr[ESHDR_REL];
 689                 shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rel.SUNW_dof" */
 690                 shp->sh_flags = SHF_ALLOC;
 691                 shp->sh_type = SHT_RELA;
 692                 shp->sh_entsize = sizeof (de.de_rel[0]);
 693                 shp->sh_link = ESHDR_SYMTAB;
 694                 shp->sh_info = ESHDR_DOF;
 695                 shp->sh_offset = off;
 696                 shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]);
 697                 shp->sh_addralign = 8;
 698 
 699                 if (dt_write(dtp, fd, &elf_file,
 700                     sizeof (elf_file)) != sizeof (elf_file) ||
 701                     PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) ||
 702                     PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) ||
 703                     PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) ||
 704                     PWRITE_SCN(ESHDR_REL, de.de_rel) ||
 705                     PWRITE_SCN(ESHDR_DOF, dof)) {
 706                         ret = dt_set_errno(dtp, errno);
 707                 }
 708         }
 709 
 710         free(de.de_strtab);
 711         free(de.de_sym);
 712         free(de.de_rel);
 713 
 714         return (ret);
 715 }
 716 
 717 static int
 718 dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t shn,
 719     GElf_Sym *sym)
 720 {
 721         int i, ret = -1;
 722         GElf_Sym s;
 723 
 724         for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) {
 725                 if (GELF_ST_TYPE(sym->st_info) == STT_FUNC &&
 726                     shn == sym->st_shndx &&
 727                     sym->st_value <= addr &&
 728                     addr < sym->st_value + sym->st_size) {
 729                         if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
 730                                 return (0);
 731 
 732                         ret = 0;
 733                         s = *sym;
 734                 }
 735         }
 736 
 737         if (ret == 0)
 738                 *sym = s;
 739         return (ret);
 740 }
 741 
 742 #if defined(__sparc)
 743 
/*
 * SPARC instruction words that dt_modtext() looks for at, or stores into,
 * a probe call site.
 * NOTE(review): DT_OP_CLR_O0 is not referenced in the portion of the file
 * reviewed here; by its name it presumably encodes "clr %o0" -- confirm at
 * its point of use.
 */
#define DT_OP_RET               0x81c7e008
#define DT_OP_NOP               0x01000000
#define DT_OP_CALL              0x40000000
#define DT_OP_CLR_O0            0x90102000

/*
 * Instruction classifiers: each masks the word and compares it with a fixed
 * pattern.  dt_modtext() applies them to recognize a mov of a register into
 * %o7, a restore, and a retl-like instruction respectively.
 */
#define DT_IS_MOV_O7(inst)      (((inst) & 0xffffe000) == 0x9e100000)
#define DT_IS_RESTORE(inst)     (((inst) & 0xc1f80000) == 0x81e80000)
#define DT_IS_RETL(inst)        (((inst) & 0xfff83fff) == 0x81c02008)

/*
 * DT_RS2() extracts the low five bits of an instruction word; DT_MAKE_RETL()
 * ORs a register number, shifted left by 14 bits, into the retl pattern that
 * DT_IS_RETL() matches.
 */
#define DT_RS2(inst)            ((inst) & 0x1f)
#define DT_MAKE_RETL(reg)       (0x81c02008 | ((reg) << 14))
 755 
 756 /*ARGSUSED*/
 757 static int
 758 dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
 759     uint32_t *off)
 760 {
 761         uint32_t *ip;
 762 
 763         if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0)
 764                 return (-1);
 765 
 766         /*LINTED*/
 767         ip = (uint32_t *)(p + rela->r_offset);
 768 
 769         /*
 770          * We only know about some specific relocation types.
 771          */
 772         if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 &&
 773             GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30)
 774                 return (-1);
 775 
 776         /*
 777          * We may have already processed this object file in an earlier linker
 778          * invocation. Check to see if the present instruction sequence matches
 779          * the one we would install below.
 780          */
 781         if (isenabled) {
 782                 if (ip[0] == DT_OP_NOP) {
 783                         (*off) += sizeof (ip[0]);
 784                         return (0);
 785                 }
 786         } else {
 787                 if (DT_IS_RESTORE(ip[1])) {
 788                         if (ip[0] == DT_OP_RET) {
 789                                 (*off) += sizeof (ip[0]);
 790                                 return (0);
 791                         }
 792                 } else if (DT_IS_MOV_O7(ip[1])) {
 793                         if (DT_IS_RETL(ip[0]))
 794                                 return (0);
 795                 } else {
 796                         if (ip[0] == DT_OP_NOP) {
 797                                 (*off) += sizeof (ip[0]);
 798                                 return (0);
 799                         }
 800                 }
 801         }
 802 
 803         /*
 804          * We only expect call instructions with a displacement of 0.
 805          */
 806         if (ip[0] != DT_OP_CALL) {
 807                 dt_dprintf("found %x instead of a call instruction at %llx\n",
 808                     ip[0], (u_longlong_t)rela->r_offset);
 809                 return (-1);
 810         }
 811 
 812         if (isenabled) {
 813                 /*
 814                  * It would necessarily indicate incorrect usage if an is-
 815                  * enabled probe were tail-called so flag that as an error.
 816                  * It's also potentially (very) tricky to handle gracefully,
 817                  * but could be done if this were a desired use scenario.
 818                  */
 819                 if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) {
 820                         dt_dprintf("tail call to is-enabled probe at %llx\n",
 821                             (u_longlong_t)rela->r_offset);
 822                         return (-1);
 823                 }
 824 
 825 
 826                 /*
 827                  * On SPARC, we take advantage of the fact that the first
 828                  * argument shares the same register as for the return value.
 829                  * The macro handles the work of zeroing that register so we
 830                  * don't need to do anything special here. We instrument the
 831                  * instruction in the delay slot as we'll need to modify the
 832                  * return register after that instruction has been emulated.
 833                  */
 834                 ip[0] = DT_OP_NOP;
 835                 (*off) += sizeof (ip[0]);
 836         } else {
 837                 /*
 838                  * If the call is followed by a restore, it's a tail call so
 839                  * change the call to a ret. If the call if followed by a mov
 840                  * of a register into %o7, it's a tail call in leaf context
 841                  * so change the call to a retl-like instruction that returns
 842                  * to that register value + 8 (rather than the typical %o7 +
 843                  * 8); the delay slot instruction is left, but should have no
 844                  * effect. Otherwise we change the call to be a nop. We
 845                  * identify the subsequent instruction as the probe point in
 846                  * all but the leaf tail-call case to ensure that arguments to
 847                  * the probe are complete and consistent. An astute, though
 848                  * largely hypothetical, observer would note that there is the
 849                  * possibility of a false-positive probe firing if the function
 850                  * contained a branch to the instruction in the delay slot of
 851                  * the call. Fixing this would require significant in-kernel
 852                  * modifications, and isn't worth doing until we see it in the
 853                  * wild.
 854                  */
 855                 if (DT_IS_RESTORE(ip[1])) {
 856                         ip[0] = DT_OP_RET;
 857                         (*off) += sizeof (ip[0]);
 858                 } else if (DT_IS_MOV_O7(ip[1])) {
 859                         ip[0] = DT_MAKE_RETL(DT_RS2(ip[1]));
 860                 } else {
 861                         ip[0] = DT_OP_NOP;
 862                         (*off) += sizeof (ip[0]);
 863                 }
 864         }
 865 
 866         return (0);
 867 }
 868 
 869 #elif defined(__i386) || defined(__amd64)
 870 
 871 #define DT_OP_NOP               0x90
 872 #define DT_OP_RET               0xc3
 873 #define DT_OP_CALL              0xe8
 874 #define DT_OP_JMP32             0xe9
 875 #define DT_OP_REX_RAX           0x48
 876 #define DT_OP_XOR_EAX_0         0x33
 877 #define DT_OP_XOR_EAX_1         0xc0
 878 
 879 static int
 880 dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela,
 881     uint32_t *off)
 882 {
 883         uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1);
 884         uint8_t ret;
 885 
 886         /*
 887          * On x86, the first byte of the instruction is the call opcode and
 888          * the next four bytes are the 32-bit address; the relocation is for
 889          * the address operand. We back up the offset to the first byte of
 890          * the instruction. For is-enabled probes, we later advance the offset
 891          * so that it hits the first nop in the instruction sequence.
 892          */
 893         (*off) -= 1;
 894 
 895         /*
 896          * We only know about some specific relocation types. Luckily
 897          * these types have the same values on both 32-bit and 64-bit
 898          * x86 architectures.
 899          */
 900         if (GELF_R_TYPE(rela->r_info) != R_386_PC32 &&
 901             GELF_R_TYPE(rela->r_info) != R_386_PLT32)
 902                 return (-1);
 903 
 904         /*
 905          * We may have already processed this object file in an earlier linker
 906          * invocation. Check to see if the present instruction sequence matches
 907          * the one we would install. For is-enabled probes, we advance the
 908          * offset to the first nop instruction in the sequence to match the
 909          * text modification code below.
 910          */
 911         if (!isenabled) {
 912                 if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) &&
 913                     ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP &&
 914                     ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP)
 915                         return (0);
 916         } else if (dtp->dt_oflags & DTRACE_O_LP64) {
 917                 if (ip[0] == DT_OP_REX_RAX &&
 918                     ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 &&
 919                     (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) &&
 920                     ip[4] == DT_OP_NOP) {
 921                         (*off) += 3;
 922                         return (0);
 923                 }
 924         } else {
 925                 if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 &&
 926                     (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) &&
 927                     ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) {
 928                         (*off) += 2;
 929                         return (0);
 930                 }
 931         }
 932 
 933         /*
 934          * We expect either a call instrution with a 32-bit displacement or a
 935          * jmp instruction with a 32-bit displacement acting as a tail-call.
 936          */
 937         if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) {
 938                 dt_dprintf("found %x instead of a call or jmp instruction at "
 939                     "%llx\n", ip[0], (u_longlong_t)rela->r_offset);
 940                 return (-1);
 941         }
 942 
 943         ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP;
 944 
 945         /*
 946          * Establish the instruction sequence -- all nops for probes, and an
 947          * instruction to clear the return value register (%eax/%rax) followed
 948          * by nops for is-enabled probes. For is-enabled probes, we advance
 949          * the offset to the first nop. This isn't stricly necessary but makes
 950          * for more readable disassembly when the probe is enabled.
 951          */
 952         if (!isenabled) {
 953                 ip[0] = ret;
 954                 ip[1] = DT_OP_NOP;
 955                 ip[2] = DT_OP_NOP;
 956                 ip[3] = DT_OP_NOP;
 957                 ip[4] = DT_OP_NOP;
 958         } else if (dtp->dt_oflags & DTRACE_O_LP64) {
 959                 ip[0] = DT_OP_REX_RAX;
 960                 ip[1] = DT_OP_XOR_EAX_0;
 961                 ip[2] = DT_OP_XOR_EAX_1;
 962                 ip[3] = ret;
 963                 ip[4] = DT_OP_NOP;
 964                 (*off) += 3;
 965         } else {
 966                 ip[0] = DT_OP_XOR_EAX_0;
 967                 ip[1] = DT_OP_XOR_EAX_1;
 968                 ip[2] = ret;
 969                 ip[3] = DT_OP_NOP;
 970                 ip[4] = DT_OP_NOP;
 971                 (*off) += 2;
 972         }
 973 
 974         return (0);
 975 }
 976 
 977 #else
 978 #error unknown ISA
 979 #endif
 980 
 981 /*PRINTFLIKE5*/
 982 static int
 983 dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs,
 984     const char *format, ...)
 985 {
 986         va_list ap;
 987         dt_link_pair_t *pair;
 988 
 989         va_start(ap, format);
 990         dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
 991         va_end(ap);
 992 
 993         if (elf != NULL)
 994                 (void) elf_end(elf);
 995 
 996         if (fd >= 0)
 997                 (void) close(fd);
 998 
 999         while ((pair = bufs) != NULL) {
1000                 bufs = pair->dlp_next;
1001                 dt_free(dtp, pair->dlp_str);
1002                 dt_free(dtp, pair->dlp_sym);
1003                 dt_free(dtp, pair);
1004         }
1005 
1006         return (dt_set_errno(dtp, EDT_COMPILER));
1007 }
1008 
1009 static int
1010 process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
1011 {
1012         static const char dt_prefix[] = "__dtrace";
1013         static const char dt_enabled[] = "enabled";
1014         static const char dt_symprefix[] = "$dtrace";
1015         static const char dt_symfmt[] = "%s%d.%s";
1016         int fd, i, ndx, eprobe, mod = 0;
1017         Elf *elf = NULL;
1018         GElf_Ehdr ehdr;
1019         Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt;
1020         Elf_Data *data_rel, *data_sym, *data_str, *data_tgt;
1021         GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt;
1022         GElf_Sym rsym, fsym, dsym;
1023         GElf_Rela rela;
1024         char *s, *p, *r;
1025         char pname[DTRACE_PROVNAMELEN];
1026         dt_provider_t *pvp;
1027         dt_probe_t *prp;
1028         uint32_t off, eclass, emachine1, emachine2;
1029         size_t symsize, nsym, isym, istr, len;
1030         key_t objkey;
1031         dt_link_pair_t *pair, *bufs = NULL;
1032         dt_strtab_t *strtab;
1033 
1034         if ((fd = open64(obj, O_RDWR)) == -1) {
1035                 return (dt_link_error(dtp, elf, fd, bufs,
1036                     "failed to open %s: %s", obj, strerror(errno)));
1037         }
1038 
1039         if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) {
1040                 return (dt_link_error(dtp, elf, fd, bufs,
1041                     "failed to process %s: %s", obj, elf_errmsg(elf_errno())));
1042         }
1043 
1044         switch (elf_kind(elf)) {
1045         case ELF_K_ELF:
1046                 break;
1047         case ELF_K_AR:
1048                 return (dt_link_error(dtp, elf, fd, bufs, "archives are not "
1049                     "permitted; use the contents of the archive instead: %s",
1050                     obj));
1051         default:
1052                 return (dt_link_error(dtp, elf, fd, bufs,
1053                     "invalid file type: %s", obj));
1054         }
1055 
1056         if (gelf_getehdr(elf, &ehdr) == NULL) {
1057                 return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s",
1058                     obj));
1059         }
1060 
1061         if (dtp->dt_oflags & DTRACE_O_LP64) {
1062                 eclass = ELFCLASS64;
1063 #if defined(__sparc)
1064                 emachine1 = emachine2 = EM_SPARCV9;
1065 #elif defined(__i386) || defined(__amd64)
1066                 emachine1 = emachine2 = EM_AMD64;
1067 #endif
1068                 symsize = sizeof (Elf64_Sym);
1069         } else {
1070                 eclass = ELFCLASS32;
1071 #if defined(__sparc)
1072                 emachine1 = EM_SPARC;
1073                 emachine2 = EM_SPARC32PLUS;
1074 #elif defined(__i386) || defined(__amd64)
1075                 emachine1 = emachine2 = EM_386;
1076 #endif
1077                 symsize = sizeof (Elf32_Sym);
1078         }
1079 
1080         if (ehdr.e_ident[EI_CLASS] != eclass) {
1081                 return (dt_link_error(dtp, elf, fd, bufs,
1082                     "incorrect ELF class for object file: %s", obj));
1083         }
1084 
1085         if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) {
1086                 return (dt_link_error(dtp, elf, fd, bufs,
1087                     "incorrect ELF machine type for object file: %s", obj));
1088         }
1089 
1090         /*
1091          * We use this token as a relatively unique handle for this file on the
1092          * system in order to disambiguate potential conflicts between files of
1093          * the same name which contain identially named local symbols.
1094          */
1095         if ((objkey = ftok(obj, 0)) == (key_t)-1) {
1096                 return (dt_link_error(dtp, elf, fd, bufs,
1097                     "failed to generate unique key for object file: %s", obj));
1098         }
1099 
1100         scn_rel = NULL;
1101         while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
1102                 if (gelf_getshdr(scn_rel, &shdr_rel) == NULL)
1103                         goto err;
1104 
1105                 /*
1106                  * Skip any non-relocation sections.
1107                  */
1108                 if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
1109                         continue;
1110 
1111                 if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL)
1112                         goto err;
1113 
1114                 /*
1115                  * Grab the section, section header and section data for the
1116                  * symbol table that this relocation section references.
1117                  */
1118                 if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
1119                     gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
1120                     (data_sym = elf_getdata(scn_sym, NULL)) == NULL)
1121                         goto err;
1122 
1123                 /*
1124                  * Ditto for that symbol table's string table.
1125                  */
1126                 if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
1127                     gelf_getshdr(scn_str, &shdr_str) == NULL ||
1128                     (data_str = elf_getdata(scn_str, NULL)) == NULL)
1129                         goto err;
1130 
1131                 /*
1132                  * Grab the section, section header and section data for the
1133                  * target section for the relocations. For the relocations
1134                  * we're looking for -- this will typically be the text of the
1135                  * object file.
1136                  */
1137                 if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL ||
1138                     gelf_getshdr(scn_tgt, &shdr_tgt) == NULL ||
1139                     (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL)
1140                         goto err;
1141 
1142                 /*
1143                  * We're looking for relocations to symbols matching this form:
1144                  *
1145                  *   __dtrace[enabled]_<prov>___<probe>
1146                  *
1147                  * For the generated object, we need to record the location
1148                  * identified by the relocation, and create a new relocation
1149                  * in the generated object that will be resolved at link time
1150                  * to the location of the function in which the probe is
1151                  * embedded. In the target object, we change the matched symbol
1152                  * so that it will be ignored at link time, and we modify the
1153                  * target (text) section to replace the call instruction with
1154                  * one or more nops.
1155                  *
1156                  * If the function containing the probe is locally scoped
1157                  * (static), we create an alias used by the relocation in the
1158                  * generated object. The alias, a new symbol, will be global
1159                  * (so that the relocation from the generated object can be
1160                  * resolved), and hidden (so that it is converted to a local
1161                  * symbol at link time). Such aliases have this form:
1162                  *
1163                  *   $dtrace<key>.<function>
1164                  *
1165                  * We take a first pass through all the relocations to
1166                  * populate our string table and count the number of extra
1167                  * symbols we'll require.
1168                  */
1169                 strtab = dt_strtab_create(1);
1170                 nsym = 0;
1171                 isym = data_sym->d_size / symsize;
1172                 istr = data_str->d_size;
1173 
1174                 for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1175 
1176                         if (shdr_rel.sh_type == SHT_RELA) {
1177                                 if (gelf_getrela(data_rel, i, &rela) == NULL)
1178                                         continue;
1179                         } else {
1180                                 GElf_Rel rel;
1181                                 if (gelf_getrel(data_rel, i, &rel) == NULL)
1182                                         continue;
1183                                 rela.r_offset = rel.r_offset;
1184                                 rela.r_info = rel.r_info;
1185                                 rela.r_addend = 0;
1186                         }
1187 
1188                         if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
1189                             &rsym) == NULL) {
1190                                 dt_strtab_destroy(strtab);
1191                                 goto err;
1192                         }
1193 
1194                         s = (char *)data_str->d_buf + rsym.st_name;
1195 
1196                         if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1197                                 continue;
1198 
1199                         if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
1200                             shdr_rel.sh_info, &fsym) != 0) {
1201                                 dt_strtab_destroy(strtab);
1202                                 goto err;
1203                         }
1204 
1205                         if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL)
1206                                 continue;
1207 
1208                         if (fsym.st_name > data_str->d_size) {
1209                                 dt_strtab_destroy(strtab);
1210                                 goto err;
1211                         }
1212 
1213                         s = (char *)data_str->d_buf + fsym.st_name;
1214 
1215                         /*
1216                          * If this symbol isn't of type function, we've really
1217                          * driven off the rails or the object file is corrupt.
1218                          */
1219                         if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) {
1220                                 dt_strtab_destroy(strtab);
1221                                 return (dt_link_error(dtp, elf, fd, bufs,
1222                                     "expected %s to be of type function", s));
1223                         }
1224 
1225                         len = snprintf(NULL, 0, dt_symfmt, dt_symprefix,
1226                             objkey, s) + 1;
1227                         if ((p = dt_alloc(dtp, len)) == NULL) {
1228                                 dt_strtab_destroy(strtab);
1229                                 goto err;
1230                         }
1231                         (void) snprintf(p, len, dt_symfmt, dt_symprefix,
1232                             objkey, s);
1233 
1234                         if (dt_strtab_index(strtab, p) == -1) {
1235                                 nsym++;
1236                                 (void) dt_strtab_insert(strtab, p);
1237                         }
1238 
1239                         dt_free(dtp, p);
1240                 }
1241 
1242                 /*
1243                  * If needed, allocate the additional space for the symbol
1244                  * table and string table copying the old data into the new
1245                  * buffers, and marking the buffers as dirty. We inject those
1246                  * newly allocated buffers into the libelf data structures, but
1247                  * are still responsible for freeing them once we're done with
1248                  * the elf handle.
1249                  */
1250                 if (nsym > 0) {
1251                         /*
1252                          * The first byte of the string table is reserved for
1253                          * the \0 entry.
1254                          */
1255                         len = dt_strtab_size(strtab) - 1;
1256 
1257                         assert(len > 0);
1258                         assert(dt_strtab_index(strtab, "") == 0);
1259 
1260                         dt_strtab_destroy(strtab);
1261 
1262                         if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL)
1263                                 goto err;
1264 
1265                         if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size +
1266                             len)) == NULL) {
1267                                 dt_free(dtp, pair);
1268                                 goto err;
1269                         }
1270 
1271                         if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size +
1272                             nsym * symsize)) == NULL) {
1273                                 dt_free(dtp, pair->dlp_str);
1274                                 dt_free(dtp, pair);
1275                                 goto err;
1276                         }
1277 
1278                         pair->dlp_next = bufs;
1279                         bufs = pair;
1280 
1281                         bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size);
1282                         data_str->d_buf = pair->dlp_str;
1283                         data_str->d_size += len;
1284                         (void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY);
1285 
1286                         shdr_str.sh_size += len;
1287                         (void) gelf_update_shdr(scn_str, &shdr_str);
1288 
1289                         bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size);
1290                         data_sym->d_buf = pair->dlp_sym;
1291                         data_sym->d_size += nsym * symsize;
1292                         (void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY);
1293 
1294                         shdr_sym.sh_size += nsym * symsize;
1295                         (void) gelf_update_shdr(scn_sym, &shdr_sym);
1296 
1297                         nsym += isym;
1298                 } else {
1299                         dt_strtab_destroy(strtab);
1300                 }
1301 
1302                 /*
1303                  * Now that the tables have been allocated, perform the
1304                  * modifications described above.
1305                  */
1306                 for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
1307 
1308                         if (shdr_rel.sh_type == SHT_RELA) {
1309                                 if (gelf_getrela(data_rel, i, &rela) == NULL)
1310                                         continue;
1311                         } else {
1312                                 GElf_Rel rel;
1313                                 if (gelf_getrel(data_rel, i, &rel) == NULL)
1314                                         continue;
1315                                 rela.r_offset = rel.r_offset;
1316                                 rela.r_info = rel.r_info;
1317                                 rela.r_addend = 0;
1318                         }
1319 
1320                         ndx = GELF_R_SYM(rela.r_info);
1321 
1322                         if (gelf_getsym(data_sym, ndx, &rsym) == NULL ||
1323                             rsym.st_name > data_str->d_size)
1324                                 goto err;
1325 
1326                         s = (char *)data_str->d_buf + rsym.st_name;
1327 
1328                         if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0)
1329                                 continue;
1330 
1331                         s += sizeof (dt_prefix) - 1;
1332 
1333                         /*
1334                          * Check to see if this is an 'is-enabled' check as
1335                          * opposed to a normal probe.
1336                          */
1337                         if (strncmp(s, dt_enabled,
1338                             sizeof (dt_enabled) - 1) == 0) {
1339                                 s += sizeof (dt_enabled) - 1;
1340                                 eprobe = 1;
1341                                 *eprobesp = 1;
1342                                 dt_dprintf("is-enabled probe\n");
1343                         } else {
1344                                 eprobe = 0;
1345                                 dt_dprintf("normal probe\n");
1346                         }
1347 
1348                         if (*s++ != '_')
1349                                 goto err;
1350 
1351                         if ((p = strstr(s, "___")) == NULL ||
1352                             p - s >= sizeof (pname))
1353                                 goto err;
1354 
1355                         bcopy(s, pname, p - s);
1356                         pname[p - s] = '\0';
1357 
1358                         p = strhyphenate(p + 3); /* strlen("___") */
1359 
1360                         if (dt_symtab_lookup(data_sym, isym, rela.r_offset,
1361                             shdr_rel.sh_info, &fsym) != 0)
1362                                 goto err;
1363 
1364                         if (fsym.st_name > data_str->d_size)
1365                                 goto err;
1366 
1367                         assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
1368 
1369                         /*
1370                          * If a NULL relocation name is passed to
1371                          * dt_probe_define(), the function name is used for the
1372                          * relocation. The relocation needs to use a mangled
1373                          * name if the symbol is locally scoped; the function
1374                          * name may need to change if we've found the global
1375                          * alias for the locally scoped symbol (we prefer
1376                          * global symbols to locals in dt_symtab_lookup()).
1377                          */
1378                         s = (char *)data_str->d_buf + fsym.st_name;
1379                         r = NULL;
1380 
1381                         if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) {
1382                                 dsym = fsym;
1383                                 dsym.st_name = istr;
1384                                 dsym.st_info = GELF_ST_INFO(STB_GLOBAL,
1385                                     STT_FUNC);
1386                                 dsym.st_other =
1387                                     ELF64_ST_VISIBILITY(STV_ELIMINATE);
1388                                 (void) gelf_update_sym(data_sym, isym, &dsym);
1389 
1390                                 r = (char *)data_str->d_buf + istr;
1391                                 istr += 1 + sprintf(r, dt_symfmt,
1392                                     dt_symprefix, objkey, s);
1393                                 isym++;
1394                                 assert(isym <= nsym);
1395 
1396                         } else if (strncmp(s, dt_symprefix,
1397                             strlen(dt_symprefix)) == 0) {
1398                                 r = s;
1399                                 if ((s = strchr(s, '.')) == NULL)
1400                                         goto err;
1401                                 s++;
1402                         }
1403 
1404                         if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) {
1405                                 return (dt_link_error(dtp, elf, fd, bufs,
1406                                     "no such provider %s", pname));
1407                         }
1408 
1409                         if ((prp = dt_probe_lookup(pvp, p)) == NULL) {
1410                                 return (dt_link_error(dtp, elf, fd, bufs,
1411                                     "no such probe %s", p));
1412                         }
1413 
1414                         assert(fsym.st_value <= rela.r_offset);
1415 
1416                         off = rela.r_offset - fsym.st_value;
1417                         if (dt_modtext(dtp, data_tgt->d_buf, eprobe,
1418                             &rela, &off) != 0) {
1419                                 goto err;
1420                         }
1421 
1422                         if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) {
1423                                 return (dt_link_error(dtp, elf, fd, bufs,
1424                                     "failed to allocate space for probe"));
1425                         }
1426 
1427                         mod = 1;
1428                         (void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY);
1429 
1430                         /*
1431                          * This symbol may already have been marked to
1432                          * be ignored by another relocation referencing
1433                          * the same symbol or if this object file has
1434                          * already been processed by an earlier link
1435                          * invocation.
1436                          */
1437                         if (rsym.st_shndx != SHN_SUNW_IGNORE) {
1438                                 rsym.st_shndx = SHN_SUNW_IGNORE;
1439                                 (void) gelf_update_sym(data_sym, ndx, &rsym);
1440                         }
1441                 }
1442         }
1443 
1444         if (mod && elf_update(elf, ELF_C_WRITE) == -1)
1445                 goto err;
1446 
1447         (void) elf_end(elf);
1448         (void) close(fd);
1449 
1450         while ((pair = bufs) != NULL) {
1451                 bufs = pair->dlp_next;
1452                 dt_free(dtp, pair->dlp_str);
1453                 dt_free(dtp, pair->dlp_sym);
1454                 dt_free(dtp, pair);
1455         }
1456 
1457         return (0);
1458 
1459 err:
1460         return (dt_link_error(dtp, elf, fd, bufs,
1461             "an error was encountered while processing %s", obj));
1462 }
1463 
1464 int
1465 dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags,
1466     const char *file, int objc, char *const objv[])
1467 {
1468         char drti[PATH_MAX];
1469         dof_hdr_t *dof;
1470         int fd, status, i, cur;
1471         char *cmd, tmp;
1472         size_t len;
1473         int eprobes = 0, ret = 0;
1474 
1475         /*
1476          * A NULL program indicates a special use in which we just link
1477          * together a bunch of object files specified in objv and then
1478          * unlink(2) those object files.
1479          */
1480         if (pgp == NULL) {
1481                 const char *fmt = "%s -o %s -r";
1482 
1483                 len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1;
1484 
1485                 for (i = 0; i < objc; i++)
1486                         len += strlen(objv[i]) + 1;
1487 
1488                 cmd = alloca(len);
1489 
1490                 cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file);
1491 
1492                 for (i = 0; i < objc; i++)
1493                         cur += snprintf(cmd + cur, len - cur, " %s", objv[i]);
1494 
1495                 if ((status = system(cmd)) == -1) {
1496                         return (dt_link_error(dtp, NULL, -1, NULL,
1497                             "failed to run %s: %s", dtp->dt_ld_path,
1498                             strerror(errno)));
1499                 }
1500 
1501                 if (WIFSIGNALED(status)) {
1502                         return (dt_link_error(dtp, NULL, -1, NULL,
1503                             "failed to link %s: %s failed due to signal %d",
1504                             file, dtp->dt_ld_path, WTERMSIG(status)));
1505                 }
1506 
1507                 if (WEXITSTATUS(status) != 0) {
1508                         return (dt_link_error(dtp, NULL, -1, NULL,
1509                             "failed to link %s: %s exited with status %d\n",
1510                             file, dtp->dt_ld_path, WEXITSTATUS(status)));
1511                 }
1512 
1513                 for (i = 0; i < objc; i++) {
1514                         if (strcmp(objv[i], file) != 0)
1515                                 (void) unlink(objv[i]);
1516                 }
1517 
1518                 return (0);
1519         }
1520 
1521         for (i = 0; i < objc; i++) {
1522                 if (process_obj(dtp, objv[i], &eprobes) != 0)
1523                         return (-1); /* errno is set for us */
1524         }
1525 
1526         /*
1527          * If there are is-enabled probes then we need to force use of DOF
1528          * version 2.
1529          */
1530         if (eprobes && pgp->dp_dofversion < DOF_VERSION_2)
1531                 pgp->dp_dofversion = DOF_VERSION_2;
1532 
1533         if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL)
1534                 return (-1); /* errno is set for us */
1535 
1536         /*
1537          * Create a temporary file and then unlink it if we're going to
1538          * combine it with drti.o later.  We can still refer to it in child
1539          * processes as /dev/fd/<fd>.
1540          */
1541         if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) {
1542                 return (dt_link_error(dtp, NULL, -1, NULL,
1543                     "failed to open %s: %s", file, strerror(errno)));
1544         }
1545 
1546         /*
1547          * If -xlinktype=DOF has been selected, just write out the DOF.
1548          * Otherwise proceed to the default of generating and linking ELF.
1549          */
1550         switch (dtp->dt_linktype) {
1551         case DT_LTYP_DOF:
1552                 if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz)
1553                         ret = errno;
1554 
1555                 if (close(fd) != 0 && ret == 0)
1556                         ret = errno;
1557 
1558                 if (ret != 0) {
1559                         return (dt_link_error(dtp, NULL, -1, NULL,
1560                             "failed to write %s: %s", file, strerror(ret)));
1561                 }
1562 
1563                 return (0);
1564 
1565         case DT_LTYP_ELF:
1566                 break; /* fall through to the rest of dtrace_program_link() */
1567 
1568         default:
1569                 return (dt_link_error(dtp, NULL, -1, NULL,
1570                     "invalid link type %u\n", dtp->dt_linktype));
1571         }
1572 
1573 
1574         if (!dtp->dt_lazyload)
1575                 (void) unlink(file);
1576 
1577         if (dtp->dt_oflags & DTRACE_O_LP64)
1578                 status = dump_elf64(dtp, dof, fd);
1579         else
1580                 status = dump_elf32(dtp, dof, fd);
1581 
1582         if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) {
1583                 return (dt_link_error(dtp, NULL, -1, NULL,
1584                     "failed to write %s: %s", file, strerror(errno)));
1585         }
1586 
1587         if (!dtp->dt_lazyload) {
1588                 const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s";
1589 
1590                 if (dtp->dt_oflags & DTRACE_O_LP64) {
1591                         (void) snprintf(drti, sizeof (drti),
1592                             "%s/64/drti.o", _dtrace_libdir);
1593                 } else {
1594                         (void) snprintf(drti, sizeof (drti),
1595                             "%s/drti.o", _dtrace_libdir);
1596                 }
1597 
1598                 len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd,
1599                     drti) + 1;
1600 
1601                 cmd = alloca(len);
1602 
1603                 (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti);
1604 
1605                 if ((status = system(cmd)) == -1) {
1606                         ret = dt_link_error(dtp, NULL, -1, NULL,
1607                             "failed to run %s: %s", dtp->dt_ld_path,
1608                             strerror(errno));
1609                         goto done;
1610                 }
1611 
1612                 (void) close(fd); /* release temporary file */
1613 
1614                 if (WIFSIGNALED(status)) {
1615                         ret = dt_link_error(dtp, NULL, -1, NULL,
1616                             "failed to link %s: %s failed due to signal %d",
1617                             file, dtp->dt_ld_path, WTERMSIG(status));
1618                         goto done;
1619                 }
1620 
1621                 if (WEXITSTATUS(status) != 0) {
1622                         ret = dt_link_error(dtp, NULL, -1, NULL,
1623                             "failed to link %s: %s exited with status %d\n",
1624                             file, dtp->dt_ld_path, WEXITSTATUS(status));
1625                         goto done;
1626                 }
1627         } else {
1628                 (void) close(fd);
1629         }
1630 
1631 done:
1632         dtrace_dof_destroy(dtp, dof);
1633         return (ret);
1634 }