天天看點

Android源碼學習——linker(4)

本文學習的源碼參考AndroidXRef,版本為Lollipop 5.1.0_r1。

前面講完了so的加載,這一章來講so的連結過程。so的連結實際上就是完成符号的重定位。

分别看下PrelinkImage和LinkImage的實作。首先是PrelinkImage,這個函數很長,我們一段段來看:

bool soinfo::PrelinkImage() {
  // First stage of linking: locate this image's dynamic section. The segment
  // flags are requested from the helper but are not used in this part of the
  // function.
  ElfW(Word) dynamic_flags = 0;
  phdr_table_get_dynamic_section(phdr, phnum, load_bias, &dynamic, &dynamic_flags);

  /* We can't log anything until the linker is relocated */
  bool relocating_linker = (flags & FLAG_LINKER) != 0;
  if (!relocating_linker) {
    INFO("[ linking %s ]", name);
    DEBUG("si->base = %p si->flags = 0x%08x", reinterpret_cast<void*>(base), flags);
  }

  // An image without a dynamic section cannot be linked; fail (silently when
  // the image being processed is the linker itself).
  if (dynamic == nullptr) {
    if (!relocating_linker) {
      DL_ERR("missing PT_DYNAMIC in \"%s\"", name);
    }
    return false;
  } else {
    if (!relocating_linker) {
      DEBUG("dynamic = %p", dynamic);
    }
  }

#if defined(__arm__)
  // ARM only: fill in ARM_exidx / ARM_exidx_count; the helper's return value
  // is deliberately discarded.
  (void) phdr_table_get_arm_exidx(phdr, phnum, load_bias,
                                  &ARM_exidx, &ARM_exidx_count);
#endif

  ......
           

首先是調用

phdr_table_get_dynamic_section

擷取動态節區。

看下怎麼獲得的:

// Scans the program header table for the first PT_DYNAMIC entry.
//
// On success:
//   *dynamic        - load_bias + p_vaddr of that segment.
//   *dynamic_count  - p_memsz / sizeof(ELF::Dyn) (only if |dynamic_count| is non-null).
//   *dynamic_flags  - the segment's p_flags (only if |dynamic_flags| is non-null).
//
// When no PT_DYNAMIC entry exists, *dynamic is set to NULL and *dynamic_count
// (if requested) to 0; *dynamic_flags is left untouched in that case.
void phdr_table_get_dynamic_section(const ELF::Phdr* phdr_table,
                                    int phdr_count,
                                    ELF::Addr load_bias,
                                    const ELF::Dyn** dynamic,
                                    size_t* dynamic_count,
                                    ELF::Word* dynamic_flags) {
  const ELF::Phdr* phdr_limit = phdr_table + phdr_count;

  for (const ELF::Phdr* phdr = phdr_table; phdr < phdr_limit; phdr++) {
    if (phdr->p_type != PT_DYNAMIC) {
      continue;
    }

    *dynamic = reinterpret_cast<const ELF::Dyn*>(load_bias + phdr->p_vaddr);
    if (dynamic_count) {
      *dynamic_count = (unsigned)(phdr->p_memsz / sizeof(ELF::Dyn));
    }
    if (dynamic_flags) {
      *dynamic_flags = phdr->p_flags;
    }
    return;
  }

  // No PT_DYNAMIC segment was found.
  *dynamic = NULL;
  if (dynamic_count) {
    *dynamic_count = 0;
  }
}
           

從第一個程式頭表項開始周遊,找類型為PT_DYNAMIC的項,那麼就可以找到這一段對應的動态節區。并且,用該段的記憶體大小p_memsz除以一個動态節區表項(即下文所說的符号對象)的大小sizeof(ELF::Dyn),得到動态節區中表項的數目。

回到PrelinkImage中,繼續往下看:

// Extract useful information from dynamic section.
  // needed_count starts at zero; it is checked again in the sanity checks at
  // the end of the function.
  uint32_t needed_count = 0;
  // Walk the dynamic entries until the DT_NULL terminator and dispatch on
  // each entry's tag.
  for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {
    DEBUG("d = %p, d[0](tag) = %p d[1](val) = %p",
          d, reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));
    switch (d->d_tag) {
           

然後開始一項項地周遊動态節區裡面的符号對象,看下這個對象的結構:

// One entry of a 32-bit dynamic section: a tag followed by a value whose
// interpretation (plain integer or address) depends on the tag.
struct Elf32_Dyn {
  // Kind of dynamic table entry.
  Elf32_Sword d_tag;

  // Payload of the entry; both members share the same storage.
  union {
    // Entry interpreted as an integer value.
    Elf32_Word d_val;
    // Entry interpreted as a pointer value.
    Elf32_Addr d_ptr;
  } d_un;
};
           

兩部分,一個4位元組的d_tag,然後一個4位元組的聯合體,可能為d_val,也可能為一個位址d_ptr。

而這裡對Elf32_Dyn這個結構做解析,就是針對不同的d_tag取值進行不同的操作。

後面内容很長,我們挑幾個重要的來說:

case DT_HASH:
        // The table at d_ptr is read as 32-bit words: word 0 is nbucket and
        // word 1 is nchain. The bucket array starts right after those two
        // words (8 bytes in), and the chain array follows the nbucket 4-byte
        // bucket entries.
        nbucket = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[0];
        nchain = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1];
        bucket = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8);
        chain = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8 + nbucket * 4);
        break;
           

這個動态符号對象是關于哈希表的描述,d_un.d_ptr給出了哈希表的位址。然後就依次可以取到nbucket和nchain,以及儲存符号表索引的bucket和chain數組。這是為了友善我們後面查找符号表。

case DT_STRTAB:
        // Remember where the string table lives: the entry's pointer value
        // rebased by load_bias.
        strtab = reinterpret_cast<const char*>(load_bias + d->d_un.d_ptr);
        break;

      case DT_STRSZ:
        // Remember the string table size as given by the entry's integer value.
        strtab_size = d->d_un.d_val;
        break;
           

分别給出了字元串表的位址和大小(位元組數)。

case DT_SYMTAB:
        // Remember where the symbol table lives: the entry's pointer value
        // rebased by load_bias.
        symtab = reinterpret_cast<ElfW(Sym)*>(load_bias + d->d_un.d_ptr);
        break;
           

給出了符号表的位址。

case DT_SYMENT:
        // The declared symbol entry size must match the ElfW(Sym) layout this
        // code was compiled with; anything else fails the prelink step.
        if (d->d_un.d_val != sizeof(ElfW(Sym))) {
          // The argument is an unsigned size_t, so use %zu rather than %zd.
          DL_ERR("invalid DT_SYMENT: %zu", static_cast<size_t>(d->d_un.d_val));
          return false;
        }
        break;
           

判斷所給的符号表的表項大小是不是正确。

case DT_PLTREL:
        // The PLT relocation format declared by the image must match the one
        // this build handles: DT_RELA when USE_RELA is defined, DT_REL
        // otherwise. A mismatch fails the prelink step.
#if defined(USE_RELA)
        if (d->d_un.d_val != DT_RELA) {
          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_RELA", name);
          return false;
        }
#else
        if (d->d_un.d_val != DT_REL) {
          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_REL", name);
          return false;
        }
#endif
        break;
           

給出過程連接配接表(PLT)所引用的重定位項的類型,可能為DT_RELA(重定位項中帶有顯式的加數r_addend)或DT_REL(加數隐式地存放在被重定位的位置上)。

case DT_JMPREL:
        // Remember the address of the PLT relocation table (rebased by
        // load_bias), typed according to the relocation format of this build.
#if defined(USE_RELA)
        plt_rela = reinterpret_cast<ElfW(Rela)*>(load_bias + d->d_un.d_ptr);
#else
        plt_rel = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr);
#endif
        break;

      case DT_PLTRELSZ:
        // Convert the entry's value into a number of relocation entries by
        // dividing by the size of one entry.
#if defined(USE_RELA)
        plt_rela_count = d->d_un.d_val / sizeof(ElfW(Rela));
#else
        plt_rel_count = d->d_un.d_val / sizeof(ElfW(Rel));
#endif
        break;
           

DT_JMPREL指明了重定位表的位址,而DT_PLTRELSZ則指明了重定位表的大小(位元組數)。

case DT_PLTGOT:
        // Only MIPS builds record the address given by this entry (rebased by
        // load_bias) in plt_got; every other build skips it.
#if defined(__mips__)
        // Used by mips and mips64.
        plt_got = reinterpret_cast<ElfW(Addr)**>(load_bias + d->d_un.d_ptr);
#endif
        // Ignore for other platforms... (because RTLD_LAZY is not supported)
        break;
           

如果是mips架構,會給出一個跟過程連結表(PLT)關聯的全局偏移表(GOT)的位址,但是其他平台上并不支援RTLD_LAZY ,是以不需要這一項。

case DT_INIT:
        // Single constructor function: pointer value rebased by load_bias.
        init_func = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_INIT) found at %p", name, init_func);
        break;

      case DT_FINI:
        // Single destructor function: pointer value rebased by load_bias.
        fini_func = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
        DEBUG("%s destructors (DT_FINI) found at %p", name, fini_func);
        break;

      case DT_INIT_ARRAY:
        // Array of constructor function pointers.
        init_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", name, init_array);
        break;

      case DT_INIT_ARRAYSZ:
        // The entry's value is divided by the size of one address to obtain
        // the number of array elements.
        init_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;

      case DT_FINI_ARRAY:
        // Array of destructor function pointers.
        fini_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s destructors (DT_FINI_ARRAY) found at %p", name, fini_array);
        break;

      case DT_FINI_ARRAYSZ:
        // Same conversion as DT_INIT_ARRAYSZ: value / sizeof(address).
        fini_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;

      case DT_PREINIT_ARRAY:
        // Array of pre-initialization function pointers.
        preinit_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_PREINIT_ARRAY) found at %p", name, preinit_array);
        break;

      case DT_PREINIT_ARRAYSZ:
        // Same conversion as DT_INIT_ARRAYSZ: value / sizeof(address).
        preinit_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;
           

分别為初始化函數(init,為初始化函數指令)位址、結束函數位址、初始化函數數組(init_array,其實裡面是一些函數位址)的位址、數組項數、結束函數數組的位址、數組項數,以及預初始化函數數組的位址、數組項數。

最後一部分:

// Sanity checks.
  // The linker itself must be self-contained: it may not depend on any
  // other library.
  if (relocating_linker && needed_count != 0) {
    DL_ERR("linker cannot have DT_NEEDED dependencies on other libraries");
    return false;
  }
  // A usable hash table, string table and symbol table are all mandatory;
  // each of them is only set while parsing the corresponding dynamic entry.
  if (nbucket == 0) {
    DL_ERR("empty/missing DT_HASH in \"%s\" (built with --hash-style=gnu?)", name);
    return false;
  }
  if (strtab == nullptr) {
    DL_ERR("empty/missing DT_STRTAB in \"%s\"", name);
    return false;
  }
  if (symtab == nullptr) {
    DL_ERR("empty/missing DT_SYMTAB in \"%s\"", name);
    return false;
  }
  return true;
}
           

會對提取到的值做一些檢查,并傳回,PrelinkImage就完成了。

這裡我們完成了動态節區的解析,重定位需要的重定位表、符号表、哈希表以及字元串表等等也都準備完成 ,接下來就是真正進行重定位的過程了,繼續看下半部分,LinkImage的實作:

// Second stage of linking: applies the relocations collected during
// PrelinkImage and then (re)applies memory protections.
//
// |extinfo| may be null; when present, its flags select whether the GNU RELRO
// segment is written to or mapped from extinfo->relro_fd.
//
// Returns true on success. On failure an error is reported through DL_ERR
// (except for relocation failures, which are expected to be reported by
// Relocate itself) and false is returned.
bool soinfo::LinkImage(const android_dlextinfo* extinfo) {

#if !defined(__LP64__)
  if (has_text_relocations) {
    // Make segments writable to allow text relocations to work properly. We will later call
    // phdr_table_protect_segments() after all of them are applied and all constructors are run.
    DL_WARN("%s has text relocations. This is wasting memory and prevents "
            "security hardening. Please fix.", name);
    if (phdr_table_unprotect_segments(phdr, phnum, load_bias) < 0) {
      DL_ERR("can't unprotect loadable segments for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }
#endif

  // Relocate() is treated as failed when it returns a non-zero value.
#if defined(USE_RELA)
  if (rela != nullptr) {
    DEBUG("[ relocating %s ]", name);
    if (Relocate(rela, rela_count)) {
      return false;
    }
  }
  if (plt_rela != nullptr) {
    DEBUG("[ relocating %s plt ]", name);
    if (Relocate(plt_rela, plt_rela_count)) {
      return false;
    }
  }
#else
  if (rel != nullptr) {
    DEBUG("[ relocating %s ]", name);
    if (Relocate(rel, rel_count)) {
      return false;
    }
  }
  if (plt_rel != nullptr) {
    DEBUG("[ relocating %s plt ]", name);
    if (Relocate(plt_rel, plt_rel_count)) {
      return false;
    }
  }
#endif

#if defined(__mips__)
  if (!mips_relocate_got(this)) {
    return false;
  }
#endif

  DEBUG("[ finished linking %s ]", name);

#if !defined(__LP64__)
  if (has_text_relocations) {
    // All relocations are done, we can protect our segments back to read-only.
    if (phdr_table_protect_segments(phdr, phnum, load_bias) < 0) {
      DL_ERR("can't protect segments for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }
#endif

  /* We can also turn on GNU RELRO protection */
  if (phdr_table_protect_gnu_relro(phdr, phnum, load_bias) < 0) {
    DL_ERR("can't enable GNU RELRO protection for \"%s\": %s",
           name, strerror(errno));
    return false;
  }

  /* Handle serializing/sharing the RELRO segment */
  if (extinfo && (extinfo->flags & ANDROID_DLEXT_WRITE_RELRO)) {
    if (phdr_table_serialize_gnu_relro(phdr, phnum, load_bias,
                                       extinfo->relro_fd) < 0) {
      DL_ERR("failed serializing GNU RELRO section for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  } else if (extinfo && (extinfo->flags & ANDROID_DLEXT_USE_RELRO)) {
    if (phdr_table_map_gnu_relro(phdr, phnum, load_bias,
                                 extinfo->relro_fd) < 0) {
      DL_ERR("failed mapping GNU RELRO section for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }

  notify_gdb_of_load(this);
  return true;
}
           

重點是兩處重定位的地方,如果是USE_RELA的情況,就去調用

Relocate(rela, rela_count)

Relocate(plt_rela, plt_rela_count)

,另一方面,如果沒有定義USE_RELA(也就是使用REL格式的情況),就去調用

Relocate(rel, rel_count)

Relocate(plt_rel, plt_rel_count)

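繼續看Relocate這個函數的實作吧(注意:下面貼出的SharedLibrary::Relocate以及之後的ElfRelocations等代碼屬于crazy linker的實作,參數清單與上面調用的Relocate(rel, rel_count)并不相同,這裡借助它來了解重定位的大緻流程):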

// Applies every relocation of this library.
//
// An ElfRelocations helper is initialised from the library's ELF view and
// then asked to apply all relocations, resolving symbols through a
// SharedLibraryResolver built from |lib_list| and |dependencies|.
// Returns true on success; false if either step fails, in which case
// |error| is passed to the failing step for it to fill in.
bool SharedLibrary::Relocate(LibraryList* lib_list,
                             Vector<LibraryView*>* dependencies,
                             Error* error) {
  LOG("%s: Applying relocations to %s\n", __FUNCTION__, base_name_);

  ElfRelocations relocations;
  if (relocations.Init(&view_, error)) {
    // The resolver is only needed once the relocation tables were parsed.
    SharedLibraryResolver resolver(this, lib_list, dependencies);
    if (relocations.ApplyAll(&symbols_, &resolver, error)) {
      LOG("%s: Relocations applied for %s\n", __FUNCTION__, base_name_);
      return true;
    }
  }

  return false;
}
           

主要是初始化了一個ElfRelocations的對象,然後分别去調用了它的

Init

方法和

ApplyAll

方法。

先看init方法:

// Parses the dynamic table of |view| and records everything needed to apply
// relocations later: program header info, the PLT and regular relocation
// tables with their sizes, the relocation format, and a few flags.
//
// Returns true on success. Returns false with |*error| set when:
//   - DT_PLTREL is missing or is neither DT_REL nor DT_RELA,
//   - more than one DT_REL/DT_RELA (or DT_RELSZ/DT_RELASZ) entry is seen
//     with a non-zero previous value,
//   - the regular relocation entries disagree with the DT_PLTREL format.
bool ElfRelocations::Init(const ElfView* view, Error* error) {
  // Save these for later.
  phdr_ = view->phdr();
  phdr_count_ = view->phdr_count();
  load_bias_ = view->load_bias();

  // We handle only Rel or Rela, but not both. If DT_RELA or DT_RELASZ
  // then we require DT_PLTREL to agree.
  bool has_rela_relocations = false;
  bool has_rel_relocations = false;

  // Parse the dynamic table.
  ElfView::DynamicIterator dyn(view);
  for (; dyn.HasNext(); dyn.GetNext()) {
    // dyn_value is the entry taken as an integer, dyn_addr the entry taken
    // as an address relative to the load bias.
    ELF::Addr dyn_value = dyn.GetValue();
    uintptr_t dyn_addr = dyn.GetAddress(view->load_bias());

    const ELF::Addr tag = dyn.GetTag();
    switch (tag) {
      case DT_PLTREL:
        RLOG("  DT_PLTREL value=%d\n", dyn_value);
        if (dyn_value != DT_REL && dyn_value != DT_RELA) {
          *error = "Invalid DT_PLTREL value in dynamic section";
          return false;
        }
        relocations_type_ = dyn_value;
        break;
      case DT_JMPREL:
        RLOG("  DT_JMPREL addr=%p\n", dyn_addr);
        plt_relocations_ = dyn_addr;
        break;
      case DT_PLTRELSZ:
        plt_relocations_size_ = dyn_value;
        RLOG("  DT_PLTRELSZ size=%d\n", dyn_value);
        break;
      case DT_RELA:
      case DT_REL:
        RLOG("  %s addr=%p\n",
             (tag == DT_RELA) ? "DT_RELA" : "DT_REL",
             dyn_addr);
        if (relocations_) {
          *error = "Unsupported DT_RELA/DT_REL combination in dynamic section";
          return false;
        }
        relocations_ = dyn_addr;
        if (tag == DT_RELA)
          has_rela_relocations = true;
        else
          has_rel_relocations = true;
        break;
      case DT_RELASZ:
      case DT_RELSZ:
        // Log the size (dyn_value); the address interpretation of this entry
        // is meaningless for a size tag.
        RLOG("  %s size=%d\n",
             (tag == DT_RELASZ) ? "DT_RELASZ" : "DT_RELSZ",
             dyn_value);
        if (relocations_size_) {
          *error = "Unsupported DT_RELASZ/DT_RELSZ combination in dyn section";
          return false;
        }
        relocations_size_ = dyn_value;
        if (tag == DT_RELASZ)
          has_rela_relocations = true;
        else
          has_rel_relocations = true;
        break;
      case DT_PLTGOT:
        // Only used on MIPS currently. Could also be used on other platforms
        // when lazy binding (i.e. RTLD_LAZY) is implemented.
        RLOG("  DT_PLTGOT addr=%p\n", dyn_addr);
        plt_got_ = reinterpret_cast<ELF::Addr*>(dyn_addr);
        break;
      case DT_TEXTREL:
        RLOG("  DT_TEXTREL\n");
        has_text_relocations_ = true;
        break;
      case DT_SYMBOLIC:
        RLOG("  DT_SYMBOLIC\n");
        has_symbolic_ = true;
        break;
      case DT_FLAGS:
        if (dyn_value & DF_TEXTREL)
          has_text_relocations_ = true;
        if (dyn_value & DF_SYMBOLIC)
          has_symbolic_ = true;
        RLOG(" DT_FLAGS has_text_relocations=%s has_symbolic=%s\n",
             has_text_relocations_ ? "true" : "false",
             has_symbolic_ ? "true" : "false");
        break;
#if defined(__mips__)
      case DT_MIPS_SYMTABNO:
        RLOG("  DT_MIPS_SYMTABNO value=%d\n", dyn_value);
        mips_symtab_count_ = dyn_value;
        break;

      case DT_MIPS_LOCAL_GOTNO:
        RLOG("  DT_MIPS_LOCAL_GOTNO value=%d\n", dyn_value);
        mips_local_got_count_ = dyn_value;
        break;

      case DT_MIPS_GOTSYM:
        RLOG("  DT_MIPS_GOTSYM value=%d\n", dyn_value);
        mips_gotsym_ = dyn_value;
        break;
#endif
      default:
        ;
    }
  }

  // The relocation format is taken from DT_PLTREL, so it must have been seen.
  if (relocations_type_ != DT_REL && relocations_type_ != DT_RELA) {
    *error = "Unsupported or missing DT_PLTREL in dynamic section";
    return false;
  }

  // The regular relocation table must use the same format as the PLT one.
  if (relocations_type_ == DT_REL && has_rela_relocations) {
    *error = "Found DT_RELA in dyn section, but DT_PLTREL is DT_REL";
    return false;
  }
  if (relocations_type_ == DT_RELA && has_rel_relocations) {
    *error = "Found DT_REL in dyn section, but DT_PLTREL is DT_RELA";
    return false;
  }

  return true;
}
           

好吧,相當于又解析了一遍。

接着看ApplyAll方法:

bool ElfRelocations::ApplyAll(const ElfSymbols* symbols,
                              SymbolResolver* resolver,
                              Error* error) {
  LOG("%s: Enter\n", __FUNCTION__);

  if (has_text_relocations_) {
    if (phdr_table_unprotect_segments(phdr_, phdr_count_, load_bias_) < ) {
      error->Format("Can't unprotect loadable segments: %s", strerror(errno));
      return false;
    }
  }

  if (relocations_type_ == DT_REL) {
    if (!ApplyRelRelocs(reinterpret_cast<ELF::Rel*>(plt_relocations_),
                        plt_relocations_size_ / sizeof(ELF::Rel),
                        symbols,
                        resolver,
                        error))
      return false;
    if (!ApplyRelRelocs(reinterpret_cast<ELF::Rel*>(relocations_),
                        relocations_size_ / sizeof(ELF::Rel),
                        symbols,
                        resolver,
                        error))
      return false;
  }

  else if (relocations_type_ == DT_RELA) {
    if (!ApplyRelaRelocs(reinterpret_cast<ELF::Rela*>(plt_relocations_),
                         plt_relocations_size_ / sizeof(ELF::Rela),
                         symbols,
                         resolver,
                         error))
      return false;
    if (!ApplyRelaRelocs(reinterpret_cast<ELF::Rela*>(relocations_),
                         relocations_size_ / sizeof(ELF::Rela),
                         symbols,
                         resolver,
                         error))
      return false;
  }

#ifdef __mips__
  if (!RelocateMipsGot(symbols, resolver, error))
    return false;
#endif

  if (has_text_relocations_) {
    if (phdr_table_protect_segments(phdr_, phdr_count_, load_bias_) < ) {
      error->Format("Can't reprotect loadable segments: %s", strerror(errno));
      return false;
    }
  }

  LOG("%s: Done\n", __FUNCTION__);
  return true;
}
           

還是兩步走,如果是DT_REL,那麼就去調用

ApplyRelRelocs

執行plt_relocations_和relocations_的重定位;如果是DT_RELA,那麼就去調用

ApplyRelaRelocs

執行plt_relocations_和relocations_的重定位。

我們隻看一個,另一個邏輯是差不多的:

// Applies |rel_count| Rel-format relocations starting at |rel|.
//
// A null |rel| is accepted and treated as "nothing to do". Entries whose
// relocation type is 0 are skipped. For entries that reference a symbol
// (non-zero symbol index) the symbol address is resolved first; the entry is
// then handed to ApplyRelReloc().
//
// Returns false as soon as ApplyRelReloc() fails, true otherwise.
bool ElfRelocations::ApplyRelRelocs(const ELF::Rel* rel,
                                    size_t rel_count,
                                    const ElfSymbols* symbols,
                                    SymbolResolver* resolver,
                                    Error* error) {
  RLOG("%s: rel=%p rel_count=%d\n", __FUNCTION__, rel, rel_count);

  if (!rel)
    return true;

  for (size_t rel_n = 0; rel_n < rel_count; rel++, rel_n++) {
    const ELF::Word rel_type = ELF_R_TYPE(rel->r_info);
    const ELF::Word rel_symbol = ELF_R_SYM(rel->r_info);

    ELF::Addr sym_addr = 0;
    // Address of the location to patch: the entry's offset plus the load bias.
    ELF::Addr reloc = static_cast<ELF::Addr>(rel->r_offset + load_bias_);
    RLOG("  %d/%d reloc=%p offset=%p type=%d symbol=%d\n",
         rel_n + 1,
         rel_count,
         reloc,
         rel->r_offset,
         rel_type,
         rel_symbol);

    if (rel_type == 0)
      continue;

    bool resolved = false;

    // If this is a symbolic relocation, compute the symbol's address.
    // NOTE(review): a failed ResolveSymbol() does not abort the loop here;
    // the result is only forwarded to ApplyRelReloc() - confirm this is
    // intended.
    if (__builtin_expect(rel_symbol != 0, 0)) {
      resolved = ResolveSymbol(rel_type,
                               rel_symbol,
                               symbols,
                               resolver,
                               reloc,
                               &sym_addr,
                               error);
    }

    if (!ApplyRelReloc(rel, sym_addr, resolved, error))
      return false;
  }

  return true;
}
           

從重定位表的第一項開始,一個個解析。先看下重定位表項的格式:

// A 32-bit relocation entry without explicit addend.
//
// r_info packs two values: the low 8 bits hold the relocation type and the
// remaining upper bits hold the symbol table index.
struct Elf32_Rel {
  Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
  Elf32_Word r_info;   // Symbol table index and type of relocation to apply

  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
  // and ELF32_R_INFO macros defined in the ELF specification:
  Elf32_Word getSymbol() const { return (r_info >> 8); }
  unsigned char getType() const { return (unsigned char) (r_info & 0xff); }
  void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
  void setSymbolAndType(Elf32_Word s, unsigned char t) {
    r_info = (s << 8) + t;
  }
};
           

兩個字段,前面4位元組是需要進行重定位的位址,後面4位元組包含要進行重定位的符号表索引以及重定位的類型。

執行重定位的時候,先獲得需要進行重定位的位址,加上基位址就是記憶體中的位址reloc,然後分别得到重定位的類型rel_type和符号表索引rel_symbol。然後調用

ResolveSymbol

去解析這個符号的實際位址sym_addr(傳回值resolved表示是否解析成功),最後利用這個位址去實作重定位

ApplyRelReloc

看下解析是怎麼實作的:

// Computes the address to use for the symbol referenced by a relocation.
//
// |rel_type|   - relocation type, used only for unresolved weak symbols.
// |rel_symbol| - index of the symbol in |symbols|.
// |reloc|      - address of the place being relocated.
// |sym_addr|   - receives the resulting address on success.
//
// Returns true when |*sym_addr| was set. Returns false with |error|
// formatted when the symbol cannot be found and is not weak, or when it is
// an unresolved weak symbol used with an unsupported relocation category.
bool ElfRelocations::ResolveSymbol(ELF::Word rel_type,
                                   ELF::Word rel_symbol,
                                   const ElfSymbols* symbols,
                                   SymbolResolver* resolver,
                                   ELF::Addr reloc,
                                   ELF::Addr* sym_addr,
                                   Error* error) {
  const char* sym_name = symbols->LookupNameById(rel_symbol);
  RLOG("    symbol name='%s'\n", sym_name);
  void* address = resolver->Lookup(sym_name);

  if (address) {
    // The symbol was found, so compute its address.
    RLOG("%s: symbol %s resolved to %p\n", __FUNCTION__, sym_name, address);
    *sym_addr = reinterpret_cast<ELF::Addr>(address);
    return true;
  }

  // The symbol was not found. Normally this is an error except
  // if this is a weak reference.
  if (!symbols->IsWeakById(rel_symbol)) {
    error->Format("Could not find symbol '%s'", sym_name);
    return false;
  }

  RLOG("%s: weak reference to unresolved symbol %s\n", __FUNCTION__, sym_name);

  // IHI0044C AAELF 4.5.1.1:
  // Libraries are not searched to resolve weak references.
  // It is not an error for a weak reference to remain
  // unsatisfied.
  //
  // During linking, the value of an undefined weak reference is:
  // - Zero if the relocation type is absolute
  // - The address of the place if the relocation is pc-relative
  // - The address of nominal base address if the relocation
  //   type is base-relative.
  RelocationType r = GetRelocationType(rel_type);
  if (r == RELOCATION_TYPE_ABSOLUTE || r == RELOCATION_TYPE_RELATIVE) {
    *sym_addr = 0;
    return true;
  }

  if (r == RELOCATION_TYPE_PC_RELATIVE) {
    *sym_addr = reloc;
    return true;
  }

  error->Format(
      "Invalid weak relocation type (%d) for unknown symbol '%s'",
      r,
      sym_name);
  return false;
}
           

首先是調用

LookupNameById

根據rel_symbol找到對應的符号名稱sym_name,然後調用resolver的

Lookup

找到sym_name對應的符号位址address,最後做一個類型轉換變成sym_addr。

如果根據符号索引找不到對應的符号,要麼說明重定位的過程出錯了,要麼說明這個符号是一個弱連結。

看下符号的查找過程:

const char* LookupNameById(size_t symbol_id) const {
    const ELF::Sym* sym = LookupById(symbol_id);
    if (!sym)
      return NULL;
    return string_table_ + sym->st_name;
  }

  const ELF::Sym* LookupById(size_t symbol_id) const {
    // Address of the symbol table slot at the given index; the index is not
    // range-checked here.
    const ELF::Sym& entry = symbol_table_[symbol_id];
    return &entry;
  }
           

symbol_id表示了該符号在符号表中的索引,symbol_table_[symbol_id]就是該符号對應的符号表項,其中的st_name字段給出了符号名在字元串表中的偏移,那麼就可以得到符号的名稱了。

virtual void* Lookup(const char* symbol_name) {
    // Resolves |symbol_name| to an address, or returns NULL when nothing
    // matches. Search order: the current library, then the dynamic-linker
    // wrappers, then each dependency in list order.

    // TODO(digit): Add the ability to lookup inside the main executable.

    // First, look inside the current library.
    const ELF::Sym* entry = lib_->LookupSymbolEntry(symbol_name);
    if (entry)
      return reinterpret_cast<void*>(lib_->load_bias() + entry->st_value);

    // Special case: redirect the dynamic linker symbols to our wrappers.
    // This ensures that loaded libraries can call dlopen() / dlsym()
    // and transparently use the crazy linker to perform their duty.
    void* address = WrapLinkerSymbol(symbol_name);
    if (address)
      return address;

    // Then look inside the dependencies.
    for (size_t n = 0; n < dependencies_->GetCount(); ++n) {
      LibraryView* wrap = (*dependencies_)[n];
      // LOG("%s: Looking into dependency %p (%s)\n", __FUNCTION__, wrap,
      // wrap->GetName());
      if (wrap->IsSystem()) {
        address = ::dlsym(wrap->GetSystem(), symbol_name);
#ifdef __arm__
        // Android libm.so defines isnanf as weak. This means that its
        // address cannot be found by dlsym(), which always returns NULL
        // for weak symbols. However, libm.so contains the real isnanf
        // as __isnanf. If we encounter isnanf and fail to resolve it in
        // libm.so, retry with __isnanf.
        //
        // This occurs only in clang, which lacks __builtin_isnanf. The
        // gcc compiler implements isnanf as a builtin, so the symbol
        // isnanf never need be resolved in gcc builds.
        //
        // http://code.google.com/p/chromium/issues/detail?id=376828
        if (!address &&
            !strcmp(symbol_name, "isnanf") &&
            !strcmp(wrap->GetName(), "libm.so"))
          address = ::dlsym(wrap->GetSystem(), "__isnanf");
#endif
        if (address)
          return address;
      }
      if (wrap->IsCrazy()) {
        SharedLibrary* dep = wrap->GetCrazy();
        entry = dep->LookupSymbolEntry(symbol_name);
        if (entry)
          return reinterpret_cast<void*>(dep->load_bias() + entry->st_value);
      }
    }

    // Nothing found here.
    return NULL;
  }
           

首先,在目前的庫中找,

LookupSymbolEntry

找到了就直接傳回位址。

特殊情況下,會對動态連結符号做一個封裝

WrapLinkerSymbol

,保證被加載的庫可以直接通過dlopen() / dlsym()來進行連結。

如果本地庫中沒找到,那麼就會再去依賴庫中找。

找到符号位址之後,就要去重定位了,看下ApplyRelReloc的實作:

// Applies a single Rel-format relocation.
//
// |rel|      - the relocation entry; its offset plus load_bias_ is the
//              location that gets patched.
// |sym_addr| - address of the referenced symbol (0 when there is none).
// |resolved| - whether |sym_addr| came from a successful symbol resolution;
//              only consulted on MIPS.
//
// Returns true when the relocation was applied. Returns false with |error|
// set for unsupported relocation types, for R_ARM_COPY, and for relative
// relocations that carry a symbol index.
bool ElfRelocations::ApplyRelReloc(const ELF::Rel* rel,
                                   ELF::Addr sym_addr,
                                   bool resolved CRAZY_UNUSED,
                                   Error* error) {
  const ELF::Word rel_type = ELF_R_TYPE(rel->r_info);
  const ELF::Word CRAZY_UNUSED rel_symbol = ELF_R_SYM(rel->r_info);

  const ELF::Addr reloc = static_cast<ELF::Addr>(rel->r_offset + load_bias_);

  RLOG("  rel reloc=%p offset=%p type=%d\n", reloc, rel->r_offset, rel_type);

  // Apply the relocation.
  ELF::Addr* CRAZY_UNUSED target = reinterpret_cast<ELF::Addr*>(reloc);
  switch (rel_type) {
#ifdef __arm__
    case R_ARM_JUMP_SLOT:
      RLOG("  R_ARM_JUMP_SLOT target=%p addr=%p\n", target, sym_addr);
      *target = sym_addr;
      break;

    case R_ARM_GLOB_DAT:
      RLOG("  R_ARM_GLOB_DAT target=%p addr=%p\n", target, sym_addr);
      *target = sym_addr;
      break;

    case R_ARM_ABS32:
      RLOG("  R_ARM_ABS32 target=%p (%p) addr=%p\n",
           target,
           *target,
           sym_addr);
      *target += sym_addr;
      break;

    case R_ARM_REL32:
      RLOG("  R_ARM_REL32 target=%p (%p) addr=%p offset=%p\n",
           target,
           *target,
           sym_addr,
           rel->r_offset);
      // NOTE(review): this subtracts the unbiased r_offset, whereas the
      // i386 PC-relative case below subtracts the biased address |reloc|.
      // Left as is; confirm against the ARM ELF relocation definition.
      *target += sym_addr - rel->r_offset;
      break;

    case R_ARM_RELATIVE:
      RLOG("  R_ARM_RELATIVE target=%p (%p) bias=%p\n",
           target,
           *target,
           load_bias_);
      if (__builtin_expect(rel_symbol, 0)) {
        *error = "Invalid relative relocation with symbol";
        return false;
      }
      *target += load_bias_;
      break;

    case R_ARM_COPY:
      // NOTE: These relocations are forbidden in shared libraries.
      // The Android linker has special code to deal with this, which
      // is not needed here.
      RLOG("  R_ARM_COPY\n");
      *error = "Invalid R_ARM_COPY relocation in shared library";
      return false;
#endif  // __arm__

#ifdef __i386__
    case R_386_JMP_SLOT:
      *target = sym_addr;
      break;

    case R_386_GLOB_DAT:
      *target = sym_addr;
      break;

    case R_386_RELATIVE:
      if (rel_symbol) {
        *error = "Invalid relative relocation with symbol";
        return false;
      }
      *target += load_bias_;
      break;

    case R_386_32:
      *target += sym_addr;
      break;

    case R_386_PC32:
      *target += (sym_addr - reloc);
      break;
#endif  // __i386__

#ifdef __mips__
    case R_MIPS_REL32:
      if (resolved)
        *target += sym_addr;
      else
        *target += load_bias_;
      break;
#endif  // __mips__

    default:
      error->Format("Invalid relocation type (%d)", rel_type);
      return false;
  }

  return true;
}
           

reloc是需要進行重定位的位址,sym_addr是符号的位址,rel_type是重定位的類型。可以看到執行重定位時會根據不同的類型進行不同的處理,把對應的sym_addr賦給*target。

至此,重定位的過程就全部完成了。