xray_x86_64.cpp 11.6 KB
#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#if !SANITIZER_FUCHSIA
#include "sanitizer_common/sanitizer_posix.h"
#endif
#include "xray_defs.h"
#include "xray_interface_internal.h"

#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
#include <sys/types.h>
#if SANITIZER_OPENBSD
#include <sys/time.h>
#include <machine/cpu.h>
#endif
#include <sys/sysctl.h>
#elif SANITIZER_FUCHSIA
#include <zircon/syscalls.h>
#endif

#include <atomic>
#include <cstdint>
#include <errno.h>
#include <fcntl.h>
#include <iterator>
#include <limits>
#include <tuple>
#include <unistd.h>

namespace __xray {

#if SANITIZER_LINUX
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  auto BytesToRead = std::distance(Begin, End);
  ssize_t BytesRead;
  ssize_t TotalBytesRead = 0;
  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
    if (BytesRead == -1) {
      if (errno == EINTR)
        continue;
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(TotalBytesRead, false);
    }

    TotalBytesRead += BytesRead;
    BytesToRead -= BytesRead;
    Begin += BytesRead;
  }
  return std::make_pair(TotalBytesRead, true);
}

static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
  close(Fd);
  if (!Success)
    return false;
  const char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  bool Result = false;
  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
    *Value = Tmp;
    Result = true;
  }
  return Result;
}

uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                        &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else if (readValueFromFile(
                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                 &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }
  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
}
#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
    long long TSCFrequency = -1;
    size_t tscfreqsz = sizeof(TSCFrequency);
#if SANITIZER_OPENBSD
    int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
    if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
#elif SANITIZER_MAC
    if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
                              &tscfreqsz, NULL, 0) != -1) {

#else
    if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
                              NULL, 0) != -1) {
#endif
        return static_cast<uint64_t>(TSCFrequency);
    } else {
      Report("Unable to determine CPU frequency for TSC accounting.\n");
    }

    return 0;
}
#elif !SANITIZER_FUCHSIA
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
    /* Not supported */
    return 0;
}
#endif

static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;

static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};

bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +9
  //   <9 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   call <relative 32bit offset to entry trampoline>
  //
  // We need to do this in the following order:
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  // after the 2-byte jmp instruction).
  // 2. Put the call opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  // opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the trampoline's address.
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Sled.Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
           Trampoline, reinterpret_cast<void *>(Sled.Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   ret
  //   <10 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   jmp <relative 32bit offset to exit trampoline>
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  // after the 1-byte ret instruction).
  // 2. Put the jmp opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  // opcode and first operand.
  //
  // Prerequisite is to compute the relative offset fo the
  // __xray_FunctionExit function's address.
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
                             (static_cast<int64_t>(Sled.Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the tail call sled with a similar
  // sequence as the entry sled, but calls the tail exit sled instead.
  int64_t TrampolineOffset =
      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
      (static_cast<int64_t>(Sled.Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
           __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // In Version 0:
  //
  // xray_sled_n:
  //   jmp +20          // 2 bytes
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes*
  //   ...
  //
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  //
  // ---
  //
  // In Version 1:
  //
  //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
  //   to a jmp, use 15 bytes instead.
  //
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
        std::memory_order_release);
  } else {
    switch (Sled.Version) {
    case 1:
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
          std::memory_order_release);
      break;
    case 0:
    default:
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
          std::memory_order_release);
      break;
    }
    }
  return false;
}

bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +20          // 2 byte instruction
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes
  //   ...
  //
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  // The 20 byte sled stashes three argument registers, calls the trampoline,
  // unstashes the registers and returns. If the arguments are already in
  // the correct registers, the stashing and unstashing become equivalently
  // sized nops.
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
        std::memory_order_release);
  } else {
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
          std::memory_order_release);
  }
  return false;
}

#if !SANITIZER_FUCHSIA
// We determine whether the CPU we're running on has the correct features we
// need. In x86_64 this will be rdtscp support.
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
  unsigned int EAX, EBX, ECX, EDX;

  // We check whether rdtscp support is enabled. According to the x86_64 manual,
  // level should be set at 0x80000001, and we should have a look at bit 27 in
  // EDX. That's 0x8000000 (or 1u << 27).
  __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
    : "0"(0x80000001));
  if (!(EDX & (1u << 27))) {
    Report("Missing rdtscp support.\n");
    return false;
  }
  // Also check whether we can determine the CPU frequency, since if we cannot,
  // we should use the emulated TSC instead.
  if (!getTSCFrequency()) {
    Report("Unable to determine CPU frequency.\n");
    return false;
  }
  return true;
}
#endif

} // namespace __xray