path: root/chromium/base/profiler/native_stack_sampler_mac.cc
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/profiler/native_stack_sampler.h"

#include <dlfcn.h>
#include <libkern/OSByteOrder.h>
#include <libunwind.h>
#include <mach-o/compact_unwind_encoding.h>
#include <mach-o/getsect.h>
#include <mach-o/swap.h>
#include <mach/kern_return.h>
#include <mach/mach.h>
#include <mach/thread_act.h>
#include <mach/vm_map.h>
#include <pthread.h>
#include <sys/resource.h>
#include <sys/syslimits.h>

#include <algorithm>
#include <map>
#include <memory>

#include "base/logging.h"
#include "base/mac/mach_logging.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_number_conversions.h"

extern "C" {
void _sigtramp(int, int, struct sigset*);
}

namespace base {

namespace {

// Maps a module's address range (half-open) in memory to an index in a separate
// data structure.
struct ModuleIndex {
  ModuleIndex(uintptr_t start, uintptr_t end, size_t idx)
      : base_address(start), end_address(end), index(idx) {}
  // Base address of the represented module.
  uintptr_t base_address;
  // First address past the end of the represented module.
  uintptr_t end_address;
  // An index to the represented module in a separate container.
  size_t index;
};

// Module identifiers ---------------------------------------------------------

// Returns the unique build ID for a module loaded at |module_addr|. Returns the
// empty string if the function fails to get the build ID.
//
// Build IDs are created by the concatenation of the module's GUID (Windows) /
// UUID (Mac) and an "age" field that indicates how many times that GUID/UUID
// has been reused. In Windows binaries, the "age" field is present in the
// module header, but on the Mac, UUIDs are never reused and so the "age" value
// appended to the UUID is always 0.
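// For example (hypothetical UUID), a module whose LC_UUID is
// 1B4856A0-37C4-3F7B-92E0-21D279EC45A3 would get the ID
// "1B4856A037C43F7B92E021D279EC45A30".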
std::string GetUniqueId(const void* module_addr) {
  const mach_header_64* mach_header =
      reinterpret_cast<const mach_header_64*>(module_addr);
  DCHECK_EQ(MH_MAGIC_64, mach_header->magic);

  size_t offset = sizeof(mach_header_64);
  size_t offset_limit = sizeof(mach_header_64) + mach_header->sizeofcmds;
  for (uint32_t i = 0; (i < mach_header->ncmds) &&
                       (offset + sizeof(load_command) < offset_limit);
       ++i) {
    const load_command* current_cmd = reinterpret_cast<const load_command*>(
        reinterpret_cast<const uint8_t*>(mach_header) + offset);

    if (offset + current_cmd->cmdsize > offset_limit) {
      // This command runs off the end of the command list. This is malformed.
      return std::string();
    }

    if (current_cmd->cmd == LC_UUID) {
      if (current_cmd->cmdsize < sizeof(uuid_command)) {
        // This "UUID command" is too small. This is malformed.
        return std::string();
      }

      const uuid_command* uuid_cmd =
          reinterpret_cast<const uuid_command*>(current_cmd);
      static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
                    "UUID field of UUID command should be 16 bytes.");
      // The ID is the UUID concatenated with the Mac's "age" value, which is
      // always 0.
      return HexEncode(&uuid_cmd->uuid, sizeof(uuid_cmd->uuid)) + "0";
    }
    offset += current_cmd->cmdsize;
  }
  return std::string();
}

// Returns the size of the __TEXT segment of the module loaded at |module_addr|.
size_t GetModuleTextSize(const void* module_addr) {
  const mach_header_64* mach_header =
      reinterpret_cast<const mach_header_64*>(module_addr);
  DCHECK_EQ(MH_MAGIC_64, mach_header->magic);

  unsigned long module_size;
  getsegmentdata(mach_header, SEG_TEXT, &module_size);

  return module_size;
}

// Gets the index for the Module containing |instruction_pointer| in
// |modules|, adding it if it's not already present. Returns
// StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
// determined for |instruction_pointer|.
size_t GetModuleIndex(const uintptr_t instruction_pointer,
                      std::vector<StackSamplingProfiler::Module>* modules,
                      std::vector<ModuleIndex>* profile_module_index) {
  // Check if |instruction_pointer| is in the address range of a module we've
  // already seen.
  auto module_index =
      std::find_if(profile_module_index->begin(), profile_module_index->end(),
                   [instruction_pointer](const ModuleIndex& index) {
                     return instruction_pointer >= index.base_address &&
                            instruction_pointer < index.end_address;
                   });
  if (module_index != profile_module_index->end()) {
    return module_index->index;
  }
  Dl_info inf;
  if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
    return StackSamplingProfiler::Frame::kUnknownModuleIndex;

  StackSamplingProfiler::Module module(
      reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
      base::FilePath(inf.dli_fname));
  modules->push_back(module);

  uintptr_t base_module_address = reinterpret_cast<uintptr_t>(inf.dli_fbase);
  size_t index = modules->size() - 1;
  profile_module_index->emplace_back(
      base_module_address,
      base_module_address + GetModuleTextSize(inf.dli_fbase), index);
  return index;
}

// Stack walking --------------------------------------------------------------

// Fills |state| with |target_thread|'s context.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
  mach_msg_type_number_t count =
      static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
  return thread_get_state(target_thread, x86_THREAD_STATE64,
                          reinterpret_cast<thread_state_t>(state),
                          &count) == KERN_SUCCESS;
}

// If the value at |pointer| points to the original stack, rewrites it to point
// to the corresponding location in the copied stack.
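// For example (hypothetical addresses): if the original stack occupies
// [0x7000, 0x8000) and the copy starts at 0x2000, the value 0x7010 is
// rewritten to 0x2010, while 0x9000 lies outside the original stack and is
// returned unchanged.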
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
uintptr_t RewritePointerIfInOriginalStack(
    const uintptr_t* original_stack_bottom,
    const uintptr_t* original_stack_top,
    uintptr_t* stack_copy_bottom,
    uintptr_t pointer) {
  uintptr_t original_stack_bottom_int =
      reinterpret_cast<uintptr_t>(original_stack_bottom);
  uintptr_t original_stack_top_int =
      reinterpret_cast<uintptr_t>(original_stack_top);
  uintptr_t stack_copy_bottom_int =
      reinterpret_cast<uintptr_t>(stack_copy_bottom);

  if ((pointer < original_stack_bottom_int) ||
      (pointer >= original_stack_top_int)) {
    return pointer;
  }

  return stack_copy_bottom_int + (pointer - original_stack_bottom_int);
}

// Copies the stack to a buffer while rewriting possible pointers to locations
// within the stack to point to the corresponding locations in the copy. This is
// necessary to handle stack frames with dynamic stack allocation, where a
// pointer to the beginning of the dynamic allocation area is stored on the
// stack and/or in a non-volatile register.
//
// Eager rewriting of anything that looks like a pointer to the stack, as done
// in this function, does not adversely affect the stack unwinding. The only
// other values on the stack the unwinding depends on are return addresses,
// which should not point within the stack memory. The rewriting is guaranteed
// to catch all pointers because the stacks are guaranteed by the ABI to be
// sizeof(void*) aligned.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
void CopyStackAndRewritePointers(uintptr_t* stack_copy_bottom,
                                 const uintptr_t* original_stack_bottom,
                                 const uintptr_t* original_stack_top,
                                 x86_thread_state64_t* thread_state)
    NO_SANITIZE("address") {
  size_t count = original_stack_top - original_stack_bottom;
  for (size_t pos = 0; pos < count; ++pos) {
    stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
        original_stack_bottom, original_stack_top, stack_copy_bottom,
        original_stack_bottom[pos]);
  }

  uint64_t* rewrite_registers[] = {&thread_state->__rbx, &thread_state->__rbp,
                                   &thread_state->__rsp, &thread_state->__r12,
                                   &thread_state->__r13, &thread_state->__r14,
                                   &thread_state->__r15};
  for (auto* reg : rewrite_registers) {
    *reg = RewritePointerIfInOriginalStack(
        original_stack_bottom, original_stack_top, stack_copy_bottom, *reg);
  }
}

// Extracts the "frame offset" for a given frame from the compact unwind info.
// A frame offset indicates the location of saved non-volatile registers in
// relation to the frame pointer. See |mach-o/compact_unwind_encoding.h| for
// details.
uint32_t GetFrameOffset(int compact_unwind_info) {
  // The frame offset lives in bits 16-23. This shifts it down by the number of
  // leading zeroes in the mask, then masks with (1 << number of one bits in the
  // mask) - 1, turning 0x00FF0000 into 0x000000FF. Adapted from |EXTRACT_BITS|
  // in libunwind's CompactUnwinder.hpp.
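  // Worked example (illustrative encoding): with UNWIND_X86_64_RBP_FRAME_OFFSET
  // == 0x00FF0000, __builtin_ctz yields 16 and __builtin_popcount yields 8, so
  // an encoding of 0x00020000 produces (0x00020000 >> 16) & 0xFF == 2, i.e. the
  // saved non-volatile registers start 2 * sizeof(unw_word_t) bytes below the
  // frame pointer (cf. HasValidRbp below).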
  return (
      (compact_unwind_info >> __builtin_ctz(UNWIND_X86_64_RBP_FRAME_OFFSET)) &
      (((1 << __builtin_popcount(UNWIND_X86_64_RBP_FRAME_OFFSET))) - 1));
}

// True if the unwind from |leaf_frame_rip| may trigger a crash bug in
// unw_init_local. If so, the stack walk should be aborted at the leaf frame.
bool MayTriggerUnwInitLocalCrash(uint64_t leaf_frame_rip) {
  // The issue here is a bug in unw_init_local that, in some unwinds, results in
  // attempts to access memory at the address immediately following the address
  // range of the library. When the library is the last of the mapped libraries
  // that address is in a different memory region. Starting with 10.13.4 beta
  // releases it appears that this region is sometimes either unmapped or mapped
  // without read access, resulting in crashes on the attempted access. It's not
  // clear what circumstances result in this situation; attempts to reproduce on
  // a 10.13.4 beta did not trigger the issue.
  //
  // The workaround is to check if the memory address that would be accessed is
  // readable, and if not, abort the stack walk before calling unw_init_local.
  // As of 2018/03/19 about 0.1% of non-idle stacks on the UI and GPU main
  // threads have a leaf frame in the last library. Since the issue appears to
  // only occur some of the time it's expected that the quantity of lost samples
  // will be lower than 0.1%, possibly significantly lower.
  //
  // TODO(lgrey): Add references above to LLVM/Radar bugs on unw_init_local once
  // filed.
  Dl_info info;
  if (dladdr(reinterpret_cast<const void*>(leaf_frame_rip), &info) == 0)
    return false;
  uint64_t unused;
  vm_size_t size = sizeof(unused);
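  // Probe one word immediately past the module's __TEXT segment. If it can't
  // be read, unw_init_local could fault on the same access, so report that the
  // unwind may crash.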
  return vm_read_overwrite(current_task(),
                           reinterpret_cast<vm_address_t>(info.dli_fbase) +
                               GetModuleTextSize(info.dli_fbase),
                           sizeof(unused),
                           reinterpret_cast<vm_address_t>(&unused), &size) != 0;
}

// Checks whether the cursor contains a valid-looking frame pointer for frame
// pointer unwinds. If the stack frame has a frame pointer, stepping the
// cursor will involve memory accesses indexed off of that pointer. In that
// case, sanity-check the frame pointer register to ensure it's within bounds.
//
// Additionally, the stack frame might be in a prologue or epilogue, which can
// cause a crash when the unwinder attempts to access non-volatile registers
// that have not yet been pushed, or have already been popped from the
// stack. libunwind will try to restore those registers using an offset from
// the frame pointer. However, since we copy the stack from RSP up, any
// locations below the stack pointer are before the beginning of the stack
// buffer. Account for this by checking that the expected location is above the
// stack pointer, and rejecting the sample if it isn't.
bool HasValidRbp(unw_cursor_t* unwind_cursor, uintptr_t stack_top) {
  unw_proc_info_t proc_info;
  unw_get_proc_info(unwind_cursor, &proc_info);
  if ((proc_info.format & UNWIND_X86_64_MODE_MASK) ==
      UNWIND_X86_64_MODE_RBP_FRAME) {
    unw_word_t rsp, rbp;
    unw_get_reg(unwind_cursor, UNW_X86_64_RSP, &rsp);
    unw_get_reg(unwind_cursor, UNW_X86_64_RBP, &rbp);
    uint32_t offset = GetFrameOffset(proc_info.format) * sizeof(unw_word_t);
    if (rbp < offset || (rbp - offset) < rsp || rbp > stack_top) {
      return false;
    }
  }
  return true;
}

// Walks the stack represented by |unwind_context|, calling back to the provided
// lambda for each frame. Returns false if an error occurred, otherwise returns
// true.
template <typename StackFrameCallback, typename ContinueUnwindPredicate>
bool WalkStackFromContext(
    unw_context_t* unwind_context,
    size_t* frame_count,
    std::vector<StackSamplingProfiler::Module>* current_modules,
    std::vector<ModuleIndex>* profile_module_index,
    const StackFrameCallback& callback,
    const ContinueUnwindPredicate& continue_unwind) {
  unw_cursor_t unwind_cursor;
  unw_init_local(&unwind_cursor, unwind_context);

  int step_result;
  unw_word_t rip;
  do {
    ++(*frame_count);
    unw_get_reg(&unwind_cursor, UNW_REG_IP, &rip);

    // Ensure IP is in a module.
    //
    // Frameless unwinding (non-DWARF) works by fetching the function's
    // stack size from the unwind encoding or stack, and adding it to the
    // stack pointer to determine the function's return address.
    //
    // If we're in a function prologue or epilogue, the actual stack size
    // may be smaller than it will be during the normal course of execution.
    // When libunwind adds the expected stack size, it will look for the
    // return address in the wrong place. This check should ensure that we
    // bail before trying to deref a bad IP obtained this way in the previous
    // frame.
    size_t module_index =
        GetModuleIndex(rip, current_modules, profile_module_index);
    if (module_index == StackSamplingProfiler::Frame::kUnknownModuleIndex) {
      return false;
    }

    callback(static_cast<uintptr_t>(rip), module_index);

    if (!continue_unwind(&unwind_cursor))
      return false;

    step_result = unw_step(&unwind_cursor);
  } while (step_result > 0);

  if (step_result != 0)
    return false;

  return true;
}

const char* LibSystemKernelName() {
  static char path[PATH_MAX];
  static char* name = nullptr;
  if (name)
    return name;

  Dl_info info;
  dladdr(reinterpret_cast<void*>(_exit), &info);
  strlcpy(path, info.dli_fname, PATH_MAX);
  name = path;

#if !defined(ADDRESS_SANITIZER)
  DCHECK_EQ(std::string(name),
            std::string("/usr/lib/system/libsystem_kernel.dylib"));
#endif
  return name;
}

void GetSigtrampRange(uintptr_t* start, uintptr_t* end) {
  uintptr_t address = reinterpret_cast<uintptr_t>(&_sigtramp);
  DCHECK(address != 0);

  *start = address;

  unw_context_t context;
  unw_cursor_t cursor;
  unw_proc_info_t info;

  unw_getcontext(&context);
  // Set the context's RIP to the beginning of sigtramp,
  // +1 byte to work around a bug in 10.11 (crbug.com/764468).
  context.data[16] = address + 1;
  unw_init_local(&cursor, &context);
  unw_get_proc_info(&cursor, &info);

  DCHECK_EQ(info.start_ip, address);
  *end = info.end_ip;
}

// Walks the stack represented by |thread_state|, calling back to the provided
// lambda for each frame.
template <typename StackFrameCallback, typename ContinueUnwindPredicate>
void WalkStack(const x86_thread_state64_t& thread_state,
               std::vector<StackSamplingProfiler::Module>* current_modules,
               std::vector<ModuleIndex>* profile_module_index,
               const StackFrameCallback& callback,
               const ContinueUnwindPredicate& continue_unwind) {
  size_t frame_count = 0;
  // This uses libunwind to walk the stack. libunwind is designed to be used for
  // a thread to walk its own stack. This creates two problems.

  // Problem 1: There is no official way to create a unw_context other than to
  // create it from the current state of the current thread's stack. To get
  // around this, forge a context. A unw_context is just a copy of the 16 main
  // registers followed by the instruction pointer, nothing more.
  // Coincidentally, the first 17 items of the x86_thread_state64_t type are
  // exactly those registers in exactly the same order, so just bulk copy them
  // over.
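  // In x86_thread_state64_t that order is rax, rbx, rcx, rdx, rdi, rsi, rbp,
  // rsp, r8-r15, rip, which is why data[7] below corresponds to rsp and
  // data[16] to rip.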
  unw_context_t unwind_context;
  memcpy(&unwind_context, &thread_state, sizeof(uintptr_t) * 17);
  bool result =
      WalkStackFromContext(&unwind_context, &frame_count, current_modules,
                           profile_module_index, callback, continue_unwind);

  if (!result)
    return;

  if (frame_count == 1) {
    // Problem 2: Because libunwind is designed to be triggered by user code on
    // their own thread, if it hits a library that has no unwind info for the
    // function that is being executed, it just stops. This isn't a problem in
    // the normal case, but in this case, it's quite possible that the stack
    // being walked is stopped in a function that bridges to the kernel and thus
    // is missing the unwind info.

    // For now, just unwind the single case where the thread is stopped in a
    // function in libsystem_kernel.
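    // libsystem_kernel functions are typically thin syscall stubs that don't
    // set up a stack frame, so the return address is still at the top of the
    // stack; emulate a `ret` by popping it into rip, then resume the walk.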
    uint64_t& rsp = unwind_context.data[7];
    uint64_t& rip = unwind_context.data[16];
    Dl_info info;
    if (dladdr(reinterpret_cast<void*>(rip), &info) != 0 &&
        strcmp(info.dli_fname, LibSystemKernelName()) == 0) {
      rip = *reinterpret_cast<uint64_t*>(rsp);
      rsp += 8;
      WalkStackFromContext(&unwind_context, &frame_count, current_modules,
                           profile_module_index, callback, continue_unwind);
    }
  }
}

// ScopedSuspendThread --------------------------------------------------------

// Suspends a thread for the lifetime of the object.
class ScopedSuspendThread {
 public:
  explicit ScopedSuspendThread(mach_port_t thread_port)
      : thread_port_(thread_suspend(thread_port) == KERN_SUCCESS
                         ? thread_port
                         : MACH_PORT_NULL) {}

  ~ScopedSuspendThread() {
    if (!was_successful())
      return;

    kern_return_t kr = thread_resume(thread_port_);
    MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
  }

  bool was_successful() const { return thread_port_ != MACH_PORT_NULL; }

 private:
  mach_port_t thread_port_;

  DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
};

// NativeStackSamplerMac ------------------------------------------------------

class NativeStackSamplerMac : public NativeStackSampler {
 public:
  NativeStackSamplerMac(mach_port_t thread_port,
                        AnnotateCallback annotator,
                        NativeStackSamplerTestDelegate* test_delegate);
  ~NativeStackSamplerMac() override;

  // StackSamplingProfiler::NativeStackSampler:
  void ProfileRecordingStarting(
      std::vector<StackSamplingProfiler::Module>* modules) override;
  void RecordStackSample(StackBuffer* stack_buffer,
                         StackSamplingProfiler::Sample* sample) override;
  void ProfileRecordingStopped(StackBuffer* stack_buffer) override;

 private:
  // Suspends the thread with |thread_port_|, copies its stack and resumes the
  // thread, then records the stack frames and associated modules into |sample|.
  void SuspendThreadAndRecordStack(StackBuffer* stack_buffer,
                                   StackSamplingProfiler::Sample* sample);

  // Weak reference: Mach port for thread being profiled.
  mach_port_t thread_port_;

  const AnnotateCallback annotator_;

  NativeStackSamplerTestDelegate* const test_delegate_;

  // The stack base address corresponding to |thread_port_|.
  const void* const thread_stack_base_address_;

  // Weak. Points to the modules associated with the profile being recorded
  // between ProfileRecordingStarting() and ProfileRecordingStopped().
  std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;

  // Maps a module's address range to the corresponding Module's index within
  // current_modules_.
  std::vector<ModuleIndex> profile_module_index_;

  // The address range of |_sigtramp|, the signal trampoline function.
  uintptr_t sigtramp_start_;
  uintptr_t sigtramp_end_;

  DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
};

NativeStackSamplerMac::NativeStackSamplerMac(
    mach_port_t thread_port,
    AnnotateCallback annotator,
    NativeStackSamplerTestDelegate* test_delegate)
    : thread_port_(thread_port),
      annotator_(annotator),
      test_delegate_(test_delegate),
      thread_stack_base_address_(
          pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))) {
  DCHECK(annotator_);

  GetSigtrampRange(&sigtramp_start_, &sigtramp_end_);
  // This class suspends threads, and those threads might be suspended in dyld.
  // Therefore, any dynamically linked system functions used while a thread is
  // suspended must be called now, so that they are resolved before the first
  // sample is taken.
  x86_thread_state64_t thread_state;
  GetThreadState(thread_port_, &thread_state);
}

NativeStackSamplerMac::~NativeStackSamplerMac() {}

void NativeStackSamplerMac::ProfileRecordingStarting(
    std::vector<StackSamplingProfiler::Module>* modules) {
  current_modules_ = modules;
  profile_module_index_.clear();
}

void NativeStackSamplerMac::RecordStackSample(
    StackBuffer* stack_buffer,
    StackSamplingProfiler::Sample* sample) {
  DCHECK(current_modules_);

  SuspendThreadAndRecordStack(stack_buffer, sample);
}

void NativeStackSamplerMac::ProfileRecordingStopped(StackBuffer* stack_buffer) {
  current_modules_ = nullptr;
}

void NativeStackSamplerMac::SuspendThreadAndRecordStack(
    StackBuffer* stack_buffer,
    StackSamplingProfiler::Sample* sample) {
  x86_thread_state64_t thread_state;

  // Copy the stack.

  uintptr_t new_stack_top = 0;
  {
    // IMPORTANT NOTE: Do not do ANYTHING in this scope that might
    // allocate memory, including indirectly via use of DCHECK/CHECK or other
    // logging statements. Otherwise this code can deadlock on heap locks in the
    // default heap acquired by the target thread before it was suspended.
    ScopedSuspendThread suspend_thread(thread_port_);
    if (!suspend_thread.was_successful())
      return;

    if (!GetThreadState(thread_port_, &thread_state))
      return;
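    // The stack grows downward, so the in-use region runs from the current
    // rsp (the bottom) up to the pthread stack base address (the top).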
    uintptr_t stack_top =
        reinterpret_cast<uintptr_t>(thread_stack_base_address_);
    uintptr_t stack_bottom = thread_state.__rsp;
    if (stack_bottom >= stack_top)
      return;
    uintptr_t stack_size = stack_top - stack_bottom;

    if (stack_size > stack_buffer->size())
      return;

    (*annotator_)(sample);

    CopyStackAndRewritePointers(
        reinterpret_cast<uintptr_t*>(stack_buffer->buffer()),
        reinterpret_cast<uintptr_t*>(stack_bottom),
        reinterpret_cast<uintptr_t*>(stack_top), &thread_state);

    new_stack_top =
        reinterpret_cast<uintptr_t>(stack_buffer->buffer()) + stack_size;
  }  // ScopedSuspendThread

  if (test_delegate_)
    test_delegate_->OnPreStackWalk();

  // Walk the stack and record it.

  // Reserve enough memory for most stacks, to avoid repeated allocations.
  // Approximately 99.9% of recorded stacks are 128 frames or fewer.
  sample->frames.reserve(128);

  auto* current_modules = current_modules_;
  auto* profile_module_index = &profile_module_index_;

  // Avoid an out-of-bounds read bug in libunwind that can crash us in some
  // circumstances. If we're subject to that case, just record the first frame
  // and bail. See MayTriggerUnwInitLocalCrash for details.
  uintptr_t rip = thread_state.__rip;
  if (MayTriggerUnwInitLocalCrash(rip)) {
    sample->frames.emplace_back(
        rip, GetModuleIndex(rip, current_modules, profile_module_index));
    return;
  }

  const auto continue_predicate = [this,
                                   new_stack_top](unw_cursor_t* unwind_cursor) {
    // Don't continue if we're in sigtramp. Unwinding this from another thread
    // is very fragile. It's a complex DWARF unwind that needs to restore the
    // entire thread context which was saved by the kernel when the interrupt
    // occurred.
    unw_word_t rip;
    unw_get_reg(unwind_cursor, UNW_REG_IP, &rip);
    if (rip >= sigtramp_start_ && rip < sigtramp_end_)
      return false;

    // Don't continue if rbp appears to be invalid (due to a previous bad
    // unwind).
    return HasValidRbp(unwind_cursor, new_stack_top);
  };

  WalkStack(thread_state, current_modules, profile_module_index,
            [sample, current_modules, profile_module_index](
                uintptr_t frame_ip, size_t module_index) {
              sample->frames.emplace_back(frame_ip, module_index);
            },
            continue_predicate);
}

}  // namespace

std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
    PlatformThreadId thread_id,
    AnnotateCallback annotator,
    NativeStackSamplerTestDelegate* test_delegate) {
  return std::make_unique<NativeStackSamplerMac>(thread_id, annotator,
                                                 test_delegate);
}

size_t NativeStackSampler::GetStackBufferSize() {
  // In platform_thread_mac's GetDefaultThreadStackSize(), RLIMIT_STACK is used
  // for all stacks, not just the main thread's, so it is good for use here.
  struct rlimit stack_rlimit;
  if (getrlimit(RLIMIT_STACK, &stack_rlimit) == 0 &&
      stack_rlimit.rlim_cur != RLIM_INFINITY) {
    return stack_rlimit.rlim_cur;
  }

  // If getrlimit fails or reports an unlimited stack, fall back to the
  // default macOS main thread stack size of 8 MB (DFLSSIZ in
  // <i386/vmparam.h>) with extra wiggle room.
  return 12 * 1024 * 1024;
}

}  // namespace base