Thrill  0.1
malloc_tracker.cpp
1 /*******************************************************************************
2  * thrill/mem/malloc_tracker.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2013-2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE
13 #endif
14 
15 #include <thrill/mem/malloc_tracker.hpp>
16 
17 #include <thrill/common/json_logger.hpp>
18 #include <thrill/common/profile_thread.hpp>
19 #include <tlx/backtrace.hpp>
20 #include <tlx/define.hpp>
21 
22 #if __linux__ || __APPLE__ || __FreeBSD__
23 
24 #include <dlfcn.h>
25 
26 #endif
27 
28 #include <algorithm>
29 #include <atomic>
30 #include <chrono>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <limits>
35 #include <mutex>
36 #include <utility>
37 
38 #if defined(__clang__) || defined(__GNUC__)
39 
40 #define ATTRIBUTE_NO_SANITIZE_ADDRESS \
41  __attribute__ ((no_sanitize_address)) /* NOLINT */
42 
43 #if defined(__GNUC__) && __GNUC__ >= 5
44 #define ATTRIBUTE_NO_SANITIZE_THREAD \
45  __attribute__ ((no_sanitize_thread)) /* NOLINT */
46 #else
47 #define ATTRIBUTE_NO_SANITIZE_THREAD
48 #endif
49 
50 #define ATTRIBUTE_NO_SANITIZE \
51  ATTRIBUTE_NO_SANITIZE_ADDRESS ATTRIBUTE_NO_SANITIZE_THREAD
52 
53 #else
54 #define ATTRIBUTE_NO_SANITIZE
55 #endif
56 
57 namespace thrill {
58 namespace mem {
59 
60 /******************************************************************************/
61 // user-defined options for logging malloc()/free() operations to stderr
62 
63 // v-- set these to 1 for log output
64 static constexpr bool log_operations = 0;
65 static constexpr bool log_bypass_operations = 0;
66 
67 static constexpr size_t log_operations_threshold = 100000;
68 static constexpr size_t log_bypass_operations_threshold = 100000;
69 
70 // v-- set these to 1 for profiling output
71 static constexpr bool profile_operations = 0;
72 static constexpr bool profile_bypass_operations = 0;
73 
74 // enable checking of bypass_malloc() and bypass_free() pairing
75 #define BYPASS_CHECKER 0
76 
77 // super-simple and super-slow leak detection
78 #define LEAK_CHECKER 0
79 
80 /******************************************************************************/
81 // variables of malloc tracker
82 
83 //! In the generic hook implementation, we add to each allocation additional
84 //! data for bookkeeping.
85 static constexpr size_t padding = 16; /* bytes (>= 2*sizeof(size_t)) */
86 
87 //! function pointer to the real procedures, loaded using dlsym()
88 using malloc_type = void* (*)(size_t);
89 using free_type = void (*)(void*);
90 using realloc_type = void* (*)(void*, size_t);
91 using aligned_alloc_type = void* (*)(size_t, size_t);
92 
93 static malloc_type real_malloc = nullptr;
94 static free_type real_free = nullptr;
95 static realloc_type real_realloc = nullptr;
96 static aligned_alloc_type real_aligned_alloc = nullptr;
97 
98 //! a sentinel value prefixed to each allocation
99 static constexpr size_t sentinel = 0xDEADC0DE;
100 
101 #define USE_ATOMICS 0
102 
103 //! CounterType is used for atomic counters, and get() retrieves their
104 //! contents. Due to the thread-local cached statistics, the overall memory
105 //! usage counter can actually go negative!
106 #if defined(_MSC_VER) || USE_ATOMICS
107 using CounterType = std::atomic<ssize_t>;
108 #else
109 // we cannot use std::atomic on gcc/clang because only real atomic instructions
110 // work with the Sanitizers
111 using CounterType = ssize_t;
112 #endif
113 
114 // the macro exists only so that automatic source formatting is not messed up
115 #define COUNTER_ZERO { 0 }
116 
117 ATTRIBUTE_NO_SANITIZE
118 static inline ssize_t get(const CounterType& a) {
119 #if defined(_MSC_VER) || USE_ATOMICS
120  return a.load();
121 #else
122  return a;
123 #endif
124 }
125 
126 ATTRIBUTE_NO_SANITIZE
127 static inline ssize_t sync_add_and_fetch(CounterType& curr, ssize_t inc) {
128 #if defined(_MSC_VER) || USE_ATOMICS
129  return (curr += inc);
130 #else
131  return __sync_add_and_fetch(&curr, inc);
132 #endif
133 }
134 
135 ATTRIBUTE_NO_SANITIZE
136 static inline ssize_t sync_sub_and_fetch(CounterType& curr, ssize_t dec) {
137 #if defined(_MSC_VER) || USE_ATOMICS
138  return (curr -= dec);
139 #else
140  return __sync_sub_and_fetch(&curr, dec);
141 #endif
142 }
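As background: on GCC/Clang these __sync builtins are full-barrier atomic read-modify-write operations on plain integers, which is why a plain ssize_t counter suffices here. A minimal standalone sketch of their semantics (illustrative, not part of this file):

    #include <cstdio>

    static long counter = 0;

    int main() {
        long a = __sync_add_and_fetch(&counter, 5); // counter becomes 5, returns new value
        long b = __sync_sub_and_fetch(&counter, 2); // counter becomes 3, returns new value
        printf("%ld %ld %ld\n", a, b, counter);     // prints "5 3 3"
        return 0;
    }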
143 
144 //! a simple memory heap for allocations prior to dlsym loading
145 #define INIT_HEAP_SIZE 1024 * 1024
146 static char init_heap[INIT_HEAP_SIZE];
147 static CounterType init_heap_use COUNTER_ZERO;
148 static constexpr int log_operations_init_heap = 0;
149 
150 //! align allocations to init_heap to this number by rounding up allocations
151 static constexpr size_t init_alignment = sizeof(size_t);
152 
153 //! prefix for malloc_tracker output to stderr
154 #define PPREFIX "malloc_tracker ### "
155 
156 /******************************************************************************/
157 // Run-time memory allocation statistics
158 
159 static CounterType total_bytes COUNTER_ZERO;
160 static CounterType peak_bytes COUNTER_ZERO;
161 
162 static CounterType total_allocs COUNTER_ZERO;
163 static CounterType current_allocs COUNTER_ZERO;
164 
165 // free-floating memory allocated by malloc/free
166 static CounterType float_curr COUNTER_ZERO;
167 
168 // Thrill base memory allocated by bypass_malloc/bypass_free
169 static CounterType base_curr COUNTER_ZERO;
170 
171 //! memory limit exceeded indicator
172 bool memory_exceeded = false;
173 ssize_t memory_limit_indication = std::numeric_limits<ssize_t>::max();
174 
175 // prototype for profiling
177 static void update_memprofile(ssize_t float_current, ssize_t base_current);
178 
179 struct LocalStats {
180  size_t total_allocs;
181  int64_t current_allocs;
182  int64_t bytes;
183 };
184 
185 #if !defined(__APPLE__)
186 #define HAVE_THREAD_LOCAL 1
187 #else
188 #define HAVE_THREAD_LOCAL 0
189 #endif
190 
191 #if HAVE_THREAD_LOCAL
192 static thread_local LocalStats tl_stats = { 0, 0, 0 };
193 static const ssize_t tl_delay_threshold = 1024 * 1024;
194 #endif
195 
196 ATTRIBUTE_NO_SANITIZE
197 void update_peak(ssize_t float_curr, ssize_t base_curr) {
198  if (float_curr + base_curr > peak_bytes)
199  peak_bytes = float_curr + base_curr;
200 }
201 
202 ATTRIBUTE_NO_SANITIZE
203 void flush_memory_statistics() {
204 #if HAVE_THREAD_LOCAL
205  // no-operation if no thread_local is available.
206  ssize_t mycurr = sync_add_and_fetch(float_curr, tl_stats.bytes);
207 
208  sync_add_and_fetch(total_bytes, tl_stats.bytes);
209  sync_add_and_fetch(total_allocs, tl_stats.total_allocs);
210  sync_add_and_fetch(current_allocs, tl_stats.current_allocs);
211  update_peak(mycurr, base_curr);
212 
213  memory_exceeded = (mycurr >= memory_limit_indication);
214  update_memprofile(mycurr, get(base_curr));
215 
216  tl_stats.bytes = 0;
217  tl_stats.total_allocs = 0;
218  tl_stats.current_allocs = 0;
219 #endif
220 }
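The function above drains the thread-local tl_stats into the global counters in one batch, so the allocation hot path touches no shared cache line. The same delayed-flush pattern, sketched standalone with std::atomic and hypothetical names for clarity (the file itself avoids std::atomic for sanitizer compatibility):

    #include <atomic>

    static std::atomic<long> g_bytes{0};    // shared global counter
    static thread_local long tl_bytes = 0;  // per-thread unpublished delta
    static const long kFlushThreshold = 1024 * 1024;

    inline void add_bytes(long n) {
        tl_bytes += n;
        // publish only once the local delta exceeds +/- 1 MiB
        if (tl_bytes > kFlushThreshold || tl_bytes < -kFlushThreshold) {
            g_bytes.fetch_add(tl_bytes);
            tl_bytes = 0;
        }
    }

This batching is also why the overall usage counter can transiently go negative: one thread may flush its frees before another thread flushes the matching allocations.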
221 
222 //! add allocation to statistics
223 ATTRIBUTE_NO_SANITIZE
224 static void inc_count(size_t inc) {
225 #if HAVE_THREAD_LOCAL
226  tl_stats.total_allocs++;
227  tl_stats.current_allocs++;
228  tl_stats.bytes += inc;
229 
230  if (tl_stats.bytes > tl_delay_threshold)
231  flush_memory_statistics();
232 #else
233  // no thread_local data structure -> update immediately (more contention)
234  ssize_t mycurr = sync_add_and_fetch(float_curr, inc);
235  total_bytes += inc;
236  update_peak(mycurr, base_curr);
237 
238  sync_add_and_fetch(total_allocs, 1);
239  sync_add_and_fetch(current_allocs, 1);
240 
241  memory_exceeded = (mycurr >= memory_limit_indication);
242  update_memprofile(mycurr, get(base_curr));
243 #endif
244 }
245 
246 //! subtract deallocation from statistics
247 ATTRIBUTE_NO_SANITIZE
248 static void dec_count(size_t dec) {
249 #if HAVE_THREAD_LOCAL
250  tl_stats.current_allocs--;
251  tl_stats.bytes -= dec;
252 
253  if (tl_stats.bytes < -tl_delay_threshold)
254  flush_memory_statistics();
255 #else
256  // no thread_local data structure -> update immediately (more contention)
257  ssize_t mycurr = sync_sub_and_fetch(float_curr, dec);
258 
259  sync_sub_and_fetch(current_allocs, 1);
260 
261  memory_exceeded = (mycurr >= memory_limit_indication);
262  update_memprofile(mycurr, get(base_curr));
263 #endif
264 }
265 
266 //! user function to return the currently allocated amount of memory
267 ssize_t malloc_tracker_current() {
268  return float_curr;
269 }
270 
271 //! user function to return the peak allocation
272 ssize_t malloc_tracker_peak() {
273  return peak_bytes;
274 }
275 
276 //! user function to reset the peak allocation to current
277 void malloc_tracker_reset_peak() {
278  peak_bytes = get(float_curr);
279 }
280 
281 //! user function to return total number of allocations
282 ssize_t malloc_tracker_total_allocs() {
283  return total_allocs;
284 }
285 
286 //! user function which prints current and peak allocation to stderr
287 void malloc_tracker_print_status() {
288  fprintf(stderr, PPREFIX "floating %zu, peak %zu, base %zu\n",
289  get(float_curr), get(peak_bytes), get(base_curr));
290 }
291 
292 void set_memory_limit_indication(ssize_t size) {
293  // fprintf(stderr, PPREFIX "set_memory_limit_indication %zu\n", size);
294  memory_limit_indication = size;
295 }
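Assuming these user functions are declared in thrill/mem/malloc_tracker.hpp, as the include at the top suggests, an application could poll the tracker like this (hypothetical usage sketch):

    #include <thrill/mem/malloc_tracker.hpp>
    #include <cstdio>

    void report_memory() {
        // all three return ssize_t, hence %zd
        fprintf(stderr, "current %zd, peak %zd, allocs %zd\n",
                thrill::mem::malloc_tracker_current(),
                thrill::mem::malloc_tracker_peak(),
                thrill::mem::malloc_tracker_total_allocs());
        thrill::mem::malloc_tracker_reset_peak(); // start a fresh peak window
    }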
296 
297 /******************************************************************************/
298 // Run-time memory profiler
299 
300 static constexpr bool mp_enable = true;
301 
302 static CounterType mp_next_bar COUNTER_ZERO;
303 
304 struct OhlcBar {
305  ssize_t high = 0, low = 0, close = 0;
306 
307  //! initialize the bar from the current value
308  void init(ssize_t current) {
309  high = low = close = current;
310  }
311 
312  //! aggregate a new value into the bar
313  void aggregate(ssize_t current) {
314  if (high < current) high = current;
315  if (low > current) low = current;
316  close = current;
317  }
318 };
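Each profiling interval is condensed into one high/low/close bar, as in financial OHLC charts. A short worked example, assuming the struct above:

    OhlcBar bar;
    bar.init(100);       // high = low = close = 100
    bar.aggregate(120);  // high -> 120
    bar.aggregate(80);   // low  -> 80
    bar.aggregate(95);   // close = 95
    // now: bar.high == 120, bar.low == 80, bar.close == 95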
319 
320 // Two Ohlc bars: for free floating memory and for Thrill base memory.
321 static OhlcBar mp_float, mp_base;
322 
324 static void update_memprofile(ssize_t float_current, ssize_t base_current) {
325 
326  if (!mp_enable) return;
327 
328  if (mp_next_bar) {
329  // start new OHLC bars
330  mp_float.init(float_current);
331  mp_base.init(base_current);
332  mp_next_bar = false;
333  }
334  else {
335  // aggregate into OHLC bars
336  mp_float.aggregate(float_current);
337  mp_base.aggregate(base_current);
338  }
339 }
340 
341 class MemoryProfiler final : public common::ProfileTask
342 {
343 public:
344  explicit MemoryProfiler(common::JsonLogger& logger) : logger_(logger) { }
345 
346  void RunTask(const std::chrono::steady_clock::time_point& tp) final;
347 
348 private:
349  //! reference to JsonLogger for output
350  common::JsonLogger& logger_;
351 };
352 
353 ATTRIBUTE_NO_SANITIZE
354 void MemoryProfiler::RunTask(const std::chrono::steady_clock::time_point&) {
355 
356  // -tb: the access to these memory positions is not synchronized. I'm
357  // not sure how to do this right without a performance hit for each
358  // malloc() call.
359 
360  // copy current values
361  OhlcBar copy_float = mp_float, copy_base = mp_base;
362  mp_next_bar = true;
363 
364  common::JsonLine line = logger_.line();
365 
366  line << "class" << "MemProfile"
367  << "event" << "profile"
368  << "total" << copy_float.close + copy_base.close
369  << "float" << copy_float.close
370  << "base" << copy_base.close;
371 
372  line.sub("float_hlc")
373  << "high" << copy_float.high
374  << "low" << copy_float.low
375  << "close" << copy_float.close;
376 
377  line.sub("base_hlc")
378  << "high" << copy_base.high
379  << "low" << copy_base.low
380  << "close" << copy_base.close;
381 }
382 
383 void StartMemProfiler(common::ProfileThread& sched, common::JsonLogger& logger) {
384  sched.Add(std::chrono::milliseconds(250),
385  new MemoryProfiler(logger), /* own_task */ true);
386 }
387 
388 /******************************************************************************/
389 // Initialize function pointers to the real underlying malloc implementation.
390 
391 #if __linux__ || __APPLE__ || __FreeBSD__
392 
393 ATTRIBUTE_NO_SANITIZE
394 static __attribute__ ((constructor)) void init() { // NOLINT
395 
396  // try to use AddressSanitizer's malloc first.
397  real_malloc = (malloc_type)dlsym(RTLD_DEFAULT, "__interceptor_malloc");
398  if (real_malloc)
399  {
400  real_realloc = (realloc_type)dlsym(RTLD_DEFAULT, "__interceptor_realloc");
401  if (!real_realloc) {
402  fprintf(stderr, PPREFIX "dlerror %s\n", dlerror());
403  exit(EXIT_FAILURE);
404  }
405 
406  real_free = (free_type)dlsym(RTLD_DEFAULT, "__interceptor_free");
407  if (!real_free) {
408  fprintf(stderr, PPREFIX "dlerror %s\n", dlerror());
409  exit(EXIT_FAILURE);
410  }
411 
412  fprintf(stderr, PPREFIX "using AddressSanitizer's malloc\n");
413 
414  return;
415  }
416 
417  real_malloc = (malloc_type)dlsym(RTLD_NEXT, "malloc");
418  if (!real_malloc) {
419  fprintf(stderr, PPREFIX "dlerror %s\n", dlerror());
420  exit(EXIT_FAILURE);
421  }
422 
423  real_realloc = (realloc_type)dlsym(RTLD_NEXT, "realloc");
424  if (!real_realloc) {
425  fprintf(stderr, PPREFIX "dlerror %s\n", dlerror());
426  exit(EXIT_FAILURE);
427  }
428 
429  real_aligned_alloc = (aligned_alloc_type)dlsym(RTLD_NEXT, "aligned_alloc");
430 
431  real_free = (free_type)dlsym(RTLD_NEXT, "free");
432  if (!real_free) {
433  fprintf(stderr, PPREFIX "dlerror %s\n", dlerror());
434  exit(EXIT_FAILURE);
435  }
436 }
437 
438 ATTRIBUTE_NO_SANITIZE
439 static __attribute__ ((destructor)) void finish() { // NOLINT
441  fprintf(stderr, PPREFIX
442  "exiting, total: %zu, peak: %zu, current: %zu / %zu, "
443  "allocs: %zu, unfreed: %zu\n",
444  get(total_bytes), get(peak_bytes),
445  get(float_curr), get(base_curr),
446  get(total_allocs), get(current_allocs));
447 }
448 
449 #endif
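For illustration, the interposition pattern used by the constructor above can be reduced to the following standalone sketch (hypothetical demo code, not this file's implementation; the real file resolves the pointers eagerly in a constructor precisely because dlsym() itself may allocate):

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE
    #endif
    #include <dlfcn.h>
    #include <cstddef>

    using malloc_type = void* (*)(size_t);

    extern "C" void * malloc(size_t size) {
        // resolve the next 'malloc' in library search order (normally libc's)
        static malloc_type next = (malloc_type)dlsym(RTLD_NEXT, "malloc");
        // bookkeeping would go here; note that anything which itself
        // calls malloc() (printf, dlsym, ...) can recurse
        return next(size);
    }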
450 
451 /******************************************************************************/
452 // Functions to bypass the malloc tracker
453 
454 #if !defined(NDEBUG) && BYPASS_CHECKER
455 static constexpr size_t kBypassCheckerSize = 1024 * 1024;
456 static std::pair<void*, size_t> s_bypass_checker[kBypassCheckerSize];
457 static std::mutex s_bypass_mutex;
458 #endif
459 
460 ATTRIBUTE_NO_SANITIZE
461 void * bypass_malloc(size_t size) noexcept {
462 #if defined(_MSC_VER)
463  void* ptr = malloc(size);
464 #else
465  void* ptr = real_malloc(size);
466 #endif
467  if (!ptr) {
468  fprintf(stderr, PPREFIX "bypass_malloc(%zu size) = %p (current %zu / %zu)\n",
469  size, ptr, get(float_curr), get(base_curr));
470  return ptr;
471  }
472 
473  if (log_bypass_operations && size >= log_bypass_operations_threshold) {
474  fprintf(stderr, PPREFIX "bypass_malloc(%zu size) = %p (current %zu / %zu)\n",
475  size, ptr, get(float_curr), get(base_curr));
476  }
477 
478  if (profile_bypass_operations) {
479  tlx::print_raw_backtrace(
480  stdout, 16, PPREFIX "bypass profile %zu", size);
481  }
482 
483 #if !defined(NDEBUG) && BYPASS_CHECKER
484  {
485  std::unique_lock<std::mutex> lock(s_bypass_mutex);
486  size_t i;
487  for (i = 0; i < kBypassCheckerSize; ++i) {
488  if (s_bypass_checker[i].first != nullptr) continue;
489  s_bypass_checker[i].first = ptr;
490  s_bypass_checker[i].second = size;
491  break;
492  }
493  if (i == kBypassCheckerSize) abort();
494  }
495 #endif
496 
497  ssize_t mycurr = sync_add_and_fetch(base_curr, size);
498 
499  total_bytes += size;
500  update_peak(float_curr, mycurr);
501 
502  sync_add_and_fetch(total_allocs, 1);
503  sync_add_and_fetch(current_allocs, 1);
504 
505  update_memprofile(get(float_curr), mycurr);
506 
507  return ptr;
508 }
509 
510 ATTRIBUTE_NO_SANITIZE
511 void bypass_free(void* ptr, size_t size) noexcept {
512 
513 #if !defined(NDEBUG) && BYPASS_CHECKER
514  {
515  std::unique_lock<std::mutex> lock(s_bypass_mutex);
516  size_t i;
517  for (i = 0; i < kBypassCheckerSize; ++i) {
518  if (s_bypass_checker[i].first != ptr) continue;
519 
520  if (s_bypass_checker[i].second == size) {
521  s_bypass_checker[i].first = nullptr;
522  break;
523  }
524 
525  printf(PPREFIX "bypass_free() checker: "
526  "ptr %p size %zu mismatches allocation of %zu\n",
527  ptr, size, s_bypass_checker[i].second);
528  abort();
529  }
530  if (i == kBypassCheckerSize) {
531  printf(PPREFIX "bypass_free() checker: "
532  "ptr = %p size %zu was not found\n", ptr, size);
533  abort();
534  }
535  }
536 #endif
537 
538  ssize_t mycurr = sync_sub_and_fetch(base_curr, size);
539 
540  sync_sub_and_fetch(current_allocs, 1);
541 
542  update_memprofile(get(float_curr), mycurr);
543 
544 #if defined(_MSC_VER)
545  return free(ptr);
546 #else
547  return real_free(ptr);
548 #endif
549 }
550 
551 ATTRIBUTE_NO_SANITIZE
552 void * bypass_aligned_alloc(size_t alignment, size_t size) noexcept {
553 #if defined(_MSC_VER)
554  void* ptr = _aligned_malloc(size, alignment);
555 #else
556  void* ptr;
557  if (real_aligned_alloc) {
558  ptr = real_aligned_alloc(alignment, size);
559  }
560  else {
561  // emulate alignment by wasting memory
562  void* mem = real_malloc((alignment - 1) + sizeof(void*) + size);
563 
564  uintptr_t uptr = reinterpret_cast<uintptr_t>(mem) + sizeof(void*);
565  uptr = (uptr + alignment - 1) & ~uintptr_t(alignment - 1); // round up; avoids overrunning the block when already aligned
566  ptr = reinterpret_cast<void*>(uptr);
567 
568  // store original pointer for deallocation
569  (reinterpret_cast<void**>(ptr))[-1] = mem;
570  }
571 #endif
572  if (!ptr) {
573  fprintf(stderr, PPREFIX "bypass_aligned_alloc(%zu align %zu size) = %p (current %zu / %zu)\n",
574  alignment, size, ptr, get(float_curr), get(base_curr));
575  return ptr;
576  }
577 
578 #if !defined(NDEBUG) && BYPASS_CHECKER
579  {
580  std::unique_lock<std::mutex> lock(s_bypass_mutex);
581  size_t i;
582  for (i = 0; i < kBypassCheckerSize; ++i) {
583  if (s_bypass_checker[i].first != nullptr) continue;
584  s_bypass_checker[i].first = ptr;
585  s_bypass_checker[i].second = size;
586  break;
587  }
588  if (i == kBypassCheckerSize) abort();
589  }
590 #endif
591 
592  ssize_t mycurr = sync_add_and_fetch(base_curr, size);
593 
594  total_bytes += size;
595  update_peak(float_curr, mycurr);
596 
597  sync_add_and_fetch(total_allocs, 1);
598  sync_add_and_fetch(current_allocs, 1);
599 
600  update_memprofile(get(float_curr), mycurr);
601 
602  return ptr;
603 }
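A worked example of the emulation arithmetic above: with alignment = 64 and real_malloc() returning mem = 0x1008, uptr starts at 0x1008 + 8 = 0x1010, is rounded up to ptr = 0x1040, and mem is stored at 0x1038, i.e. at ptr[-1], where bypass_aligned_free() recovers it when no real aligned_alloc() is available.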
604 
605 ATTRIBUTE_NO_SANITIZE
606 void bypass_aligned_free(void* ptr, size_t size) noexcept {
607 
608 #if !defined(NDEBUG) && BYPASS_CHECKER
609  {
610  std::unique_lock<std::mutex> lock(s_bypass_mutex);
611  size_t i;
612  for (i = 0; i < kBypassCheckerSize; ++i) {
613  if (s_bypass_checker[i].first != ptr) continue;
614 
615  if (s_bypass_checker[i].second == size) {
616  s_bypass_checker[i].first = nullptr;
617  break;
618  }
619 
620  printf(PPREFIX "bypass_aligned_free() checker: "
621  "ptr %p size %zu mismatches allocation of %zu\n",
622  ptr, size, s_bypass_checker[i].second);
623  abort();
624  }
625  if (i == kBypassCheckerSize) {
626  printf(PPREFIX "bypass_aligned_free() checker: "
627  "ptr = %p size %zu was not found\n", ptr, size);
628  abort();
629  }
630  }
631 #endif
632 
633  ssize_t mycurr = sync_sub_and_fetch(base_curr, size);
634 
635  sync_sub_and_fetch(current_allocs, 1);
636 
637  update_memprofile(get(float_curr), mycurr);
638 
639 #if defined(_MSC_VER)
640  return _aligned_free(ptr);
641 #else
642  if (real_aligned_alloc) {
643  return real_free(ptr);
644  }
645  else {
646  real_free((reinterpret_cast<void**>(ptr))[-1]);
647  }
648 #endif
649 }
650 
651 } // namespace mem
652 } // namespace thrill
653 
654 /******************************************************************************/
655 // exported symbols that overlay the libc functions
656 
657 using namespace thrill::mem; // NOLINT
658 
659 ATTRIBUTE_NO_SANITIZE
660 static void * preinit_malloc(size_t size) noexcept {
661 
662  size_t aligned_size = size + (init_alignment - size % init_alignment);
663 
664 #if defined(_MSC_VER) || USE_ATOMICS
665  size_t offset = (init_heap_use += (padding + aligned_size)) - (padding + aligned_size); // fetch value before add, as in the __sync branch
666 #else
667  size_t offset = __sync_fetch_and_add(&init_heap_use, padding + aligned_size);
668 #endif
669 
670  if (offset + padding + aligned_size > INIT_HEAP_SIZE) {
671  fprintf(stderr, PPREFIX "init heap full !!!\n");
672  exit(EXIT_FAILURE);
673  }
674 
675  char* ret = init_heap + offset;
676 
677  //! prepend allocation size and check sentinel
678  *reinterpret_cast<size_t*>(ret) = aligned_size;
679  *reinterpret_cast<size_t*>(ret + padding - sizeof(size_t)) = sentinel;
680 
681  inc_count(aligned_size);
682 
683  if (log_operations_init_heap) {
684  fprintf(stderr, PPREFIX "malloc(%zu / %zu) = %p on init heap\n",
685  size, aligned_size, static_cast<void*>(ret + padding));
686  }
687 
688  return ret + padding;
689 }
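The resulting init-heap allocation layout (on a typical 64-bit platform, where sizeof(size_t) == 8 and padding = 16) is:

    offset  0 : size_t aligned_size   (size of this allocation, read back on free)
    offset  8 : size_t sentinel       (0xDEADC0DE, checked to detect corruption)
    offset 16 : user data ...         (this address is returned to the caller)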
690 
691 ATTRIBUTE_NO_SANITIZE
692 static void * preinit_realloc(void* ptr, size_t size) {
693 
694  if (log_operations_init_heap) {
695  fprintf(stderr, PPREFIX "realloc(%p) = on init heap\n", ptr);
696  }
697 
698  ptr = static_cast<char*>(ptr) - padding;
699 
700  if (*reinterpret_cast<size_t*>(
701  static_cast<char*>(ptr) + padding - sizeof(size_t)) != sentinel) {
702  fprintf(stderr, PPREFIX
703  "realloc(%p) has no sentinel !!! memory corruption?\n",
704  ptr);
705  }
706 
707  size_t oldsize = *reinterpret_cast<size_t*>(ptr);
708 
709  if (oldsize >= size) {
710  //! keep old area
711  return static_cast<char*>(ptr) + padding;
712  }
713  else {
714  //! allocate new area and copy data
715  ptr = static_cast<char*>(ptr) + padding;
716  void* newptr = malloc(size);
717  memcpy(newptr, ptr, oldsize);
718  free(ptr);
719  return newptr;
720  }
721 }
722 
723 ATTRIBUTE_NO_SANITIZE
724 static void preinit_free(void* ptr) {
725  // don't do any real deallocation.
726 
727  ptr = static_cast<char*>(ptr) - padding;
728 
729  if (*reinterpret_cast<size_t*>(
730  static_cast<char*>(ptr) + padding - sizeof(size_t)) != sentinel) {
731  fprintf(stderr, PPREFIX
732  "free(%p) has no sentinel !!! memory corruption?\n",
733  ptr);
734  }
735 
736  size_t size = *reinterpret_cast<size_t*>(ptr);
737  dec_count(size);
738 
739  if (log_operations_init_heap) {
740  fprintf(stderr, PPREFIX "free(%p) -> %zu on init heap\n", ptr, size);
741  }
742 }
743 
744 #if __APPLE__
745 
746 #define NOEXCEPT
747 #define MALLOC_USABLE_SIZE malloc_size
748 #include <malloc/malloc.h>
749 
750 #elif __FreeBSD__
751 
752 #define NOEXCEPT
753 #define MALLOC_USABLE_SIZE malloc_usable_size
754 #include <malloc_np.h>
755 
756 #elif __linux__
757 
758 #define NOEXCEPT noexcept
759 #define MALLOC_USABLE_SIZE malloc_usable_size
760 #include <malloc.h>
761 
762 #endif
763 
764 /******************************************************************************/
765 // Super-simple and Super-Slow Leak Detection
766 
767 #if LEAK_CHECKER
768 static constexpr size_t kLeakCheckerSize = 1024 * 1024;
769 static constexpr size_t kLeakCheckerBacktrace = 32;
770 struct LeakCheckerEntry {
771  void * ptr;
772  size_t size;
773  size_t round;
774  void * addrlist[kLeakCheckerBacktrace];
775 };
776 static LeakCheckerEntry s_leak_checker[kLeakCheckerSize];
777 static std::mutex s_leak_mutex;
778 static size_t s_leak_round = 0;
779 
780 static void leakchecker_malloc(void* ptr, size_t size) {
781  std::unique_lock<std::mutex> lock(s_leak_mutex);
782  size_t i;
783  for (i = 0; i < kLeakCheckerSize; ++i) {
784  if (s_leak_checker[i].ptr != nullptr) continue;
785  s_leak_checker[i].ptr = ptr;
786  s_leak_checker[i].size = size;
787  s_leak_checker[i].round = s_leak_round;
788  // retrieve current stack addresses
789  lock.unlock();
790  backtrace(s_leak_checker[i].addrlist, kLeakCheckerBacktrace);
791  break;
792  }
793  if (i == kLeakCheckerSize) abort();
794 }
795 
796 static void leakchecker_free(void* ptr) {
797  std::unique_lock<std::mutex> lock(s_leak_mutex);
798  size_t i;
799  for (i = 0; i < kLeakCheckerSize; ++i) {
800  if (s_leak_checker[i].ptr == ptr) {
801  s_leak_checker[i].ptr = nullptr;
802  break;
803  }
804  }
805  if (i == kLeakCheckerSize) {
806  printf(PPREFIX "leak_free() checker: "
807  "ptr = %p was not found\n", ptr);
808  // abort();
809  }
810 }
811 #endif
812 
813 namespace thrill {
814 namespace mem {
815 
816 void malloc_tracker_print_leaks() {
817 #if LEAK_CHECKER
818  std::unique_lock<std::mutex> lock(s_leak_mutex);
819  for (size_t i = 0; i < kLeakCheckerSize; ++i) {
820  if (s_leak_checker[i].ptr == nullptr) continue;
821 
822  if (s_leak_checker[i].round == s_leak_round) {
823  void** addrlist = s_leak_checker[i].addrlist;
824  printf(PPREFIX "leak checker: "
825  "ptr %p size %zu new unfreed allocation: "
826  "%p %p %p %p %p %p %p %p %p %p %p %p %p %p %p %p "
827  "%p %p %p %p %p %p %p %p %p %p %p %p %p %p %p %p\n",
828  s_leak_checker[i].ptr, s_leak_checker[i].size,
829  addrlist[0], addrlist[1], addrlist[2], addrlist[3],
830  addrlist[4], addrlist[5], addrlist[6], addrlist[7],
831  addrlist[8], addrlist[9], addrlist[10], addrlist[11],
832  addrlist[12], addrlist[13], addrlist[14], addrlist[15],
833  addrlist[16], addrlist[17], addrlist[18], addrlist[19],
834  addrlist[20], addrlist[21], addrlist[22], addrlist[23],
835  addrlist[24], addrlist[25], addrlist[26], addrlist[27],
836  addrlist[28], addrlist[29], addrlist[30], addrlist[31]);
837  }
838  }
839  ++s_leak_round;
840 #endif
841 }
842 
843 } // namespace mem
844 } // namespace thrill
845 
846 /******************************************************************************/
847 
848 #if defined(MALLOC_USABLE_SIZE)
849 
850 /*
851  * This is a malloc() tracker implementation which uses an available library
852  * call to determine the amount of memory used by an allocation (which may be
853  * more than the requested size). On Linux's glibc there is malloc_usable_size().
854  */
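malloc_usable_size() is a glibc extension rather than a standard function, and it usually reports more than the requested size because the allocator rounds requests up. A small standalone demonstration (Linux/glibc; the printed value may differ by allocator):

    #include <malloc.h>  // malloc_usable_size() on Linux/glibc
    #include <cstdio>
    #include <cstdlib>

    int main() {
        void* p = malloc(5);
        // glibc typically rounds small requests up, e.g. to 24 usable bytes
        printf("requested 5, usable %zu\n", malloc_usable_size(p));
        free(p);
        return 0;
    }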
855 
856 //! exported malloc symbol that overrides loading from libc
857 ATTRIBUTE_NO_SANITIZE
858 void * malloc(size_t size) NOEXCEPT {
859 
860  if (TLX_UNLIKELY(!real_malloc))
861  return preinit_malloc(size);
862 
863  //! call real malloc procedure in libc
864  void* ret = (*real_malloc)(size);
865  if (!ret) {
866  fprintf(stderr, PPREFIX "malloc(%zu size) = %p (current %zu / %zu)\n",
867  size, ret, get(float_curr), get(base_curr));
868  return nullptr;
869  }
870 
871  size_t size_used = MALLOC_USABLE_SIZE(ret);
872  inc_count(size_used);
873 
874  if (log_operations && size_used >= log_operations_threshold) {
875  fprintf(stderr, PPREFIX "malloc(%zu size / %zu used) = %p (current %zu / %zu)\n",
876  size, size_used, ret, get(float_curr), get(base_curr));
877  }
878 
879  if (profile_operations) {
880 #if !__APPLE__
881  static thread_local bool recursive = false;
882 
883  if (!recursive) {
884  recursive = true;
885 
886  tlx::print_raw_backtrace(
887  stdout, 16, PPREFIX "profile %zu", size);
888 
889  recursive = false;
890  }
891 #endif
892  }
893 
894  {
895 #if LEAK_CHECKER
896  static thread_local bool recursive = false;
897  if (!recursive) {
898  recursive = true;
899  leakchecker_malloc(ret, size);
900  recursive = false;
901  }
902 #endif
903  }
904 
905  return ret;
906 }
907 
908 //! exported free symbol that overrides loading from libc
909 ATTRIBUTE_NO_SANITIZE
910 void free(void* ptr) NOEXCEPT {
911 
912  if (!ptr) return; //! free(nullptr) is no operation
913 
914  if (TLX_UNLIKELY(
915  static_cast<char*>(ptr) >= init_heap &&
916  static_cast<char*>(ptr) <= init_heap + get(init_heap_use)))
917  {
918  return preinit_free(ptr);
919  }
920 
921  if (TLX_UNLIKELY(!real_free)) {
922  fprintf(stderr, PPREFIX
923  "free(%p) outside init heap and without real_free !!!\n", ptr);
924  return;
925  }
926 
927  size_t size_used = MALLOC_USABLE_SIZE(ptr);
928  dec_count(size_used);
929 
930  if (log_operations && size_used >= log_operations_threshold) {
931  fprintf(stderr, PPREFIX "free(%p) -> %zu (current %zu / %zu)\n",
932  ptr, size_used, get(float_curr), get(base_curr));
933  }
934 
935 #if LEAK_CHECKER
936  leakchecker_free(ptr);
937 #endif
938 
939  (*real_free)(ptr);
940 }
941 
942 //! exported calloc() symbol that overrides loading from libc, implemented using
943 //! our malloc
944 ATTRIBUTE_NO_SANITIZE
945 void * calloc(size_t nmemb, size_t size) NOEXCEPT {
946  size *= nmemb;
947  void* ret = malloc(size);
948  if (!ret) return ret;
949  memset(ret, 0, size);
950  return ret;
951 }
952 
953 //! exported realloc() symbol that overrides loading from libc
954 ATTRIBUTE_NO_SANITIZE
955 void * realloc(void* ptr, size_t size) NOEXCEPT {
956 
957  if (static_cast<char*>(ptr) >= static_cast<char*>(init_heap) &&
958  static_cast<char*>(ptr) <= static_cast<char*>(init_heap) + get(init_heap_use))
959  {
960  return preinit_realloc(ptr, size);
961  }
962 
963  if (size == 0) { //! special case size == 0 -> free()
964  free(ptr);
965  return nullptr;
966  }
967 
968  if (ptr == nullptr) { //! special case ptr == 0 -> malloc()
969  return malloc(size);
970  }
971 
972  size_t oldsize_used = MALLOC_USABLE_SIZE(ptr);
973  dec_count(oldsize_used);
974 
975  void* newptr = (*real_realloc)(ptr, size);
976  if (!newptr) return nullptr;
977 
978  size_t newsize_used = MALLOC_USABLE_SIZE(newptr);
979  inc_count(newsize_used);
980 
981  if (log_operations && newsize_used >= log_operations_threshold)
982  {
983  if (newptr == ptr) {
984  fprintf(stderr, PPREFIX
985  "realloc(%zu -> %zu / %zu) = %p (current %zu / %zu)\n",
986  oldsize_used, size, newsize_used, newptr,
987  get(float_curr), get(base_curr));
988  }
989  else {
990  fprintf(stderr, PPREFIX
991  "realloc(%zu -> %zu / %zu) = %p -> %p (current %zu / %zu)\n",
992  oldsize_used, size, newsize_used, ptr, newptr,
993  get(float_curr), get(base_curr));
994  }
995  }
996 
997  return newptr;
998 }
999 
1000 /******************************************************************************/
1001 
1002 #elif !defined(_MSC_VER) // GENERIC IMPLEMENTATION for Unix
1003 
1004 /*
1005  * This is a generic implementation that counts memory allocations by
1006  * prefixing every user allocation with its size. On free(), the size can be
1007  * retrieved. Obviously, this wastes lots of memory if there are many small
1008  * allocations.
1009  */
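For instance, with padding = 16 a one-byte allocation consumes at least 17 bytes from the underlying allocator, so workloads dominated by tiny allocations can pay more for bookkeeping than for their actual data under this fallback.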
1010 
1011 //! exported malloc symbol that overrides loading from libc
1012 ATTRIBUTE_NO_SANITIZE
1013 void * malloc(size_t size) NOEXCEPT {
1014 
1015  if (!real_malloc)
1016  return preinit_malloc(size);
1017 
1018  //! call real malloc procedure in libc
1019  void* ret = (*real_malloc)(padding + size);
1020 
1021  inc_count(size);
1022  if (log_operations && size >= log_operations_threshold) {
1023  fprintf(stderr, PPREFIX "malloc(%zu) = %p (current %zu / %zu)\n",
1024  size, static_cast<char*>(ret) + padding,
1025  get(float_curr), get(base_curr));
1026  }
1027 
1028  //! prepend allocation size and check sentinel
1029  *reinterpret_cast<size_t*>(ret) = size;
1030  *reinterpret_cast<size_t*>(
1031  static_cast<char*>(ret) + padding - sizeof(size_t)) = sentinel;
1032 
1033  return static_cast<char*>(ret) + padding;
1034 }
1035 
1036 //! exported free symbol that overrides loading from libc
1037 ATTRIBUTE_NO_SANITIZE
1038 void free(void* ptr) NOEXCEPT {
1039 
1040  if (!ptr) return; //! free(nullptr) is no operation
1041 
1042  if (static_cast<char*>(ptr) >= init_heap &&
1043  static_cast<char*>(ptr) <= init_heap + get(init_heap_use))
1044  {
1045  return preinit_free(ptr);
1046  }
1047 
1048  if (!real_free) {
1049  fprintf(stderr, PPREFIX
1050  "free(%p) outside init heap and without real_free !!!\n", ptr);
1051  return;
1052  }
1053 
1054  ptr = static_cast<char*>(ptr) - padding;
1055 
1056  if (*reinterpret_cast<size_t*>(
1057  static_cast<char*>(ptr) + padding - sizeof(size_t)) != sentinel) {
1058  fprintf(stderr, PPREFIX
1059  "free(%p) has no sentinel !!! memory corruption?\n", ptr);
1060  }
1061 
1062  size_t size = *reinterpret_cast<size_t*>(ptr);
1063  dec_count(size);
1064 
1065  if (log_operations && size >= log_operations_threshold) {
1066  fprintf(stderr, PPREFIX "free(%p) -> %zu (current %zu / %zu)\n",
1067  ptr, size, get(float_curr), get(base_curr));
1068  }
1069 
1070  (*real_free)(ptr);
1071 }
1072 
1073 //! exported calloc() symbol that overrides loading from libc, implemented using
1074 //! our malloc
1075 ATTRIBUTE_NO_SANITIZE
1076 void * calloc(size_t nmemb, size_t size) NOEXCEPT {
1077  size *= nmemb;
1078  if (!size) return nullptr;
1079  void* ret = malloc(size);
1080  if (!ret) return ret;
1081  memset(ret, 0, size);
1082  return ret;
1083 }
1084 
1085 //! exported realloc() symbol that overrides loading from libc
1086 ATTRIBUTE_NO_SANITIZE
1087 void * realloc(void* ptr, size_t size) NOEXCEPT {
1088 
1089  if (static_cast<char*>(ptr) >= static_cast<char*>(init_heap) &&
1090  static_cast<char*>(ptr) <=
1091  static_cast<char*>(init_heap) + get(init_heap_use))
1092  {
1093  return preinit_realloc(ptr, size);
1094  }
1095 
1096  if (size == 0) { //! special case size == 0 -> free()
1097  free(ptr);
1098  return nullptr;
1099  }
1100 
1101  if (ptr == nullptr) { //! special case ptr == 0 -> malloc()
1102  return malloc(size);
1103  }
1104 
1105  ptr = static_cast<char*>(ptr) - padding;
1106 
1107  if (*reinterpret_cast<size_t*>(
1108  static_cast<char*>(ptr) + padding - sizeof(size_t)) != sentinel) {
1109  fprintf(stderr, PPREFIX
1110  "realloc(%p) has no sentinel !!! memory corruption?\n", ptr);
1111  }
1112 
1113  size_t oldsize = *reinterpret_cast<size_t*>(ptr);
1114 
1115  dec_count(oldsize);
1116  inc_count(size);
1117 
1118  void* newptr = (*real_realloc)(ptr, padding + size);
1119 
1120  if (log_operations && size >= log_operations_threshold)
1121  {
1122  if (newptr == ptr)
1123  fprintf(stderr, PPREFIX
1124  "realloc(%zu -> %zu) = %p (current %zu / %zu)\n",
1125  oldsize, size, newptr, get(float_curr), get(base_curr));
1126  else
1127  fprintf(stderr, PPREFIX
1128  "realloc(%zu -> %zu) = %p -> %p (current %zu / %zu)\n",
1129  oldsize, size, ptr, newptr, get(float_curr), get(base_curr));
1130  }
1131 
1132  *reinterpret_cast<size_t*>(newptr) = size;
1133 
1134  return static_cast<char*>(newptr) + padding;
1135 }
1136 
1137 /******************************************************************************/
1138 
1139 #else // if defined(_MSC_VER)
1140 
1141 // TODO(tb): don't know how to override malloc/free.
1142 
1143 #endif // IMPLEMENTATION SWITCH
1144 
1145 /******************************************************************************/