Skip to content

Commit 7d98d14

Browse files
committed
free tls slots and main heap on destroy_on_exit (pr #1261 and issue #1259)
1 parent 1142229 commit 7d98d14

File tree

4 files changed

+106
-73
lines changed

4 files changed

+106
-73
lines changed

include/mimalloc/internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ mi_decl_cold mi_theap_t* _mi_heap_theap_get_or_init(const mi_heap_t* heap); //
303303
mi_decl_cold mi_theap_t* _mi_heap_theap_get_peek(const mi_heap_t* heap); // get the theap for a heap without initializing (and return NULL in that case)
304304
void _mi_heap_move_pages(mi_heap_t* heap_from, mi_heap_t* heap_to); // in "arena.c"
305305
void _mi_heap_destroy_pages(mi_heap_t* heap_from); // in "arena.c"
306-
306+
void _mi_heap_force_destroy(mi_heap_t* heap); // allow destroying the main heap
307307

308308
// "stats.c"
309309
void _mi_stats_init(void);

src/heap.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,15 +205,20 @@ void mi_heap_delete(mi_heap_t* heap) {
205205
mi_heap_free(heap);
206206
}
207207

208+
void _mi_heap_force_destroy(mi_heap_t* heap) {
209+
if (heap==NULL) return;
210+
mi_heap_free_theaps(heap);
211+
_mi_heap_destroy_pages(heap);
212+
if (!_mi_is_heap_main(heap)) { mi_heap_free(heap); }
213+
}
214+
208215
void mi_heap_destroy(mi_heap_t* heap) {
209216
if (heap==NULL) return;
210217
if (_mi_is_heap_main(heap)) {
211218
_mi_warning_message("cannot destroy the main heap\n");
212219
return;
213220
}
214-
mi_heap_free_theaps(heap);
215-
_mi_heap_destroy_pages(heap);
216-
mi_heap_free(heap);
221+
_mi_heap_force_destroy(heap);
217222
}
218223

219224
mi_heap_t* mi_heap_of(const void* p) {

src/init.c

Lines changed: 94 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -509,24 +509,32 @@ static void mi_subproc_unsafe_destroy(mi_subproc_t* subproc)
509509
mi_heap_t* heap = subproc->heaps;
510510
while (heap != NULL) {
511511
mi_heap_t* next = heap->next;
512-
if (heap!=subproc->heap_main) {mi_heap_destroy(heap); }
512+
if (heap!=subproc->heap_main) { mi_heap_destroy(heap); }
513513
heap = next;
514514
}
515515
mi_assert_internal(subproc->heaps == subproc->heap_main);
516-
mi_heap_destroy(subproc->heap_main);
516+
_mi_heap_force_destroy(subproc->heap_main); // no warning if destroying the main heap
517517
}
518518

519+
// remove associated arenas
520+
_mi_arenas_unsafe_destroy_all(subproc);
521+
519522
// merge stats back into the main subproc?
520523
if (subproc!=&subproc_main) {
521-
_mi_arenas_unsafe_destroy_all(subproc);
522524
_mi_stats_merge_into(&subproc_main.stats, &subproc->stats);
525+
}
523526

524-
// safe to release
525-
// todo: should we refcount subprocesses?
526-
mi_lock_done(&subproc->arena_reserve_lock);
527-
mi_lock_done(&subproc->heaps_lock);
527+
// safe to release
528+
// todo: should we refcount subprocesses?
529+
mi_lock_done(&subproc->arena_reserve_lock);
530+
mi_lock_done(&subproc->heaps_lock);
531+
if (subproc!=&subproc_main) {
528532
_mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid);
529533
}
534+
else {
535+
// for the main subproc, also release the global page map
536+
_mi_page_map_unsafe_destroy(&subproc_main);
537+
}
530538
}
531539

532540
void mi_subproc_destroy(mi_subproc_id_t subproc_id) {
@@ -761,8 +769,8 @@ mi_decl_cold mi_decl_noinline mi_theap_t* _mi_theap_empty_get(void) {
761769
#else
762770
// with only direct entries, use the "arbitrary user data" field
763771
// and assume it is NULL (see also <http://www.nynaeve.net/?p=98>)
764-
#define MI_TLS_INITIAL_EXPANSION_SLOT (0)
765772
#define MI_TLS_INITIAL_SLOT (5)
773+
#define MI_TLS_INITIAL_EXPANSION_SLOT (0)
766774
#endif
767775

768776
// we initially use the last of the expansion slots as the default NULL.
@@ -772,38 +780,63 @@ mi_decl_hidden size_t _mi_theap_default_expansion_slot = MI_TLS_INITIAL_EXPANSIO
772780
mi_decl_hidden size_t _mi_theap_cached_slot = MI_TLS_INITIAL_SLOT;
773781
mi_decl_hidden size_t _mi_theap_cached_expansion_slot = MI_TLS_INITIAL_EXPANSION_SLOT;
774782

775-
static size_t mi_win_tls_slot_alloc(size_t* extended) {
776-
const DWORD slot = TlsAlloc();
777-
if (slot==TLS_OUT_OF_INDEXES || slot >= MI_TLS_DIRECT_SLOTS + MI_TLS_EXPANSION_SLOTS - 1) {
778-
// note: we also fail if the program already allocated the maximum number of expansion slots (as we use the last one as the default)
783+
static DWORD mi_tls_raw_index_default = TLS_OUT_OF_INDEXES;
784+
static DWORD mi_tls_raw_index_cached = TLS_OUT_OF_INDEXES;
785+
786+
static bool mi_win_tls_slot_alloc(size_t* slot, size_t* extended, DWORD* raw_index) {
787+
const DWORD index = TlsAlloc();
788+
*raw_index = index;
789+
if (index==TLS_OUT_OF_INDEXES) {
779790
*extended = 0;
780-
return 0;
791+
*slot = 0;
792+
return false;
781793
}
782-
else if (slot<MI_TLS_DIRECT_SLOTS) {
794+
else if (index<MI_TLS_DIRECT_SLOTS) {
783795
*extended = 0;
784-
return (slot + MI_TLS_DIRECT_FIRST);
796+
*slot = index + MI_TLS_DIRECT_FIRST;
797+
return true;
798+
}
799+
#if !MI_WIN_DIRECT_TLS
800+
else if (index < MI_TLS_DIRECT_SLOTS + MI_TLS_EXPANSION_SLOTS - 1) { // check maximum number of expansion slots - 1 (as we use the last one as the default)
801+
*extended = index - MI_TLS_DIRECT_SLOTS;
802+
*slot = MI_TLS_EXPANSION_SLOT;
803+
return true;
785804
}
805+
#endif
786806
else {
787-
#if MI_WIN_DIRECT_TLS
807+
// too high an index for us
808+
_mi_error_message(EFAULT, "returned tls index was too high (%u)\n", index);
809+
TlsFree(index);
810+
*raw_index = TLS_OUT_OF_INDEXES;
788811
*extended = 0;
789-
return 0;
790-
#else
791-
*extended = (slot - MI_TLS_DIRECT_SLOTS);
792-
return MI_TLS_EXPANSION_SLOT;
793-
#endif
812+
*slot = 0;
813+
return false;
814+
}
815+
}
816+
817+
static void mi_win_tls_slot_free(DWORD* raw_index) {
818+
if (*raw_index != TLS_OUT_OF_INDEXES) {
819+
TlsFree(*raw_index);
820+
*raw_index = TLS_OUT_OF_INDEXES;
794821
}
795822
}
796823

797-
mi_decl_cold mi_theap_t* _mi_win_tls_slots_init(void) {
824+
static void mi_tls_slots_init(void) {
798825
static mi_atomic_once_t tls_slots_init;
799826
if (mi_atomic_once(&tls_slots_init)) {
800-
_mi_theap_default_slot = mi_win_tls_slot_alloc(&_mi_theap_default_expansion_slot);
801-
_mi_theap_cached_slot = mi_win_tls_slot_alloc(&_mi_theap_cached_expansion_slot);
802-
if (_mi_theap_cached_slot==0) {
827+
bool ok = mi_win_tls_slot_alloc(&_mi_theap_default_slot, &_mi_theap_default_expansion_slot, &mi_tls_raw_index_default);
828+
if (ok) {
829+
ok = mi_win_tls_slot_alloc(&_mi_theap_cached_slot, &_mi_theap_cached_expansion_slot, &mi_tls_raw_index_cached);
830+
}
831+
if (!ok) {
803832
_mi_error_message(EFAULT, "unable to allocate fast TLS user slot (0x%zx)\n", _mi_theap_cached_slot);
804833
}
805834
}
806-
return (mi_theap_t*)&_mi_theap_empty;
835+
}
836+
837+
static void mi_tls_slots_done(void) {
838+
mi_win_tls_slot_free(&mi_tls_raw_index_default);
839+
mi_win_tls_slot_free(&mi_tls_raw_index_cached );
807840
}
808841

809842
static void mi_win_tls_slot_set(size_t slot, size_t extended_slot, void* value) {
@@ -823,13 +856,38 @@ static void mi_win_tls_slot_set(size_t slot, size_t extended_slot, void* value)
823856
mi_decl_hidden pthread_key_t _mi_theap_default_key = 0;
824857
mi_decl_hidden pthread_key_t _mi_theap_cached_key = 0;
825858

826-
mi_decl_cold mi_theap_t* _mi_tls_keys_init(void) {
859+
static void mi_tls_slots_init(void) {
827860
static mi_atomic_once_t tls_keys_init;
828861
if (mi_atomic_once(&tls_keys_init)) {
829-
pthread_key_create(&_mi_theap_default_key, NULL);
830-
pthread_key_create(&_mi_theap_cached_key, NULL);
862+
int err = pthread_key_create(&_mi_theap_default_key, NULL);
863+
if (err==0) {
864+
err = pthread_key_create(&_mi_theap_cached_key, NULL);
865+
}
866+
if (err!=0) {
867+
_mi_error_message(EFAULT, "unable to allocate pthread keys (error %d)\n", err);
868+
}
869+
}
870+
}
871+
872+
static void mi_tls_slots_done(void) {
873+
if (_mi_theap_default_key != 0) {
874+
pthread_key_delete(_mi_theap_default_key);
875+
_mi_theap_default_key = 0;
876+
}
877+
if (_mi_theap_cached_key != 0) {
878+
pthread_key_delete(_mi_theap_cached_key);
879+
_mi_theap_cached_key = 0;
831880
}
832-
return (mi_theap_t*)&_mi_theap_empty;
881+
}
882+
883+
#else
884+
885+
static void mi_tls_slots_init(void) {
886+
// nothing
887+
}
888+
889+
static void mi_tls_slots_done(void) {
890+
// nothing
833891
}
834892

835893
#endif
@@ -838,15 +896,14 @@ void _mi_theap_cached_set(mi_theap_t* theap) {
838896
mi_theap_t* prev = _mi_theap_cached();
839897
if (prev==theap) return;
840898
// set
899+
mi_tls_slots_init();
841900
#if MI_TLS_MODEL_THREAD_LOCAL
842901
__mi_theap_cached = theap;
843902
#elif MI_TLS_MODEL_FIXED_SLOT
844903
mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_CACHED, theap);
845904
#elif MI_TLS_MODEL_DYNAMIC_WIN32
846-
_mi_win_tls_slots_init();
847905
mi_win_tls_slot_set(_mi_theap_cached_slot, _mi_theap_cached_expansion_slot, theap);
848906
#elif MI_TLS_MODEL_DYNAMIC_PTHREADS
849-
_mi_tls_keys_init();
850907
if (_mi_theap_cached_key!=0) pthread_setspecific(_mi_theap_cached_key, theap);
851908
#endif
852909
// update refcounts (so cached theap memory keeps available until no longer cached)
@@ -858,15 +915,14 @@ void _mi_theap_default_set(mi_theap_t* theap) {
858915
mi_theap_t* const theap_old = _mi_theap_default();
859916
mi_assert_internal(theap != NULL);
860917
mi_assert_internal(theap->tld->thread_id==0 || theap->tld->thread_id==_mi_thread_id());
918+
mi_tls_slots_init();
861919
#if MI_TLS_MODEL_THREAD_LOCAL
862920
__mi_theap_default = theap;
863921
#elif MI_TLS_MODEL_FIXED_SLOT
864922
mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_DEFAULT, theap);
865923
#elif MI_TLS_MODEL_DYNAMIC_WIN32
866-
_mi_win_tls_slots_init();
867924
mi_win_tls_slot_set(_mi_theap_default_slot, _mi_theap_default_expansion_slot, theap);
868925
#elif MI_TLS_MODEL_DYNAMIC_PTHREADS
869-
_mi_tls_keys_init();
870926
if (_mi_theap_default_key!=0) pthread_setspecific(_mi_theap_default_key, theap);
871927
#endif
872928

@@ -1080,17 +1136,18 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept {
10801136
// since after process_done there might still be other code running that calls `free` (like at_exit routines,
10811137
// or C-runtime termination code).
10821138
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
1083-
mi_subprocs_unsafe_destroy_all();
1084-
_mi_page_map_unsafe_destroy(_mi_subproc_main());
1139+
mi_subprocs_unsafe_destroy_all(); // destroys all subprocs, arenas, and the page_map!
10851140
}
10861141
else {
10871142
mi_heap_stats_merge_to_subproc(mi_heap_main());
10881143
}
1089-
1144+
1145+
// careful now to no longer access any allocator functionality
10901146
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
10911147
mi_subproc_stats_print_out(NULL, NULL, NULL);
10921148
}
10931149
mi_lock_done(&subprocs_lock);
1150+
mi_tls_slots_done();
10941151
_mi_allocator_done();
10951152
_mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id);
10961153
os_preloading = true; // don't call the C runtime anymore

src/prim/windows/prim.c

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -698,42 +698,14 @@ bool _mi_prim_thread_is_in_threadpool(void) {
698698
// Process & Thread Init/Done
699699
//----------------------------------------------------------------
700700

701-
#if MI_WIN_USE_FIXED_TLS==1
702-
mi_decl_cache_align size_t _mi_win_tls_offset = 0;
703-
#endif
704-
705701
//static void mi_debug_out(const char* s) {
706702
// HANDLE h = GetStdHandle(STD_ERROR_HANDLE);
707703
// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL);
708704
//}
709705

710-
static void mi_win_tls_init(DWORD reason) {
711-
if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) {
712-
#if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically
713-
if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) {
714-
const DWORD tls_slot = TlsAlloc(); // usually returns slot 1
715-
if (tls_slot == TLS_OUT_OF_INDEXES) {
716-
_mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n");
717-
}
718-
_mi_win_tls_offset = (size_t)tls_slot * sizeof(void*);
719-
}
720-
#endif
721-
#if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation
722-
if (_mi_theap_default() == NULL) {
723-
_mi_theap_default_set((mi_theap_t*)&_mi_theap_empty);
724-
#if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1
725-
void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*)));
726-
mi_assert_internal(p == (void*)&_mi_theap_empty);
727-
#endif
728-
}
729-
#endif
730-
}
731-
}
732-
733706
static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
734707
MI_UNUSED(reserved);
735708
MI_UNUSED(module);
736-
mi_win_tls_init(reason);
737709
if (reason==DLL_PROCESS_ATTACH) {
738710
_mi_auto_process_init();
739711
}
@@ -748,10 +720,10 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
748720

749721
/* ----------------------------------------------------------------------
750722
Auto initialize and finalize mimalloc on process and thread start/end.
751-
By default we use a combination of _pRawDllMain and TLS sections for
752-
both static and dynamic linkage
723+
By default we use a combination of _pRawDllMain and TLS sections for
724+
both static and dynamic linkage
753725
------------------------------------------------------------------------- */
754-
#ifndef MI_WIN_NO_RAW_DLLMAIN
726+
#ifndef MI_WIN_NO_RAW_DLLMAIN
755727
#define MI_PRIM_HAS_PROCESS_ATTACH 1
756728
// nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event.
757729
void _mi_prim_thread_init_auto_done(void) {}
@@ -993,7 +965,6 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
993965
#endif
994966
mi_decl_export void _mi_redirect_entry(DWORD reason) {
995967
// called on redirection; careful as this may be called before DllMain
996-
mi_win_tls_init(reason);
997968
if (reason == DLL_PROCESS_ATTACH) {
998969
mi_redirected = true;
999970
}

0 commit comments

Comments
 (0)