// ThreadLocalCollector.cpp
#include "ThreadLocalCollector.h"
#include "auto_trace.h"
#include "auto_dtrace.h"
#include "Locks.h"
#include "BlockRef.h"
namespace Auto {
// True when both endpoints of the pointer range sit on block-alignment
// boundaries, which permits the unrolled word-at-a-time stack scan.
inline bool is_block_aligned_range(void **start, void **end) {
    const uintptr_t alignment_mask = mask(block_alignment);
    return ((uintptr_t)start & alignment_mask) == 0 && ((uintptr_t)end & alignment_mask) == 0;
}
// Appends a block pointer to the TLC work buffer; capacity is the caller's
// responsibility (bounded by local_allocations_size_limit).
inline void ThreadLocalCollector::append_block(void *block) {
    _tlcBuffer[_tlcBufferCount] = block;
    ++_tlcBufferCount;
}
// Conservatively treats `block` as a potential reference. If it points at the
// start of a still-thread-local block in subzone memory, marks that block and
// queues it in _tlcBuffer for later scanning. Pointers outside subzone memory
// are ignored.
inline void ThreadLocalCollector::mark_push_block(void *block) {
    if (_zone->in_subzone_memory(block)) {
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        // only exact block-start pointers to thread-local blocks count
        if (subzone->block_is_start(q) && subzone->is_thread_local(q)) {
            int32_t blockIndex = _localBlocks.slotIndex(block);
            // testAndSetMarked guarantees each block is queued at most once
            if (blockIndex != -1 && !_localBlocks.testAndSetMarked(blockIndex)) {
                append_block(block);
            }
        }
    }
}
// Conservatively scans a stack range for references to thread-local blocks.
// Every word whose value lies inside the zone's coverage range is treated as a
// potential pointer and handed to mark_push_block().
//
// The in-range test uses the unsigned-wraparound idiom: (ptr - valid_lowest)
// underflows to a huge unsigned value for pointers below the coverage start,
// so a single unsigned `< valid_size` comparison rejects both out-of-range
// directions. Fixed: the casts were `(intptr_t)`, which only worked via an
// implicit signed->unsigned conversion; use uintptr_t to match
// valid_lowest/valid_size and make the idiom explicit (no behavior change).
void ThreadLocalCollector::scan_stack_range(const Range &range) {
    void ** reference = (void **)range.address();
    void ** const end = (void **)range.end();
    const uintptr_t valid_lowest = (uintptr_t)_coverage.address();
    const uintptr_t valid_size = (uintptr_t)_coverage.end() - valid_lowest;
    if (is_block_aligned_range(reference, end)) {
        // Both ends block-aligned: the range length is a multiple of the block
        // alignment (assumed >= 4 words — as the original 4-way unroll relies
        // on), so scan four words per iteration and prefetch the next group.
        while (reference < end) {
            void *referent0 = reference[0];
            void *referent1 = reference[1];
            void *referent2 = reference[2];
            void *referent3 = reference[3];
            reference += 4; __builtin_prefetch(reference);
            if (((uintptr_t)referent0 - valid_lowest) < valid_size) mark_push_block(referent0);
            if (((uintptr_t)referent1 - valid_lowest) < valid_size) mark_push_block(referent1);
            if (((uintptr_t)referent2 - valid_lowest) < valid_size) mark_push_block(referent2);
            if (((uintptr_t)referent3 - valid_lowest) < valid_size) mark_push_block(referent3);
        }
    } else {
        // Unaligned range: scan one word at a time.
        for (void *last_valid_pointer = end - 1; reference <= last_valid_pointer; ++reference) {
            void *referent = *reference;
            if (((uintptr_t)referent - valid_lowest) < valid_size) {
                mark_push_block(referent);
            }
        }
    }
}
// Conservatively scans an arbitrary word range (e.g. a block's interior or a
// register snapshot) for references into the zone's coverage range; candidates
// are handed to mark_push_block(). Fixed: the cast was `(intptr_t)`, which
// only worked via implicit signed->unsigned conversion; uintptr_t makes the
// unsigned-wraparound range check explicit (no behavior change).
void ThreadLocalCollector::scan_range(const Range &range) {
    void ** reference = (void **)range.address();
    void ** const end = (void **)range.end();
    const uintptr_t valid_lowest = (uintptr_t)_coverage.address();
    const uintptr_t valid_size = (uintptr_t)_coverage.end() - valid_lowest;
    for (void *last_valid_pointer = end - 1; reference <= last_valid_pointer; ++reference) {
        void *referent = *reference;
        // single unsigned compare rejects pointers below and above coverage
        if (((uintptr_t)referent - valid_lowest) < valid_size) {
            mark_push_block(referent);
        }
    }
}
void ThreadLocalCollector::scan_with_layout(const Range &range, const unsigned char* map) {
void **reference = (void **)range.address();
void ** const end = (void **)range.end();
Range subrange;
while (unsigned data = *map++) {
unsigned skip = data >> 4;
unsigned run = data & 0xf;
reference += skip;
subrange.set_range(reference, reference + run);
if (subrange.address() < end && subrange.end() <= end) {
scan_range(subrange);
} else {
break;
}
reference += run;
}
if (reference < end) {
subrange.set_range((void *)reference, end);
scan_range(subrange);
}
}
// Scans one thread-local block: objects with a layout map are scanned
// precisely via scan_with_layout(); everything else is scanned conservatively.
inline void ThreadLocalCollector::scan_local_block(Subzone *subzone, usword_t q, void *block) {
    Range block_range(block, subzone->size(q));
    const unsigned char *layout_map = NULL;
    if (subzone->layout(q) & AUTO_OBJECT)
        layout_map = _zone->layout_map_for_block(block);
    if (layout_map != NULL)
        scan_with_layout(block_range, layout_map);
    else
        scan_range(block_range);
}
// Transitively scans every block queued in _tlcBuffer. mark_push_block() may
// append newly discovered blocks while this loop runs, so _tlcBufferCount is
// re-read each iteration; the loop ends when the frontier is exhausted.
void ThreadLocalCollector::scan_marked_blocks() {
    size_t index = 0;
    while (index < _tlcBufferCount) {
        void *block = _tlcBuffer[index++];
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        if (subzone->should_scan_local_block(q)) {
            scan_local_block(subzone, q, block);
        }
    }
}
// Reclaims finalized local garbage. Blocks with no outstanding refcount go
// back to the thread's allocation cache, or — if the cache refuses them — are
// re-marked allocated/unscanned and left for the global collector. Refcounted
// garbage is either reported as over-retained or, if it was registered as a
// zombie during finalization, revived and zombified.
// count/garbage: the garbage list produced by process_local_garbage().
void ThreadLocalCollector::scavenge_local(size_t count, void *garbage[]) {
    size_t blocks_freed = 0;
    size_t bytes_freed = 0;
    size_t bytes_dropped = 0;   // bytes handed back to the global collector
    Zone *zone = _thread.zone();
    if (zone->collection_checking_enabled()) {
        zone->clear_garbage_checking_count(garbage, count);
    }
    GARBAGE_COLLECTION_COLLECTION_PHASE_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_SCAVENGING_PHASE);
    for (size_t index = 0; index < count; index++) {
        void *block = garbage[index];
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        if (!subzone->has_refcount(q)) {
            blocks_freed++;
            size_t block_size = subzone->size(q);
            // report the deallocation to the malloc stack logger, if active
            if (malloc_logger) malloc_logger(MALLOC_LOG_TYPE_DEALLOCATE | MALLOC_LOG_TYPE_HAS_ZONE, uintptr_t(_zone), uintptr_t(block), 0, 0, 0);
            if (!_thread.thread_cache_add(block, subzone, q)) {
                // cache rejected the block: mark it allocated/unscanned so the
                // global collector reclaims it, and account for the drop
                subzone->allocate(q, subzone->length(q), AUTO_UNSCANNED, false, false);
                bytes_dropped += block_size;
            } else {
                bytes_freed += block_size;
            }
        } else {
            // garbage with a refcount: either a retain/release error, or a
            // block registered as a zombie during finalization
            SubzoneBlockRef ref(subzone, q);
            if (!is_zombie(block)) {
                _zone->handle_overretained_garbage(block, ref.refcount(), ref.layout());
            } else {
                // revive the block (allocate under the admin lock) and zombify
                SpinLock lock(subzone->admin()->lock()); subzone->allocate(q, subzone->length(q), subzone->layout(q), true, false);
                _zone->zombify_internal(ref);
            }
        }
    }
    if (bytes_dropped) {
        // dropped blocks stay allocated, so the allocation counter must grow
        _zone->adjust_allocation_counter(bytes_dropped);
    }
    GARBAGE_COLLECTION_COLLECTION_PHASE_END((auto_zone_t*)_zone, AUTO_TRACE_SCAVENGING_PHASE, (uint64_t)blocks_freed, (uint64_t)bytes_freed);
}
// Deferred-finalization worker, run on the collection queue: invalidates the
// evicted garbage blocks, frees them, clears zombies, then releases the
// heap-allocated garbage list itself.
static void finalize_work(Zone *zone, const size_t garbage_count, void *garbage[]) {
    size_t freed_blocks = 0;
    size_t freed_bytes = 0;
    zone->invalidate_garbage(garbage_count, garbage);
    zone->free_garbage(garbage_count, garbage, 0, NULL, freed_blocks, freed_bytes);
    zone->clear_zombies();
    aux_free(garbage);   // list was aux_malloc'd by finalize_local_garbage_later
}
bool ThreadLocalCollector::block_in_garbage_list(void *block) {
for (size_t i=0; i<_tlcBufferCount; i++) {
if (_tlcBuffer[i] == block)
return true;
}
return false;
}
// Promotes to global scope every still-live thread-local block reachable from
// the garbage currently in _tlcBuffer. On entry the buffer holds only garbage
// blocks; scanning them makes mark_push_block() append any reachable,
// still-local survivors past the initial count, and the second loop evicts
// exactly those appended blocks.
void ThreadLocalCollector::evict_local_garbage() {
    size_t evict_cursor = _tlcBufferCount;   // survivors are appended at/after this index
    size_t scan_cursor = 0;
    while (scan_cursor < _tlcBufferCount) {  // count grows as survivors are appended
        void *block = _tlcBuffer[scan_cursor++];
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        if (subzone->is_scanned(q)) {
            scan_local_block(subzone, q, block);
        }
    }
    usword_t global_size = 0;   // total bytes promoted to global
    while (evict_cursor < _tlcBufferCount) {
        void *block = _tlcBuffer[evict_cursor++];
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        subzone->make_global(q);
        _localBlocks.remove(block);
        global_size += subzone->size(q);
    }
    // evicted blocks now count against the global allocation threshold
    if (global_size != 0)
        _zone->adjust_allocation_counter(global_size);
}
// Partitions the thread-local set into reachable (marked) and garbage
// (unmarked) blocks, rebuilds _tlcBuffer as the garbage list, and hands it to
// garbage_list_handler for finalization/eviction.
void ThreadLocalCollector::process_local_garbage(void (*garbage_list_handler)(ThreadLocalCollector *)) {
    // Unmarked blocks are garbage; marked blocks currently fill _tlcBuffer.
    usword_t garbage_count = _localBlocks.count() - _tlcBufferCount;
    if (garbage_count == 0) {
        // no garbage: just clear the marks and report an empty collection
        _localBlocks.clearFlags();
        GARBAGE_COLLECTION_COLLECTION_END((auto_zone_t*)_zone, 0ull, 0ull, _localBlocks.count(), (uint64_t)(-1));
        return;
    }
    // reuse _tlcBuffer to gather the garbage blocks
    _tlcBufferCount = 0;
    size_t scavenged_size = 0;
    for (uint32_t i = _localBlocks.firstOccupiedSlot(), last = _localBlocks.lastOccupiedSlot(); (i <= last) && (_tlcBufferCount != garbage_count); i++) {
        void *block = _localBlocks.unmarkedPointerAtIndex(i);
        if (block) {
            Subzone *subzone = Subzone::subzone(block);
            usword_t q = subzone->quantum_index_unchecked(block);
            if (subzone->is_thread_local(q)) {
                scavenged_size += subzone->size(q);
                append_block(block);
                _localBlocks.remove(i);
            } else {
                auto_error(_zone, "not thread local garbage", (const void *)block);
            }
        }
    }
#ifdef MEASURE_TLC_STATS
    _zone->statistics().add_local_collected(_tlcBufferCount);
#endif
    // Clear marks before the handler runs. A suspended thread must not
    // allocate, so rehash in place instead of compacting.
    if (_thread.suspended())
        _localBlocks.clearFlagsRehash();
    else
        _localBlocks.clearFlagsCompact();
    AUTO_PROBE(auto_probe_end_local_scan(_tlcBufferCount, &_tlcBuffer[0]));
    garbage_list_handler(this);
    // BUGFIX: gate the COLLECTION_END probe on its own _ENABLED() test; it was
    // gated on GARBAGE_COLLECTION_COLLECTION_PHASE_END_ENABLED(), the enabled
    // check of a different probe, which could suppress this one.
    if (GARBAGE_COLLECTION_COLLECTION_END_ENABLED())
        GARBAGE_COLLECTION_COLLECTION_END((auto_zone_t*)_zone, garbage_count, (uint64_t)scavenged_size, _localBlocks.count(), (uint64_t)_localBlocks.localsSize());
}
// Synchronous finalization path: marks the garbage quanta, runs finalizers
// (invalidate_garbage), then scavenges the storage back into thread caches.
void ThreadLocalCollector::finalize_local_garbage_now(ThreadLocalCollector *tlc) {
    const size_t count = tlc->_tlcBufferCount;
    void **list = &tlc->_tlcBuffer[0];
    mark_local_garbage(list, count);
    tlc->_zone->invalidate_garbage(count, list);
    tlc->scavenge_local(count, list);
#ifdef MEASURE_TLC_STATS
    tlc->_zone->statistics().add_recycled(count);
#endif
}
inline void ThreadLocalCollector::mark_local_garbage(void **garbage_list, size_t garbage_count) {
for (size_t i = 0; i < garbage_count; i++) {
void *block = garbage_list[i];
Subzone *subzone = Subzone::subzone(block);
usword_t q = subzone->quantum_index_unchecked(block);
subzone->mark_local_garbage(q);
}
}
// Deferred finalization path: the garbage is evicted to global scope because
// its finalizers will run later on the collection queue, after this thread has
// moved on.
void ThreadLocalCollector::finalize_local_garbage_later(ThreadLocalCollector *tlc) {
    size_t garbage_count = tlc->_tlcBufferCount;
    tlc->evict_local_garbage(); mark_local_garbage(tlc->_tlcBuffer, garbage_count);
    Zone *z = tlc->_zone;   // captured by value in the block below
    // copy the garbage list to the heap — the async block outlives this frame;
    // finalize_work() frees the copy.
    // NOTE(review): aux_malloc's result is unchecked; a failed allocation
    // would crash in memcpy — confirm aux_malloc aborts on failure.
    void **garbage_copy = (void **)aux_malloc(garbage_count * sizeof(void *));
    memcpy(garbage_copy, tlc->_tlcBuffer, garbage_count * sizeof(void *));
    dispatch_async(tlc->_zone->_collection_queue, ^{ finalize_work(z, garbage_count, garbage_copy); });
#ifdef MEASURE_TLC_STATS
    tlc->_zone->statistics().add_global_freed(garbage_count);
#endif
}
// Handler used for suspended-thread collections: evicts everything reachable
// from the garbage, then converts the garbage blocks themselves into global
// garbage for the main collector (clearing any TLC marks along the way).
void ThreadLocalCollector::unmark_local_garbage(ThreadLocalCollector *tlc) {
    const size_t garbage_count = tlc->_tlcBufferCount;
    tlc->evict_local_garbage();
    mark_local_garbage(tlc->_tlcBuffer, garbage_count);
    for (uint32_t slot = 0; slot < garbage_count; ++slot) {
        void *garbage_block = tlc->_tlcBuffer[slot];
        Subzone *owner = Subzone::subzone(garbage_block);
        usword_t q = owner->quantum_index_unchecked(garbage_block);
        owner->test_and_clear_mark(q);
        owner->mark_global_garbage(q);
    }
#ifdef MEASURE_TLC_STATS
    tlc->_zone->statistics().add_global_freed(garbage_count);
#endif
}
// Policy check: should a thread-local collection start on this thread?
// Never starts while a collection is already in flight. When finalizers may
// run synchronously the threshold is a tenth of the limit; otherwise a
// collection is only worthwhile if a dispatch queue exists to run finalizers
// later, and then only at the full limit.
bool ThreadLocalCollector::should_collect(Zone *zone, Thread &thread, bool canFinalizeNow) {
    if (thread.thread_local_collector() != NULL)
        return false;   // collection already in progress on this thread
    if (canFinalizeNow)
        return thread.locals().count() >= (local_allocations_size_limit / 10);
    if (zone->_collection_queue)
        return thread.locals().count() >= local_allocations_size_limit;
    return false;
}
// Policy check for collecting an already-suspended thread. Collection is only
// attempted when allocation logging is off, the watchdog has fired, the locals
// table is not guarded (mid-mutation), and there is something to collect.
// Side effect: disables the watchdog when collecting, tickles it otherwise.
bool ThreadLocalCollector::should_collect_suspended(Thread &thread)
{
    assert(thread.suspended());
    const bool collect = (malloc_logger == NULL)
        && thread.tlc_watchdog_should_trigger()
        && !Sentinel::is_guarded(thread.localsGuard())
        && thread.locals().count() > 0;
    if (collect)
        thread.tlc_watchdog_disable();
    else
        thread.tlc_watchdog_tickle();
    return collect;
}
#ifndef __BLOCKS__
// Fallback functor used when blocks syntax is unavailable: forwards each stack
// range reported by Thread::scan_current_thread() to the collector's
// conservative stack scanner. (Signature must match Thread::thread_scanner.)
class thread_local_scanner_helper : public Thread::thread_scanner {
    ThreadLocalCollector &_collector;
public:
    thread_local_scanner_helper(ThreadLocalCollector &collector) : _collector(collector) {}
    virtual void operator() (Thread *thread, Range &range) { _collector.scan_stack_range(range); }
};
#endif
// Emits the dtrace scanning-phase-end probe, totalling the sizes of the marked
// blocks that were eligible for scanning.
void ThreadLocalCollector::trace_scanning_phase_end() {
    size_t scanned_size = 0;
    for (usword_t i = 0; i < _tlcBufferCount; i++) {
        // BUGFIX: was `_tlcBuffer[i++]`, which advanced i a second time inside
        // the loop, skipping every other block and under-reporting scanned_size.
        void *block = _tlcBuffer[i];
        Subzone *subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        if (subzone->should_scan_local_block(q)) {
            scanned_size += subzone->size(q);
        }
    }
    GARBAGE_COLLECTION_COLLECTION_PHASE_END((auto_zone_t*)_zone, AUTO_TRACE_SCANNING_PHASE, (uint64_t)_tlcBufferCount, (uint64_t)scanned_size);
}
// Runs a full thread-local collection on the current thread: conservatively
// scans the stack, transitively scans the marked local blocks, then disposes
// of the unmarked garbage. finalizeNow selects synchronous finalization vs.
// eviction plus deferred finalization on the collection queue.
void ThreadLocalCollector::collect(bool finalizeNow) {
    AUTO_PROBE(auto_probe_begin_local_scan());
    assert(_thread.thread_local_collector() == NULL);
    _thread.set_thread_local_collector(this);   // marks collection in progress
    _thread.tlc_watchdog_reset();
    GARBAGE_COLLECTION_COLLECTION_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_LOCAL);
    GARBAGE_COLLECTION_COLLECTION_PHASE_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_SCANNING_PHASE);
#ifdef __BLOCKS__
    _thread.scan_current_thread(^(Thread *thread, const Range &range) {
        this->scan_stack_range(range);
    }, _stack_bottom);
#else
    thread_local_scanner_helper helper(*this);
    _thread.scan_current_thread(helper, _stack_bottom);
#endif
    scan_marked_blocks();
    // compute the scanned-size totals only if the dtrace probe is active
    if (GARBAGE_COLLECTION_COLLECTION_PHASE_END_ENABLED()) {
        trace_scanning_phase_end();
    }
    process_local_garbage(finalizeNow ? finalize_local_garbage_now : finalize_local_garbage_later);
    _thread.set_thread_local_collector(NULL);
    // shrink the locals table once it grows past half the limit
    if (_localBlocks.count() > local_allocations_size_limit/2)
        _thread.flush_local_blocks();
    AUTO_PROBE(auto_probe_local_collection_complete());
}
void ThreadLocalCollector::collect_suspended(Range ®isters, Range &stack) {
AUTO_PROBE(auto_probe_begin_local_scan());
assert(_thread.thread_local_collector() == NULL);
assert(_thread.suspended());
_thread.set_thread_local_collector(this);
GARBAGE_COLLECTION_COLLECTION_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_LOCAL);
GARBAGE_COLLECTION_COLLECTION_PHASE_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_SCANNING_PHASE);
scan_range(stack);
scan_range(registers);
scan_marked_blocks();
if (GARBAGE_COLLECTION_COLLECTION_PHASE_END_ENABLED()) {
trace_scanning_phase_end();
}
process_local_garbage(unmark_local_garbage);
_thread.set_thread_local_collector(NULL);
AUTO_PROBE(auto_probe_local_collection_complete());
}
// Reap path: finalizes local garbage without any prior scan. Since nothing is
// marked first (assumes a freshly constructed collector with an empty
// _tlcBuffer — confirm), every local block is treated as garbage and
// finalized synchronously.
void ThreadLocalCollector::reap_all() {
    GARBAGE_COLLECTION_COLLECTION_BEGIN((auto_zone_t*)_zone, AUTO_TRACE_LOCAL);
    _thread.set_thread_local_collector(this);
    process_local_garbage(finalize_local_garbage_now);
    _thread.set_thread_local_collector(NULL);
}
// Forcibly promotes startingBlock — and every thread-local block reachable
// from it — to global scope. If the thread already has an active collector,
// its _tlcBuffer is in use, so a temporary buffer is malloc'd for the
// duration and freed at the end.
// NOTE(review): this assumes the instance being ejected through is distinct
// from the thread's active collector (otherwise the active buffer pointer is
// clobbered) — confirm against callers.
void ThreadLocalCollector::eject_local_block(void *startingBlock) {
    if (_thread.thread_local_collector() != NULL) {
        _tlcBuffer = (void **)malloc(local_allocations_size_limit * sizeof(void *));
    }
    Subzone *subzone = Subzone::subzone(startingBlock);
#ifndef NDEBUG
    {
        usword_t q;
        assert(subzone->block_is_start(startingBlock, &q) && subzone->is_thread_local(q));
        assert(_localBlocks.slotIndex(startingBlock) != -1);
    }
#endif
    mark_push_block(startingBlock);
    // transitively collect everything reachable from startingBlock
    scan_marked_blocks();
    size_t evicted_size = 0;
    for (size_t i = 0; i < _tlcBufferCount; i++) {
        void *block = _tlcBuffer[i];
        subzone = Subzone::subzone(block);
        usword_t q = subzone->quantum_index_unchecked(block);
        assert(subzone->is_thread_local(q));
        subzone->make_global(q);
        _localBlocks.remove(block);
        evicted_size += subzone->size(q);
    }
    // evicted blocks now count against the global allocation threshold
    _zone->adjust_allocation_counter(evicted_size);
    if (_thread.thread_local_collector() != NULL) {
        free(_tlcBuffer);
    }
}
// Records a block that must later be turned into a zombie (see scavenge_local).
// Lazily creates the set on first use.
void ThreadLocalCollector::add_zombie(void *block) {
    if (!_zombies)
        _zombies = new PtrHashSet();
    // set insertion is a no-op for duplicates, so the previous
    // find()-before-insert() double lookup was redundant
    _zombies->insert(block);
}
inline bool ThreadLocalCollector::is_zombie(void *block) {
if (_zombies) {
PtrHashSet::iterator iter = _zombies->find(block);
return (iter != _zombies->end());
} else {
return false;
}
}
}