8302820: Remove costs for NMTPreInit when NMT is off

Reviewed-by: jsjolen, adinn
This commit is contained in:
Thomas Stuefe
2023-03-02 13:21:25 +00:00
parent 72de24e59a
commit c9afd55ed6
5 changed files with 204 additions and 138 deletions

View File

@@ -80,7 +80,7 @@ void MemTracker::initialize() {
}
}
NMTPreInit::pre_to_post();
NMTPreInit::pre_to_post(level == NMT_off);
_tracking_level = level;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 SAP SE. All rights reserved.
* Copyright (c) 2022, 2023 SAP SE. All rights reserved.
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -37,55 +37,82 @@ static void* raw_malloc(size_t s) { ALLOW_C_FUNCTION(::malloc, ret
static void* raw_realloc(void* old, size_t s) { ALLOW_C_FUNCTION(::realloc, return ::realloc(old, s);) }
static void raw_free(void* p) { ALLOW_C_FUNCTION(::free, ::free(p);) }
// We must ensure that the start of the payload area of the nmt lookup table nodes is malloc-aligned
static const size_t malloc_alignment = 2 * sizeof(void*); // could we use max_align_t?
STATIC_ASSERT(is_aligned(sizeof(NMTPreInitAllocation), malloc_alignment));
// To keep matters simple we just raise a fatal error on OOM. Since preinit allocation
// is just used for pre-VM-initialization mallocs, none of which are optional, we don't
// need a finer grained error handling.
static void fail_oom(size_t size) {
vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "VM early initialization phase");
// Like raw_malloc(), but a failed allocation terminates the VM with an
// out-of-memory error instead of returning nullptr.
static void* raw_checked_malloc(size_t s) {
void* mem = raw_malloc(s);
if (mem != nullptr) {
return mem;
}
vm_exit_out_of_memory(s, OOM_MALLOC_ERROR, "VM early initialization phase");
return nullptr; // not reached - VM exits above
}
// Like raw_realloc(), but a failed reallocation terminates the VM with an
// out-of-memory error instead of returning nullptr.
static void* raw_checked_realloc(void* old, size_t s) {
void* mem = raw_realloc(old, s);
if (mem != nullptr) {
return mem;
}
vm_exit_out_of_memory(s, OOM_MALLOC_ERROR, "VM early initialization phase");
return nullptr; // not reached - VM exits above
}
// --------- NMTPreInitAllocation --------------
NMTPreInitAllocation* NMTPreInitAllocation::do_alloc(size_t payload_size) {
const size_t outer_size = sizeof(NMTPreInitAllocation) + payload_size;
guarantee(outer_size > payload_size, "Overflow");
void* p = raw_malloc(outer_size);
if (p == nullptr) {
fail_oom(outer_size);
}
NMTPreInitAllocation* a = new(p) NMTPreInitAllocation(payload_size);
return a;
// Header nodes are allocated with raw (untracked) malloc and exit the VM on OOM;
// NMT bookkeeping must not itself go through the tracked os::malloc path.
void* NMTPreInitAllocation::operator new(size_t count) {
return raw_checked_malloc(count);
}
NMTPreInitAllocation* NMTPreInitAllocation::do_reallocate(NMTPreInitAllocation* old, size_t new_payload_size) {
assert(old->next == nullptr, "unhang from map first");
// We just reallocate the old block, header and all.
const size_t new_outer_size = sizeof(NMTPreInitAllocation) + new_payload_size;
guarantee(new_outer_size > new_payload_size, "Overflow");
void* p = raw_realloc(old, new_outer_size);
if (p == nullptr) {
fail_oom(new_outer_size);
}
// re-stamp header with new size
NMTPreInitAllocation* a = new(p) NMTPreInitAllocation(new_payload_size);
return a;
}
void NMTPreInitAllocation::do_free(NMTPreInitAllocation* p) {
assert(p->next == nullptr, "unhang from map first");
// Counterpart to operator new above: release a header node via raw free.
void NMTPreInitAllocation::operator delete(void* p) {
raw_free(p);
}
// Allocates a raw payload block of the given size plus a separate
// NMTPreInitAllocation header node tracking it; exits the VM on OOM.
NMTPreInitAllocation* NMTPreInitAllocation::do_alloc(size_t payload_size) {
void* payload_block = raw_checked_malloc(payload_size);
return new NMTPreInitAllocation(payload_size, payload_block);
}
// Reallocates the payload of an existing pre-init allocation to the new size
// and replaces its header node; exits the VM on OOM. The old header is freed;
// callers must use the returned node from here on.
NMTPreInitAllocation* NMTPreInitAllocation::do_reallocate(NMTPreInitAllocation* a, size_t new_payload_size) {
assert(a->next == nullptr, "unhang from map first");
void* grown_payload = raw_checked_realloc(a->payload, new_payload_size);
NMTPreInitAllocation* const replacement = new NMTPreInitAllocation(new_payload_size, grown_payload);
delete a;
return replacement;
}
// Releases a pre-init allocation: frees the payload block first, then the
// tracking header node. The entry must already be removed from the map.
void NMTPreInitAllocation::do_free(NMTPreInitAllocation* a) {
assert(a->next == nullptr, "unhang from map first");
raw_free(a->payload);
delete a;
}
// --------- NMTPreInitAllocationTable --------------
// The lookup table itself lives in raw (untracked) malloc'ed memory;
// allocation failure exits the VM.
void* NMTPreInitAllocationTable::operator new(size_t count) {
return raw_checked_malloc(count);
}
// Counterpart to operator new above: release the table's backing storage.
// Note: no "return" before the call - raw_free() is void, and this matches
// the style of NMTPreInitAllocation::operator delete.
void NMTPreInitAllocationTable::operator delete(void* p) {
raw_free(p);
}
// Start with all hash buckets empty (null chain heads).
NMTPreInitAllocationTable::NMTPreInitAllocationTable() {
::memset(_entries, 0, sizeof(_entries));
}
// Tears down the lookup table: walks every bucket chain and deletes the
// NMTPreInitAllocation header nodes, but deliberately does NOT free the user
// payloads - surviving pre-init allocations stay valid after the table is gone.
NMTPreInitAllocationTable::~NMTPreInitAllocationTable() {
// clear LU entries, but let payloads live!
for (int i = 0; i < table_size; i++) {
NMTPreInitAllocation* a = _entries[i];
while (a != nullptr) {
NMTPreInitAllocation* a2 = a->next; // save successor before deleting the node
delete a;
a = a2;
}
}
}
// print a string describing the current state
void NMTPreInitAllocationTable::print_state(outputStream* st) const {
// Collect some statistics and print them
@@ -116,7 +143,7 @@ void NMTPreInitAllocationTable::print_map(outputStream* st) const {
for (int i = 0; i < table_size; i++) {
st->print("[%d]: ", i);
for (NMTPreInitAllocation* a = _entries[i]; a != nullptr; a = a->next) {
st->print( PTR_FORMAT "(" SIZE_FORMAT ") ", p2i(a->payload()), a->size);
st->print( PTR_FORMAT "(" SIZE_FORMAT ") ", p2i(a->payload), a->size);
}
st->cr();
}
@@ -132,7 +159,7 @@ void NMTPreInitAllocationTable::verify() const {
for (index_t i = 0; i < table_size; i++) {
int len = 0;
for (const NMTPreInitAllocation* a = _entries[i]; a != nullptr; a = a->next) {
index_t i2 = index_for_key(a->payload());
index_t i2 = index_for_key(a->payload);
assert(i2 == i, "wrong hash");
assert(a->size > 0, "wrong size");
len++;
@@ -167,8 +194,7 @@ unsigned NMTPreInit::_num_frees_pre = 0;
void NMTPreInit::create_table() {
assert(_table == nullptr, "just once");
void* p = raw_malloc(sizeof(NMTPreInitAllocationTable));
_table = new(p) NMTPreInitAllocationTable();
_table = new NMTPreInitAllocationTable;
}
// Allocate with os::malloc (hidden to prevent having to include os.hpp)
@@ -178,9 +204,23 @@ void* NMTPreInit::do_os_malloc(size_t size, MEMFLAGS memflags) {
// Switches from NMT pre-init state to NMT post-init state;
// in post-init, no modifications to the lookup table are possible.
void NMTPreInit::pre_to_post() {
void NMTPreInit::pre_to_post(bool nmt_off) {
assert(!MemTracker::is_initialized(), "just once");
DEBUG_ONLY(verify();)
if (nmt_off) {
// NMT is disabled.
// Since neither pre- nor post-init-allocations use headers, from now on any pre-init allocation
// can be handled directly by os::realloc or os::free.
// We also can get rid of the lookup table.
// Note that we deliberately leak the headers (NMTPreInitAllocation) in order to speed up startup.
// That may leak about 12KB of memory for ~500 surviving pre-init allocations, which is a typical
// number. This is a compromise to keep the coding simple and startup time short. It could very
// easily be improved by keeping a header pool, similar to metaspace ChunkHeaderPool. But since NMTPreInit
// had been criticized as "too complicated", I try to keep things short and simple.
delete _table;
_table = nullptr;
}
}
#ifdef ASSERT

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 SAP SE. All rights reserved.
* Copyright (c) 2022, 2023 SAP SE. All rights reserved.
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -121,19 +121,18 @@ class outputStream;
struct NMTPreInitAllocation {
NMTPreInitAllocation* next;
const size_t size; // (inner) payload size without header
// <-- USER ALLOCATION (PAYLOAD) STARTS HERE -->
void* const payload;
NMTPreInitAllocation(size_t size) : next(nullptr), size(size) {};
// Returns start of the user data area
void* payload() { return this + 1; }
const void* payload() const { return this + 1; }
NMTPreInitAllocation(size_t s, void* p) : next(nullptr), size(s), payload(p) {}
// These functions do raw-malloc/realloc/free a C-heap block of given payload size,
// preceded with a NMTPreInitAllocation header.
static NMTPreInitAllocation* do_alloc(size_t payload_size);
static NMTPreInitAllocation* do_reallocate(NMTPreInitAllocation* old, size_t new_payload_size);
static void do_free(NMTPreInitAllocation* p);
static NMTPreInitAllocation* do_reallocate(NMTPreInitAllocation* a, size_t new_payload_size);
static void do_free(NMTPreInitAllocation* a);
void* operator new(size_t l);
void operator delete(void* p);
};
class NMTPreInitAllocationTable {
@@ -170,22 +169,23 @@ class NMTPreInitAllocationTable {
NMTPreInitAllocation** find_entry(const void* p) {
const unsigned index = index_for_key(p);
NMTPreInitAllocation** aa = (&(_entries[index]));
while ((*aa) != nullptr && (*aa)->payload() != p) {
while ((*aa) != nullptr && (*aa)->payload != p) {
aa = &((*aa)->next);
}
assert((*aa) == nullptr || p == (*aa)->payload(),
assert((*aa) == nullptr || p == (*aa)->payload,
"retrieve mismatch " PTR_FORMAT " vs " PTR_FORMAT ".",
p2i(p), p2i((*aa)->payload()));
p2i(p), p2i((*aa)->payload));
return aa;
}
public:
NMTPreInitAllocationTable();
~NMTPreInitAllocationTable();
// Adds an entry to the table
void add(NMTPreInitAllocation* a) {
void* payload = a->payload();
void* payload = a->payload;
const unsigned index = index_for_key(payload);
assert(a->next == nullptr, "entry already in table?");
a->next = _entries[index]; // add to front
@@ -212,6 +212,9 @@ public:
void print_state(outputStream* st) const;
DEBUG_ONLY(void print_map(outputStream* st) const;)
DEBUG_ONLY(void verify() const;)
void* operator new(size_t l);
void operator delete(void* p);
};
// NMTPreInit is the outside interface to all of NMT preinit handling.
@@ -225,6 +228,7 @@ class NMTPreInit : public AllStatic {
static unsigned _num_frees_pre; // Number of pre-init frees
static void create_table();
static void delete_table();
static void add_to_map(NMTPreInitAllocation* a) {
assert(!MemTracker::is_initialized(), "lookup map cannot be modified after NMT initialization");
@@ -254,7 +258,7 @@ public:
// Switches from NMT pre-init state to NMT post-init state;
// in post-init, no modifications to the lookup table are possible.
static void pre_to_post();
static void pre_to_post(bool nmt_off);
// Called from os::malloc.
// Returns true if allocation was handled here; in that case,
@@ -266,7 +270,7 @@ public:
// Allocate entry and add address to lookup table
NMTPreInitAllocation* a = NMTPreInitAllocation::do_alloc(size);
add_to_map(a);
(*rc) = a->payload();
(*rc) = a->payload;
_num_mallocs_pre++;
return true;
}
@@ -281,35 +285,46 @@ public:
return handle_malloc(rc, new_size);
}
new_size = MAX2((size_t)1, new_size); // realloc(.., 0)
if (!MemTracker::is_initialized()) {
// pre-NMT-init:
// - the address must already be in the lookup table
// - find the old entry, remove from table, reallocate, add to table
NMTPreInitAllocation* a = find_and_remove_in_map(old_p);
a = NMTPreInitAllocation::do_reallocate(a, new_size);
add_to_map(a);
(*rc) = a->payload();
_num_reallocs_pre++;
return true;
} else {
// post-NMT-init:
// If the old block was allocated during pre-NMT-init, we must relocate it: the
// new block must be allocated with "normal" os::malloc.
// We do this by:
// - look up (but not remove! lu table is read-only here.) the old entry
// - allocate new memory via os::malloc()
// - manually copy the old content over
// - return the new memory
// - The lu table is readonly so we keep the old address in the table. And we leave
// the old block allocated too, to prevent the libc from returning the same address
// and confusing us.
const NMTPreInitAllocation* a = find_in_map(old_p);
if (a != nullptr) { // this was originally a pre-init allocation
void* p_new = do_os_malloc(new_size, memflags);
::memcpy(p_new, a->payload(), MIN2(a->size, new_size));
(*rc) = p_new;
switch (MemTracker::tracking_level()) {
case NMT_unknown: {
// pre-NMT-init:
// - the address must already be in the lookup table
// - find the old entry, remove from table, reallocate, add to table
NMTPreInitAllocation* a = find_and_remove_in_map(old_p);
a = NMTPreInitAllocation::do_reallocate(a, new_size);
add_to_map(a);
(*rc) = a->payload;
_num_reallocs_pre++;
return true;
}
break;
case NMT_off: {
// post-NMT-init, NMT *disabled*:
// Neither pre- nor post-init allocations use malloc headers, therefore we can just
// relegate the realloc to os::realloc.
return false;
}
break;
default: {
// post-NMT-init, NMT *enabled*:
// Pre-init allocation does not use malloc header, but from here on we need malloc headers.
// Therefore, the new block must be allocated with os::malloc.
// We do this by:
// - look up (but don't remove! lu table is read-only here.) the old entry
// - allocate new memory via os::malloc()
// - manually copy the old content over
// - return the new memory
// - The lu table is readonly, so we keep the old address in the table. And we leave
// the old block allocated too, to prevent the libc from returning the same address
// and confusing us.
const NMTPreInitAllocation* a = find_in_map(old_p);
if (a != nullptr) { // this was originally a pre-init allocation
void* p_new = do_os_malloc(new_size, memflags);
::memcpy(p_new, a->payload, MIN2(a->size, new_size));
(*rc) = p_new;
return true;
}
}
}
return false;
}
@@ -320,25 +335,36 @@ public:
if (p == nullptr) { // free(null)
return true;
}
if (!MemTracker::is_initialized()) {
// pre-NMT-init:
// - the allocation must be in the hash map, since all allocations went through
// NMTPreInit::handle_malloc()
// - find the old entry, unhang from map, free it
NMTPreInitAllocation* a = find_and_remove_in_map(p);
NMTPreInitAllocation::do_free(a);
_num_frees_pre++;
return true;
} else {
// post-NMT-init:
// - look up (but not remove! lu table is read-only here.) the entry
// - if found, we do nothing: the lu table is readonly, so we keep the old address
// in the table. We leave the block allocated to prevent the libc from returning
// the same address and confusing us.
// - if not found, we let regular os::free() handle this pointer
if (find_in_map(p) != nullptr) {
switch (MemTracker::tracking_level()) {
case NMT_unknown: {
// pre-NMT-init:
// - the allocation must be in the hash map, since all allocations went through
// NMTPreInit::handle_malloc()
// - find the old entry, unhang from map, free it
NMTPreInitAllocation* a = find_and_remove_in_map(p);
NMTPreInitAllocation::do_free(a);
_num_frees_pre++;
return true;
}
break;
case NMT_off: {
// post-NMT-init, NMT *disabled*:
// Neither pre- nor post-init allocations use malloc headers, therefore we can just
// relegate the free to os::free.
return false;
}
break;
default: {
// post-NMT-init, NMT *enabled*:
// - look up (but don't remove! lu table is read-only here.) the entry
// - if found, we do nothing: the lu table is readonly, so we keep the old address
// in the table. We leave the block allocated to prevent the libc from returning
// the same address and confusing us.
// - if not found, we let regular os::free() handle this pointer
if (find_in_map(p) != nullptr) {
return true;
}
}
}
return false;
}

View File

@@ -78,7 +78,7 @@ TEST_VM(NMTPreInit, stress_test_map) {
// look them all up
for (int i = 0; i < num_allocs; i++) {
const NMTPreInitAllocation* a = table.find(allocations[i]->payload());
const NMTPreInitAllocation* a = table.find(allocations[i]->payload);
ASSERT_EQ(a, allocations[i]);
}
@@ -86,7 +86,7 @@ TEST_VM(NMTPreInit, stress_test_map) {
for (int j = 0; j < num_allocs/2; j++) {
int pos = os::random() % num_allocs;
NMTPreInitAllocation* a1 = allocations[pos];
NMTPreInitAllocation* a2 = table.find_and_remove(a1->payload());
NMTPreInitAllocation* a2 = table.find_and_remove(a1->payload);
ASSERT_EQ(a1, a2);
NMTPreInitAllocation* a3 = NMTPreInitAllocation::do_reallocate(a2, small_random_nonzero_size());
table.add(a3);
@@ -97,13 +97,13 @@ TEST_VM(NMTPreInit, stress_test_map) {
// look them all up
for (int i = 0; i < num_allocs; i++) {
const NMTPreInitAllocation* a = table.find(allocations[i]->payload());
const NMTPreInitAllocation* a = table.find(allocations[i]->payload);
ASSERT_EQ(a, allocations[i]);
}
// free all
for (int i = 0; i < num_allocs; i++) {
NMTPreInitAllocation* a = table.find_and_remove(allocations[i]->payload());
NMTPreInitAllocation* a = table.find_and_remove(allocations[i]->payload);
ASSERT_EQ(a, allocations[i]);
NMTPreInitAllocation::do_free(a);
allocations[i] = NULL;

View File

@@ -173,46 +173,46 @@ public class NMTInitializationTest {
output.shouldContain("NMT initialized: " + nmtMode.name());
output.shouldContain("Preinit state:");
String regex = ".*entries: (\\d+).*sum bytes: (\\d+).*longest chain length: (\\d+).*";
output.shouldMatch(regex);
String line = output.firstMatch(regex, 0);
if (line == null) {
throw new RuntimeException("expected: " + regex);
}
System.out.println(line);
Pattern p = Pattern.compile(regex);
Matcher mat = p.matcher(line);
mat.matches();
int entries = Integer.parseInt(mat.group(1));
int sum_bytes = Integer.parseInt(mat.group(2));
int longest_chain = Integer.parseInt(mat.group(3));
System.out.println("found: " + entries + " - " + sum_bytes + longest_chain + ".");
if (nmtMode != NMTMode.off) { // in OFF mode LU table is deleted after VM initialization, nothing to see there
String regex = ".*entries: (\\d+).*sum bytes: (\\d+).*longest chain length: (\\d+).*";
output.shouldMatch(regex);
String line = output.firstMatch(regex, 0);
if (line == null) {
throw new RuntimeException("expected: " + regex);
}
System.out.println(line);
Pattern p = Pattern.compile(regex);
Matcher mat = p.matcher(line);
mat.matches();
int entries = Integer.parseInt(mat.group(1));
int sum_bytes = Integer.parseInt(mat.group(2));
int longest_chain = Integer.parseInt(mat.group(3));
System.out.println("found: " + entries + " - " + sum_bytes + longest_chain + ".");
// Now we test the state of the internal lookup table, and through our assumptions about
// early pre-NMT-init allocations:
// The normal allocation count of surviving pre-init allocations is around 300-500, with the sum of allocated
// bytes of a few dozen KB. We check these boundaries (with a very generous overhead) to see if the numbers are
// way off. If they are, we may either have a leak or just a lot more allocations than we thought before
// NMT initialization. Both cases should be investigated. Even if the allocations are valid, too many of them
// stretches the limits of the lookup map, and therefore may cause slower lookup. We should then either change
// the coding, reducing the number of allocations. Or enlarge the lookup table.
// Now we test the state of the internal lookup table, and through our assumptions about
// early pre-NMT-init allocations:
// The normal allocation count of surviving pre-init allocations is around 300-500, with the sum of allocated
// bytes of a few dozen KB. We check these boundaries (with a very generous overhead) to see if the numbers are
// way off. If they are, we may either have a leak or just a lot more allocations than we thought before
// NMT initialization. Both cases should be investigated. Even if the allocations are valid, too many of them
// stretches the limits of the lookup map, and therefore may cause slower lookup. We should then either change
// the coding, reducing the number of allocations. Or enlarge the lookup table.
// Apply some sensible assumptions
if (entries > testMode.num_command_line_args + 2000) { // Note: normal baseline is 400-500
throw new RuntimeException("Suspiciously high number of pre-init allocations.");
}
if (sum_bytes > 128 * 1024 * 1024) { // Note: normal baseline is ~30-40KB
throw new RuntimeException("Suspiciously high pre-init memory usage.");
}
if (longest_chain > testMode.expected_max_chain_len) {
// Under normal circumstances, load factor of the map should be about 0.1. With a good hash distribution, we
// should rarely see even a chain > 1. Warn if we see exceedingly long bucket chains, since this indicates
// either that the hash algorithm is inefficient or we have a bug somewhere.
throw new RuntimeException("Suspiciously long bucket chains in lookup table.");
}
// Apply some sensible assumptions
if (entries > testMode.num_command_line_args + 2000) { // Note: normal baseline is 400-500
throw new RuntimeException("Suspiciously high number of pre-init allocations.");
}
if (sum_bytes > 128 * 1024 * 1024) { // Note: normal baseline is ~30-40KB
throw new RuntimeException("Suspiciously high pre-init memory usage.");
}
if (longest_chain > testMode.expected_max_chain_len) {
// Under normal circumstances, load factor of the map should be about 0.1. With a good hash distribution, we
// should rarely see even a chain > 1. Warn if we see exceedingly long bucket chains, since this indicates
// either that the hash algorithm is inefficient or we have a bug somewhere.
throw new RuntimeException("Suspiciously long bucket chains in lookup table.");
}
// Finally, check that we see our final NMT report:
if (nmtMode != NMTMode.off) {
// Finally, check that we see our final NMT report:
output.shouldContain("Native Memory Tracking:");
output.shouldMatch("Total: reserved=\\d+, committed=\\d+.*");
}