Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,9 @@
product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC, \
"do cleanup after every iteration of incremental inlining") \
\
product(bool, IncrementalInlineVector, true, DIAGNOSTIC, \
"Inline fallback implementation of failed vector intrinsics") \
\
product(intx, LiveNodeCountInliningCutoff, 40000, \
"max number of live nodes in a method") \
range(0, max_juint / 8) \
Expand Down
25 changes: 25 additions & 0 deletions src/hotspot/share/opto/callGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,31 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal
return cg;
}

// Late-inline call generator for a vector intrinsic that additionally carries
// a fallback call generator for the same method.
//
// generate() behaves like LateInlineCallGenerator::generate() and, in addition,
// wraps the fallback generator in a regular late-inline generator bound to the
// same call node and registers it with the current compilation through
// Compile::add_vector_late_inline().
class LateInlineVectorCallGenerator : public LateInlineCallGenerator {
 private:
  // Generator used to build the fallback late-inline candidate in generate().
  CallGenerator* _fallback_cg;

 public:
  // method       - the callee this generator is created for
  // intrinsic_cg - generator handed to the LateInlineCallGenerator base
  // fallback_cg  - must be non-null and report is_parse()
  LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) :
    LateInlineCallGenerator(method, intrinsic_cg), _fallback_cg(fallback_cg) {
    // Checked separately so that a failure names the violated precondition.
    assert(_fallback_cg != nullptr, "fallback call generator is required");
    assert(_fallback_cg->is_parse(), "fallback call generator must be a parse generator");
  }

  virtual bool is_vector_late_inline() const { return true; }

  virtual JVMState* generate(JVMState* jvms) {
    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
    // call_node() is read only after the base class generate() has run; the
    // fallback generator is attached to that very same call node.
    CallGenerator* fallback = CallGenerator::for_late_inline(method(), _fallback_cg)->with_call_node(call_node());
    Compile::current()->add_vector_late_inline(fallback);
    return new_jvms;
  }
};

// Factory for a LateInlineVectorCallGenerator: pairs the intrinsic generator
// with the fallback generator for method m.
CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) {
  CallGenerator* vector_cg = new LateInlineVectorCallGenerator(m, intrinsic_cg, fallback_cg);
  return vector_cg;
}


// Allow inlining decisions to be delayed
class LateInlineVirtualCallGenerator : public VirtualCallGenerator {
private:
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/callGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj {
// same but for method handle calls
virtual bool is_mh_late_inline() const { return false; }
virtual bool is_string_late_inline() const { return false; }
virtual bool is_vector_late_inline() const { return false; }
virtual bool is_boxing_late_inline() const { return false; }
virtual bool is_vector_reboxing_late_inline() const { return false; }
virtual bool is_virtual_late_inline() const { return false; }
Expand Down Expand Up @@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj {
static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg);
static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses);
Expand Down
29 changes: 29 additions & 0 deletions src/hotspot/share/opto/compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) {
remove_useless_late_inlines( &_late_inlines, dead);
remove_useless_late_inlines( &_string_late_inlines, dead);
remove_useless_late_inlines( &_boxing_late_inlines, dead);
remove_useless_late_inlines( &_vector_late_inlines, dead);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead);

if (dead->is_CallStaticJava()) {
Expand Down Expand Up @@ -480,6 +481,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
remove_useless_late_inlines( &_late_inlines, useful);
remove_useless_late_inlines( &_string_late_inlines, useful);
remove_useless_late_inlines( &_boxing_late_inlines, useful);
remove_useless_late_inlines( &_vector_late_inlines, useful);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);)
}
Expand Down Expand Up @@ -694,6 +696,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_string_late_inlines(comp_arena(), 2, 0, nullptr),
_boxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_late_inlines(comp_arena(), 2, 0, nullptr),
_late_inlines_pos(0),
_has_mh_late_inlines(false),
_oom(false),
Expand Down Expand Up @@ -2173,6 +2176,25 @@ void Compile::shuffle_late_inlines() {
shuffle_array(*C, _late_inlines);
}

void Compile::process_vector_late_inlines() {
for (int i = 0; i < _vector_late_inlines.length(); i++) {
CallGenerator* cg = _vector_late_inlines.at(i);

// When a vector intrinsic fails, set_generator(cg) caches the
// LateInlineVectorCallGenerator on the call node to allow retries
// if IGVN optimizes the call node's inputs. If the call node is not
// on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal
// does not fire and the cached generator persists. Once _late_inlines
// drains and we commit to the fallback here, clear the stale generator
// to prevent a subsequent IGVN pass from re-registering the intrinsic
// attempt into _late_inlines alongside the fallback, which would create
// duplicate call_node entries.
cg->call_node()->as_CallJava()->set_generator(nullptr);
add_late_inline(cg);
}
_vector_late_inlines.clear();
}

// Perform incremental inlining until bound on number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
TracePhase tp(_t_incrInline);
Expand Down Expand Up @@ -2230,6 +2252,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);

if (failing()) return;

if (_late_inlines.length() == 0) {
process_vector_late_inlines();
}
}

igvn_worklist()->ensure_empty(); // should be done with igvn
Expand Down Expand Up @@ -4585,6 +4611,9 @@ void Compile::log_inline_id(CallGenerator* cg) {
}

void Compile::log_inline_failure(const char* msg) {
if (inline_printer()->is_suspended()) {
return;
}
if (C->log() != nullptr) {
C->log()->inline_fail(msg);
}
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/share/opto/compile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ class Compile : public Phase {
GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations

GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
GrowableArray<CallGenerator*> _vector_late_inlines; // inline fallback implementation for failed intrinsics

int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending?
Expand Down Expand Up @@ -508,6 +509,12 @@ class Compile : public Phase {
InlinePrinter _inline_printer;

public:

// Appends cg to _vector_late_inlines, the list of fallback implementations
// to inline for failed intrinsics.
void add_vector_late_inline(CallGenerator* cg) {
_vector_late_inlines.push(cg);
}
// Declared here, defined out of line.
void process_vector_late_inlines();

void* barrier_set_state() const { return _barrier_set_state; }

InlinePrinter* inline_printer() { return &_inline_printer; }
Expand Down
15 changes: 15 additions & 0 deletions src/hotspot/share/opto/doCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,21 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
cg_intrinsic = cg;
cg = nullptr;
} else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) {
if (IncrementalInlineVector && allow_inline) {
// Try to late inline fallback implementation if intrinsification attempt fails.
CallGenerator* fallback_cg;
{
InlinePrinterSuspendScope guard(C->inline_printer());
fallback_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms,
true /*allow_inline*/, prof_factor,
speculative_receiver_type, false /*allow_intrinsics*/);
}
if (fallback_cg != nullptr && fallback_cg->is_parse()) {
return CallGenerator::for_vector_late_inline(callee, cg, fallback_cg);
}
// Fallback not inlineable by regular heuristics; fall through.
}
// Don't try to inline fallback implementation.
return CallGenerator::for_late_inline(callee, cg);
} else {
return cg;
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/opto/printinlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ outputStream* InlinePrinter::record(ciMethod* callee, JVMState* state, InliningR
if (!is_enabled()) {
return &_nullStream;
}
if (is_suspended()) {
return &_nullStream;
}
outputStream* stream = locate(state, callee)->add(result);
if (msg != nullptr) {
stream->print("%s", msg);
Expand Down
16 changes: 15 additions & 1 deletion src/hotspot/share/opto/printinlining.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class InlinePrinter {

Compile* C;

uint _suspend_depth;

// In case print inline is disabled, this null stream is returned from ::record()
nullStream _nullStream;

Expand All @@ -126,7 +128,7 @@ class InlinePrinter {
IPInlineSite _root{nullptr, 0};

public:
InlinePrinter(Compile* compile) : C(compile) {}
InlinePrinter(Compile* compile) : C(compile), _suspend_depth(0) {}

// Saves the result of an inline attempt of method at state.
// An optional string message with more details that is copied to the stream for this attempt. Pointer is not captured.
Expand All @@ -136,6 +138,18 @@ class InlinePrinter {

// Prints all collected inlining information to the given output stream.
void print_on(outputStream* tty) const;

// True while at least one suspend() has not yet been matched by a resume().
bool is_suspended() const {
  return _suspend_depth != 0;
}

// Opens one more level of suspension; pair every call with resume().
void suspend() {
  ++_suspend_depth;
}

// Closes one level of suspension; asserts that a matching suspend() exists.
void resume() {
  assert(_suspend_depth > 0, "unbalanced resume");
  --_suspend_depth;
}
};

// Scoped guard that suspends an InlinePrinter for its own lifetime: the
// constructor calls suspend() and the destructor calls the matching resume().
class InlinePrinterSuspendScope : public StackObj {
 private:
  InlinePrinter* const _printer;

 public:
  // printer must be non-null; it is dereferenced here and in the destructor.
  explicit InlinePrinterSuspendScope(InlinePrinter* printer) : _printer(printer) {
    assert(_printer != nullptr, "inline printer is required");
    _printer->suspend();
  }
  ~InlinePrinterSuspendScope() { _printer->resume(); }

  // Not copyable: a copy would call resume() a second time without a matching
  // suspend() and unbalance the printer's suspend depth.
  InlinePrinterSuspendScope(const InlinePrinterSuspendScope&) = delete;
  InlinePrinterSuspendScope& operator=(const InlinePrinterSuspendScope&) = delete;
};

#endif // PRINTINLINING_HPP
3 changes: 2 additions & 1 deletion test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
*/
public class TestVectorTest {
public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
"-XX:-IncrementalInlineVector");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you add these flags here? Would the IR rules fail without them?
Suggestion: can you have a run with and a run without the flag, and then show which IR rules are affected, guarding them with the flag?

}

@DontInline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ public static void testLongVectorUnsignedCondition() {
public static void main(String[] args) {
TestFramework testFramework = new TestFramework();
testFramework.setDefaultWarmup(10000)
.addFlags("--add-modules=jdk.incubator.vector")
.addFlags("--add-modules=jdk.incubator.vector",
"-XX:-IncrementalInlineVector")
Comment on lines +256 to +257

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question about flag here.

.addFlags("-XX:UseSVE=0")
.start();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1746,8 +1746,9 @@ public static void testMaskedCompareMaskNotNegative() {

public static void main(String[] args) {
TestFramework testFramework = new TestFramework();
testFramework.setDefaultWarmup(10000)
.addFlags("--add-modules=jdk.incubator.vector")
testFramework.setDefaultWarmup(5000)
.addFlags("--add-modules=jdk.incubator.vector",
"-XX:-IncrementalInlineVector")
Comment on lines +1750 to +1751

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question about flag here.

.start();
}
}