// $Archive:: /Ti/inline/inline-heuristic.cc $ // $Date: Fri, 28 Jan 2005 02:37:42 -0800 $ // $Revision: 1.9.1.2.1.1.1.12 $ // Description: heuristics for method inlining // Copyright 2000, Dan Bonachea #include "inline.h" #include "inline-heuristic.h" #include "bitset.h" #include "is-main.h" #include "utils.h" #include "code-util.h" #include "string-utils.h" extern int compile_verbose_level; //------------------------------------------------------------------------------------ // methods which are always considered to be declared with an "inline" keyword static bool implicitInlineHint(Decl *md) { if (md->category() == Decl::Constructor && // compiler-generated default constructors md->modifiers() & Common::CompilerGenerated) return true; string fullname = md->fullName(); if (fullname == "java.lang.Object") return true; // Object constructor if (hasPrefix(fullname, "java.lang.Math.")) return true; // anything in java.lang.Math return false; } //------------------------------------------------------------------------------------ // a simple inlining heuristic //------------------------------------------------------------------------------------ bool manualHeuristic::shouldInline(TreeNode *callsite) { // simple inlining policy - only calls to non-virtual, non-directly recursive methods // that were manually marked as inline by the user, or implictly hinted by predicate above if (!canCallStatically(callsite)) return false; // uncertain callee //TreeNode* caller = getSiteCaller(callsite); TreeNode* callee = getSiteCallee(callsite); if (callee->flags() & Common::Native) return false; // don't inline native methods (yet) if (callee->decl()->modifiers() & (Common::Inline) || // marked as inline by user implicitInlineHint(callee->decl())) { // or specially-recognized method if (isDirectlyRecursive( callee )) return false; // directly recursive call return true; } return false; } //------------------------------------------------------------------------------------ // following functions do the required analysis for ManualHeuristic // we lazily analyze methods to discover direct recursion, cache the answer // and update the answer whenever it might have changed (after an inlining operation) // inlineInfo ptr gets auto-initialized to NULL, so leverage that and save some memory #define IS_VALID 1 #define IS_DIRECTLY_RECURSIVE 2 bool manualHeuristic::isDirectlyRecursive(TreeNode *method) { assert(isProcedure(method)); intptr_t status = (intptr_t)method->inlineInfo(); if (status & IS_VALID) return !!(status & IS_DIRECTLY_RECURSIVE); updateInfo(method); status = (intptr_t)method->inlineInfo(); assert(status & IS_VALID); return !!(status & IS_DIRECTLY_RECURSIVE); } static intptr_t status; static TreeNode *checkForRecursion(TreeNode *callsite) { TreeNode *caller = getSiteCaller(callsite); llist* callees = getSiteCallees(callsite); if (find(caller, callees)) status |= IS_DIRECTLY_RECURSIVE; free_all(callees); return NULL; } void manualHeuristic::updateInfo(TreeNode *method) { assert(isProcedure(method)); ensureLowered(method); // make sure it's been lowered status = IS_VALID; depthFirstSearch(method, isProcedureCallSite, checkForRecursion); method->inlineInfo((void *)status); } void manualHeuristic::update(TreeNode *callSiteInlined, TreeNode *replacementCode) { TreeNode *caller = getSiteCaller(callSiteInlined); updateInfo(caller); // may be directly recursive now after update } //------------------------------------------------------------------------------------ // a more sophisticated inlining heuristic //------------------------------------------------------------------------------------ const int complexHeuristic::smallMethodSize = 100; TreeNode *processNewSites(TreeNode *callsite) { // merge in information from new sites created during inlining // update caller TreeNode* caller = getSiteCaller(callsite); complexHeuristic::inlineInfo *callerii = ii(caller); push(callerii->directCallSites, callsite); // update callees llist* callees = getSiteCallees(callsite); foreach(callee, llist, *callees) { complexHeuristic::inlineInfo *calleeii = ii(*callee); push(calleeii->directCallerSites, callsite); if (!find(caller, calleeii->callers)) push(calleeii->callers, caller); } free_all(callees); return NULL; } //------------------------------------------------------------------------------------ void complexHeuristic::processWorklist(llist *worklist) { // destructively consume worklist to solve backward DFA while (worklist) { TreeNode *method = worklist->front(); worklist = worklist->free(); inlineInfo *ii = ii(method); Bitset *callReaches = ii->callReaches; Bitset oldcallReaches(bitMaskWidth); oldcallReaches.un(callReaches); // copy current bitmask // merge in successors foreach (callee, llist, *(ii->callees)) { inlineInfo *calleeii = ii(*callee); callReaches->un(calleeii->callReaches); // merge } if (!callReaches->equal(&oldcallReaches)) { // our state changed // propagate to our callers foreach (caller, llist, *(ii->callers)) { if (!find(*caller, worklist) && *caller != method) worklist = cons(*caller, worklist); } } } } //------------------------------------------------------------------------------------ llist *complexHeuristic::buildCGRevPostOrder(TreeNode *method, int visitedidx) { inlineInfo *ii = ii(method); if (ii->visited == visitedidx) return NULL; // already visited this traversal ii->visited = visitedidx; // visit now llist *children = copylist(ii->callees); llist *returnList = NULL; while (children) { TreeNode* nextchild = NULL; if (maxChildPostOrdering) { // pick the child with the most callees to appear first in list int maxcallees = -1; TreeNode* maxchild = NULL; foreach (child, llist, *children) { int thiscnt = ii(*child)->callees->size(); if (thiscnt > maxcallees) { maxchild = *child; maxcallees = thiscnt; } } assert(maxcallees >= 0 && maxchild); nextchild = maxchild; children = remove(maxchild, children); } else { // choose in random order nextchild = children->front(); children = children->free(); } returnList = extend(buildCGRevPostOrder(nextchild, visitedidx), returnList); } return cons(method, returnList); } //------------------------------------------------------------------------------------ llist *complexHeuristic::buildCGPostOrder(TreeNode *rootMethod) { // return post-order traversal of the call graph, starting from rootMethod static int lastVisitedIdx = 0; // assume all visited bits start at zero assert(rootMethod != NULL); return dreverse(buildCGRevPostOrder(rootMethod, ++lastVisitedIdx)); } //------------------------------------------------------------------------------------ int complexHeuristic::sizeEstimate(TreeNode *t) { // return an estimate of the executable size that will result from the subtree rooted at t int sz = 0; if (t->isTypeNode()) // type nodes have no runtime representation and no interesting children return 0; for (int childidx=0; childidx < t->arity(); childidx++) { sz += sizeEstimate(t->child(childidx)); } // as a first approximation, just count the nodes return sz + 1; } //------------------------------------------------------------------------------------ static complexHeuristic *currentInit = NULL; // the complex object currently being initialized TreeNode *initSiteInfo(TreeNode *callsite) { // init site-specific local info TreeNode *caller = getSiteCaller(callsite); llist* callees = getSiteCallees(callsite); complexHeuristic::inlineInfo *callerii = ii(caller); push(callerii->directCallSites, callsite); foreach(callee, llist, *callees) { complexHeuristic::inlineInfo *calleeii; calleeii = ii(*callee); if (calleeii == NULL) initMethodInfo(*callee); // callee hasn't been seen yet calleeii = ii(*callee); assert(calleeii != NULL); // update caller if (!find(*callee, callerii->callees)) { push(callerii->callees, *callee); } // update callee push(calleeii->directCallerSites, callsite); if (!find(caller, calleeii->callers)) { push(calleeii->callers, caller); } } free_all(callees); return NULL; } //------------------------------------------------------------------------------------ void initMethodInfo(TreeNode *method) { assert(!ii(method)); ensureLowered(method); // alloc method local data structures complexHeuristic::inlineInfo *ii = new complexHeuristic::inlineInfo(); method->inlineInfo(ii); currentInit->bitMaskWidth++; // setup method meta-data // decide if this method will be code-genned TreeNode *p = method->parent(); while (p && !isCompileUnitNode(p)) p = p->parent(); assert(p != NULL); ii->willBeCodeGenned = p->selectedForCodeGen(false); // discover non-library native methods that limit DCE if (method->flags() & Common::Native) { bool isLibraryMethod = false; if (!p->package()->absent()) { if (isInLibrary(p->package()->decl())) isLibraryMethod = true; } if (!isLibraryMethod) currentInit->hasUserNativeMethods = true; } // initialize codeSize info over live methods int size = complexHeuristic::sizeEstimate(method); ii->codeSize = size; if (ii->willBeCodeGenned) { currentInit->totalCodeSize += size; } // process calls and init call info depthFirstSearch(method, isProcedureCallSite, initSiteInfo); } //------------------------------------------------------------------------------------ void complexHeuristic::outputMethodInfo(TreeNode *method) { inlineInfo *ii = ii(method); cout << getMethodShortName(method) << ": " << "codeSize=" << ii->codeSize << (method->decl()->modifiers() & Common::Inline?" inline":"") << (method->flags() & Common::Native?" native":"") << (ii->willBeCodeGenned?" codegenned":"") << endl; cout << "callers: " << printMethodList(ii->callers) << endl; cout << "callees: " << printMethodList(ii->callees) << endl; cout << "reachable methods: "; Bitset *b = ii->callReaches; int num = 0; for (int i=0; i < bitMaskWidth; i++) { if (b->test(i) && !find((TreeNode *)reachNode[i], ii->callees)) { // only include those which are not directly called #if 0 if (num) cout << ", "; cout << getMethodShortName(reachNode[i]); #endif num++; } } cout << "(" << ii->callees->size() << " direct, " << num << " indirect)" << endl; } //------------------------------------------------------------------------------------ static bool nonLeafMethod(TreeNode * const & md) { return !complexHeuristic::isLeafMethod(md); } void complexHeuristic::initializePolicy() { considerSitesCreatedByInlining = true; // complex traverses in post-order for apps, but still want this for libraries maxInliningDepth = opt_inline_maxdepth; nextMethodChecked = false; // we inline methods from libraries bitMaskWidth = 0; totalCodeSize = 0; currentInit = this; if (codeGen_main) { // get a post-ordering of the methods in the call graph SystemEntry = (MethodDeclNode*)(*mainMethods.begin())->source(); //SystemEntry = new MethodDeclNode(NULL, NULL, NULL, NULL, NULL, NULL, NULL); // TODO: add other system entry points: static class initializers initMethodInfo(SystemEntry); methodList = buildCGPostOrder(SystemEntry); // we only care about reachable methods } else { SystemEntry = NULL; // library has too many entry points for post-order to be useful // (all public and protected methods & constructors) methodList = buildMethodList(true); // need to include all methods we might possibly call foreach (method, llist, *methodList) { if (ii(*method) == NULL) initMethodInfo(*method); } } totalCodeSizeInitial = totalCodeSize; // remember for later currentInit = NULL; assert(bitMaskWidth == (int)methodList->size()); reachNode = (MethodDeclNode **) (new MethodDeclNode *[bitMaskWidth]); int i = 0; // allocate Bitsets & assign ids foreach (method, llist, *methodList) { inlineInfo *ii = ii(*method); ii->reachIdx = i; ii->callReaches = new Bitset(bitMaskWidth); reachNode[i] = (MethodDeclNode *)*method; i++; } // init call reaches to local calls foreach (method, llist, *methodList) { inlineInfo *ii = ii(*method); llist* callees = ii->callees; while (callees) { int bitidx = ii(callees->front())->reachIdx; ii->callReaches->set(bitidx); callees = callees->tail(); } } // destructively process worklist to solve DFA llist* worklist = copylist(methodList); processWorklist(worklist); // output results of analysis for debugging & tuning if (inline2Debug) { llist* p = methodList; while (p) { outputMethodInfo(p->front()); cout << endl; p = p->tail(); } } // remove leaf methods from the processing list (they have no call sites to inline) methodList = destructive_filter(nonLeafMethod, methodList); } //------------------------------------------------------------------------------------ TreeNode *complexHeuristic::nextMethod() { // decide the next method to be processed // use post-order traversal on call graph if (methodList) { TreeNode *next = methodList->front(); methodList = methodList->free(); return next; } else return NOMORE; // all live methods have been processed } //------------------------------------------------------------------------------------ #define REJECT(reason) do { if (inlineStats||compile_verbose_level) rejectReason[string(reason)]++; return false; } while(0) #define ACCEPT(reason) do { if (inlineStats||compile_verbose_level) acceptReason[string(reason)]++; return true; } while(0) bool complexHeuristic::shouldInline(TreeNode *callsite) { // a more sophisticated inlining policy if (!canCallStatically(callsite)) REJECT("uncertain callee"); TreeNode* caller = getSiteCaller(callsite); TreeNode* callee = getSiteCallee(callsite); if (isDirectlyRecursive(callee)) REJECT("directly recursive callee"); // prevent non-termination if (callee->flags() & Common::Native) REJECT("native method"); // don't inline native methods (yet) inlineInfo *callerii = ii(caller); inlineInfo *calleeii = ii(callee); if (calleeii->directCallerSites->size() == 1) ACCEPT("single caller"); // this is the only call to this method if (callee->decl()->modifiers() & (Common::Inline)) ACCEPT("marked inline by user"); if(implicitInlineHint(callee->decl())) ACCEPT("implicitly marked as inline"); if (calleeii->codeSize <= smallMethodSize) ACCEPT("small callee"); // callee is smaller than the function call if (isNonLeafMethod(callee)) REJECT("non-leaf method"); // this one is safe, but probably not a good idea if (isRecursiveSite(callsite)) REJECT("callsite along recursive cycle"); if (callerii->willBeCodeGenned) { // calculate an estimate of current codeBloat float codeBloat = (totalCodeSize - totalCodeSizeInitial + calleeii->codeSize) / ((float)totalCodeSizeInitial); if (codeBloat > maxCodeBloat) REJECT("code bloat limit exceeded"); ACCEPT("leaf method within bloat limit"); } else ACCEPT("leaf method in library code"); } //------------------------------------------------------------------------------------ void complexHeuristic::update(TreeNode *callSiteInlined, TreeNode *replacementCode) { TreeNode* caller = getSiteCaller(callSiteInlined); inlineInfo *callerii = ii(caller); TreeNode* callee = getSiteCallee(callSiteInlined); inlineInfo *calleeii = ii(callee); // update local info if ((callerii->directCallSites = remove(callSiteInlined, callerii->directCallSites)) == ((llist*)-1)) fatal_error(""); if ((calleeii->directCallerSites = remove(callSiteInlined, calleeii->directCallerSites)) == ((llist*)-1)) fatal_error(""); depthFirstSearch(replacementCode, isProcedureCallSite, processNewSites); // update caller's callees (because we possibly removed one by inlining) free_all(callerii->callees); callerii->callees = NULL; foreach(callsite, llist, *(callerii->directCallSites)) { llist* sitecallees = getSiteCallees(*callsite); foreach(sitecallee, llist, *sitecallees) { if (!find(*sitecallee, callerii->callees)) { push(callerii->callees, *sitecallee); } } free_all(sitecallees); } // update callee's callers (because we possibly removed one by inlining) free_all(calleeii->callers); calleeii->callers = NULL; foreach(callsite, llist, *(calleeii->directCallerSites)) { TreeNode *sitecaller = getSiteCaller(*callsite); if (!find(sitecaller, calleeii->callers)) { push(calleeii->callers, sitecaller); } } // update global info if (!find(callee, callerii->callees)) { // we just inlined the last call from caller to callee // reset to local calls callerii->callReaches->subtract(callerii->callReaches); foreach (callee, llist, *(callerii->callees)) { int idx = ii(*callee)->reachIdx; callerii->callReaches->set(idx); } // merge in caller's callees and propagate to caller's callers processWorklist(cons(caller, copylist(callerii->callers))); } // update codeSize estimates int sizeIncrease = sizeEstimate(replacementCode) - sizeEstimate(callSiteInlined); callerii->codeSize += sizeIncrease; // we don't charge for inline operations that inline into non-code genned code if (ii(caller)->willBeCodeGenned) { if (calleeii->callers->size() == 0) // method is now dead and is assumed to be removed totalCodeSize -= calleeii->codeSize; totalCodeSize += sizeIncrease; } } //------------------------------------------------------------------------------------ void complexHeuristic::finalizePolicy() { if (codeGen_main && performDME) { int count = 0; // dead method elimination (DME) // don't run DME on library - too many entry points and native upcalls // can't run DME if user has supplied native code - it could potentially call anything // just warn for now and make the optimization off by default if (hasUserNativeMethods) cerr << "WARNING: performing dead-method elimination in the presence of " "user-provided native methods. Native upcalls may lead to failures."; // DME may still remove live methods that are only called from static initializers // (since these don't appear in our call graph) but this should be rare llist *fullMethodList = buildMethodList(true); foreach (method, llist, *fullMethodList) { MethodDeclNode *mdn = (MethodDeclNode *)*method; inlineInfo *ii = ii(mdn); // may be NULL if method not in call graph if (!(mdn->flags() & Common::Native) && mdn != SystemEntry && (ii == NULL || (ii->willBeCodeGenned && !isTransitivelyCalledBy(mdn, SystemEntry)))) { mdn->decl()->isDead(true); compile_status(2, string("DME removing dead method: ") + getMethodShortName(mdn) + " (" + mdn->position().asString() + ")"); count++; } } if (count) compile_status(0, string(" dead-method elimination removed ") + int2string(count) + " dead method bodies."); free_all(fullMethodList); } if (inlineStats || compile_verbose_level) { compile_status(0, string(" inlining code bloat approximation: ") + Literal((float)((((float)(totalCodeSize - totalCodeSizeInitial))/totalCodeSizeInitial) * 100.0f)).asString() + " % increase"); compile_status(0, "-------------------------------------------------------"); compile_status(0, "Call-site acceptance reasons:"); for (map::const_iterator d = acceptReason.begin(); d != acceptReason.end(); d++) { compile_status(0, string("\t") + (*d).first + "\t" + int2string((*d).second) + "\t" + Literal((float)(((*d).second * 100.0f)/numSitesConsidered)).asString() + " %"); } compile_status(0, ""); compile_status(0, "Call-site rejection reasons:"); for (map::const_iterator d = rejectReason.begin(); d != rejectReason.end(); d++) { compile_status(0, string("\t") + (*d).first + "\t" + int2string((*d).second) + "\t" + Literal((float)(((*d).second * 100.0f)/numSitesConsidered)).asString() + " %"); } compile_status(0, "-------------------------------------------------------"); } } //------------------------------------------------------------------------------------ static InstancePool< complexHeuristicInlineInfo > *iiHeap = NULL; void complexHeuristic::initializeStrategy() { assert(!iiHeap); iiHeap = new InstancePool< complexHeuristicInlineInfo >(); } void complexHeuristic::finalizeStrategy() { assert(iiHeap != NULL); // cleanup memory usage - all inlineInfos and BitSets inlineInfo::resetHeap(); delete iiHeap; // release the free list iiHeap = NULL; } void complexHeuristicInlineInfo::resetHeap() { assert(iiHeap != NULL); iiHeap->reset(); } void *complexHeuristicInlineInfo::operator new(size_t size) { assert(iiHeap != NULL); assert(size == sizeof(complexHeuristicInlineInfo)); return iiHeap->alloc(); } void complexHeuristicInlineInfo::operator delete(void *buffer, size_t size) { assert(iiHeap != NULL); assert(size == sizeof(complexHeuristicInlineInfo)); iiHeap->release(buffer); } //------------------------------------------------------------------------------------