#ifndef _INCLUDE_ARRAY_ADDR_H_ #define _INCLUDE_ARRAY_ADDR_H_ #include "java_array_methods.h" #include "ti-memory.h" #include "titanium.h" #define JAVA_ARRAY_ADDR_( form, result, array, index, where ) \ do { \ JAVA_ARRAY_CHECK_ ## form ( (array), (index), 1, where ); \ FIELD_ADDR_ ## form ( (result), (array), data[0] ); \ INDEX_ ## form ( (result), (result), (index) ); \ } while (0) #define JAVA_ARRAY_ADDR_GLOBAL( result, array, index, where ) \ JAVA_ARRAY_ADDR_( GLOBAL, result, array, index, where ) #define JAVA_ARRAY_ADDR_LOCAL( result, array, index, where ) \ JAVA_ARRAY_ADDR_( LOCAL, result, array, index, where ) #define TI_ARRAY_ADDR_( form, elem, arity ) TI_ARRAY_ADDR_ ## form ## _ ## elem ## _ ## arity #define TI_ARRAY_ADDR_GLOBAL( elem, arity ) TI_ARRAY_ADDR_( GLOBAL, elem, arity ) #define TI_ARRAY_ADDR_LOCAL( elem, arity ) TI_ARRAY_ADDR_( LOCAL, elem, arity ) /* Ti array optimized "fast" random accesses: expand access as a direct inline macro to avoid the cost of indirection and copying of array struct - take advantage of the fact that arguments are all variables (ensured by the constraints on using FASTADDR) DOB 3/17/03 */ #if BOUNDS_CHECKING /* with bounds-checking we revert to the simple, slower mechanism */ #define TI_ARRAY_FASTADDR_GLOBAL TI_ARRAY_ADDR_GLOBAL #define TI_ARRAY_FASTADDR_LOCAL TI_ARRAY_ADDR_LOCAL #else #ifndef PFAST_MULT /* optimization: trading multiplies for branches in the case where we have strong reason to believe one of the operands could be 1 empirically shown to help performance on Power3 and x86 */ #define PFAST_MULT(a,b) (PREDICT_TRUE((b) == 1) ? (a) : (a)*(b) ) #endif #define _TI_ARRAY_FASTADDR1(type, result, baseptr, ARR, pt) do { \ register jint const delta0 = PFAST_MULT((pt - ARR.base[0]),ARR.sideFactors[0]); \ INDEX_##type(result, baseptr, PFAST_DIVIDE(delta0,ARR.stride[0])); \ } while (0) #define _TI_ARRAY_FASTADDR2(type, result, baseptr, ARR, pt) do { \ register jint const delta0 = (pt.x0 - ARR.base[0])*ARR.sideFactors[0]; \ register jint const delta1 = PFAST_MULT((pt.x1 - ARR.base[1]),ARR.sideFactors[1]); \ INDEX_##type(result, baseptr, PFAST_DIVIDE(delta0,ARR.stride[0]) + \ PFAST_DIVIDE(delta1,ARR.stride[1])); \ } while (0) #define _TI_ARRAY_FASTADDR3(type, result, baseptr, ARR, pt) do { \ register jint const delta0 = (pt.x0 - ARR.base[0])*ARR.sideFactors[0]; \ register jint const delta1 = (pt.x1 - ARR.base[1])*ARR.sideFactors[1]; \ register jint const delta2 = PFAST_MULT((pt.x2 - ARR.base[2]),ARR.sideFactors[2]); \ INDEX_##type(result, baseptr, PFAST_DIVIDE(delta0,ARR.stride[0]) + \ PFAST_DIVIDE(delta1,ARR.stride[1]) + \ PFAST_DIVIDE(delta2,ARR.stride[2])); \ } while (0) #define _TI_ARRAY_FASTBEST1(type, result, preparedbase, ARR, pt) do { \ INDEX_##type(result, preparedbase, pt); \ } while (0) #define _TI_ARRAY_FASTBEST2(type, result, preparedbase, ARR, pt) do { \ INDEX_##type(result, preparedbase, pt.x0*ARR.sideFactors[0] + pt.x1); \ } while (0) #define _TI_ARRAY_FASTBEST3(type, result, preparedbase, ARR, pt) do { \ INDEX_##type(result, preparedbase, pt.x0*ARR.sideFactors[0] + pt.x1*ARR.sideFactors[1] + pt.x2); \ } while (0) #define _TI_ARRAY_FASTBESTPREPARE1(type, result, ARR) do { \ INDEX_##type(result, ARR.A, -ARR.base[0]); \ } while (0) #define _TI_ARRAY_FASTBESTPREPARE2(type, result, ARR) do { \ INDEX_##type(result, ARR.A, -(ARR.base[0]*ARR.sideFactors[0] + ARR.base[1])); \ } while (0) #define _TI_ARRAY_FASTBESTPREPARE3(type, result, ARR) do { \ INDEX_##type(result, ARR.A, -(ARR.base[0]*ARR.sideFactors[0] + ARR.base[1]*ARR.sideFactors[1] + ARR.base[2])); \ } while (0) #define _TI_ARRAY_FASTADDR_HELPER_GLOBAL1(result, ARR, pt, where) _TI_ARRAY_FASTADDR1(GLOBAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTADDR_HELPER_GLOBAL2(result, ARR, pt, where) _TI_ARRAY_FASTADDR2(GLOBAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTADDR_HELPER_GLOBAL3(result, ARR, pt, where) _TI_ARRAY_FASTADDR3(GLOBAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTADDR_HELPER_LOCAL1(result, ARR, pt, where) _TI_ARRAY_FASTADDR1(LOCAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTADDR_HELPER_LOCAL2(result, ARR, pt, where) _TI_ARRAY_FASTADDR2(LOCAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTADDR_HELPER_LOCAL3(result, ARR, pt, where) _TI_ARRAY_FASTADDR3(LOCAL, result, ARR.A, ARR, pt) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_GLOBAL1(result, ARR) _TI_ARRAY_FASTBESTPREPARE1(GLOBAL, result, ARR) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_GLOBAL2(result, ARR) _TI_ARRAY_FASTBESTPREPARE2(GLOBAL, result, ARR) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_GLOBAL3(result, ARR) _TI_ARRAY_FASTBESTPREPARE3(GLOBAL, result, ARR) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_LOCAL1(result, ARR) _TI_ARRAY_FASTBESTPREPARE1(LOCAL, result, ARR) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_LOCAL2(result, ARR) _TI_ARRAY_FASTBESTPREPARE2(LOCAL, result, ARR) #define _TI_ARRAY_FASTBESTPREPARE_HELPER_LOCAL3(result, ARR) _TI_ARRAY_FASTBESTPREPARE3(LOCAL, result, ARR) #define _TI_ARRAY_FASTBESTADDR_HELPER_GLOBAL1(result, preparedbase, ARR, pt) _TI_ARRAY_FASTBEST1(GLOBAL, result, preparedbase, ARR, pt) #define _TI_ARRAY_FASTBESTADDR_HELPER_GLOBAL2(result, preparedbase, ARR, pt) _TI_ARRAY_FASTBEST2(GLOBAL, result, preparedbase, ARR, pt) #define _TI_ARRAY_FASTBESTADDR_HELPER_GLOBAL3(result, preparedbase, ARR, pt) _TI_ARRAY_FASTBEST3(GLOBAL, result, preparedbase, ARR, pt) #define _TI_ARRAY_FASTBESTADDR_HELPER_LOCAL1(result, preparedbase, ARR, pt) _TI_ARRAY_FASTBEST1(LOCAL, result, preparedbase, ARR, pt) #define _TI_ARRAY_FASTBESTADDR_HELPER_LOCAL2(result, preparedbase,ARR, pt) _TI_ARRAY_FASTBEST2(LOCAL, result, preparedbase, ARR, pt) #define _TI_ARRAY_FASTBESTADDR_HELPER_LOCAL3(result, preparedbase, ARR, pt) _TI_ARRAY_FASTBEST3(LOCAL, result, preparedbase, ARR, pt) #define TI_ARRAY_FASTADDR_GLOBAL(T,N) _TI_ARRAY_FASTADDR_HELPER_GLOBAL##N #define TI_ARRAY_FASTADDR_LOCAL(T,N) _TI_ARRAY_FASTADDR_HELPER_LOCAL##N #define TI_ARRAY_FASTBESTADDR_GLOBAL(T,N) _TI_ARRAY_FASTBESTADDR_HELPER_GLOBAL##N #define TI_ARRAY_FASTBESTADDR_LOCAL(T,N) _TI_ARRAY_FASTBESTADDR_HELPER_LOCAL##N /* prepares an adjusted pointer for using FASTBEST calculation it doesn't work for the general case it assumes that strides are all 1, and sidefactor[N-1] is 1 */ #define TI_ARRAY_FASTBESTPREPARE_GLOBAL(T,N) _TI_ARRAY_FASTBESTPREPARE_HELPER_GLOBAL##N #define TI_ARRAY_FASTBESTPREPARE_LOCAL(T,N) _TI_ARRAY_FASTBESTPREPARE_HELPER_LOCAL##N #ifndef _CONCAT #define _CONCAT_HELPER(a,b) a ## b #define _CONCAT(a,b) _CONCAT_HELPER(a,b) #endif #define TI_ARRAY_FASTCONV(N, result, base, ARR, pt) _CONCAT(_TI_ARRAY_FASTADDR,N)(LOCAL, result, base, ARR, pt) /* this macro is used by the scatter/gather methods in ti_array.c to access the FASTBEST calculation it doesn't work for the general case it assumes that strides are all 1, and sidefactor[N-1] is 1 */ #define TI_ARRAY_FASTBESTCONV(N, result, preparedbase, ARR, pt) _CONCAT(_TI_ARRAY_FASTBEST,N)(LOCAL, result, preparedbase, ARR, pt) #endif #endif /* !_INCLUDE_ARRAY_ADDR_H_ */