// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/ir.cpp,v 1.49 2002/01/14 10:18:55 xhshi Exp $
//



#include "defines.h"
#include <iostream.h>
#include <float.h>
#include <math.h>
#include "ir.h"
#include "expression.h"
#include "build_ir_routines.h"
#include "flow_graph.h"
#include "data_emitter.h"
#include "jit_runtime_support.h"
#include "o3_profiling.h"

#ifdef O3_VTune_Support
#include "..\ia32_o1_jit\vtune.h"	
#endif
unsigned next_global_id = 0;

#define ORP_DOES_AASTORE  // see also build_ir_routines.cpp
//#define TURN_OFF_BOUNDS  // see also opt_bound_elimination.cpp
#define CHECKCAST_SLOWCALL
//#define SAFE_D2F

///////////////////////////////////////////////////////////////////////////////////////
// Data structure and magic number calculator for long division by invariant divisor
///////////////////////////////////////////////////////////////////////////////////////
#if 1
ORPExport void *gc_malloc_fixed_code_for_class_loading(unsigned size);
#pragma pack(8)
// Precomputed constants ("magic numbers") used to divide a 64-bit value by an
// invariant divisor.  The record is filled in by magic(), magic_32u() and
// magic_rem() below; const_long_div() allocates one per constant divisor.
// NOTE(review): field order and packing are presumably relied upon by the
// runtime division helpers -- confirm before reordering anything.
typedef struct{
	union{
		uint64 m ;		// m' (unsigned view of the 64-bit multiplier)
		int64  m1 ;		// Signed view of the same storage; this is the member magic() writes
	} ;
	unsigned l ;		// written by magic(): max(my_ceil(log|d|, ln2), 1)
	unsigned sh1 ;		// written by magic_32u()
	unsigned sh ;		// Signed; written by magic() as l - 1
	int      d_sign ;	// written by magic(): 0 when the divisor is >= 0, -1 otherwise
	unsigned d_32 ;		// if divisor is 32-bit, it's zero, otherwise it's 1
	unsigned m_32 ;		// 32-bit multiplier written by magic_32u(); magic() resets it to 0
	unsigned sh2 ;		// for 32-bit; written by magic_32u()
#ifdef ORP_POSIX
	unsigned never_used ;	// NOTE(review): looks like alignment padding for the POSIX build -- confirm
#endif
	int64    divisor ;	// remember it for lrem.
	unsigned l_rem ;	// for remainder && fast 64/32; written by magic_rem()
	unsigned m_rem ;	// for remainder && fast 64/32; written by magic_rem()
	unsigned d_norm ;	// for remainder && fast 64/32; written by magic_rem()
}MAGIC ;

//
// Ceiling of x/y as an unsigned value, tolerant of floating-point noise:
// when the quotient lies less than 0.001 above its truncated value the
// truncated value is returned instead of rounding up to the next integer.
// The quotient is expected to be non-negative.
//
unsigned my_ceil(double x, double y)
{
	const double quotient = x/y ;
	const unsigned truncated = (unsigned)quotient ;
	const double excess = fabs(truncated-quotient) ;

	return (excess < 0.001) ? truncated : (unsigned)ceil(quotient) ;
}

//
// Floor of x/y as an unsigned value, tolerant of floating-point noise.
//
// The quotient is typically log(d)/log(2), which for an exact power can come
// out a few ulps BELOW the true integer (e.g. 2.9999999999999996).  A plain
// floor -- or a tolerance test against the truncated value, which can only
// ever catch quotients slightly ABOVE an integer -- would then be one too
// small.  So a quotient within 0.001 of its nearest integer is snapped to
// that integer; everything else is floored normally.
// The quotient is expected to be non-negative.
//
unsigned my_floor(double x, double y)
{
	double q = x/y ;
	double nearest = floor(q + 0.5) ;	// nearest integer to q
	if(fabs(nearest-q)<0.001)
		return (unsigned)nearest ;

	return (unsigned)floor(q) ;
}

// 2^63: the numerator whose quotient/remainder by |d| is taken in magic().
const uint64 z64 = __UINT64_C(0x8000000000000000) ;
// Natural log of 2; dividing log(d) by it (via my_ceil/my_floor) gives log2(d).
const double ln2 = log(2) ;
//
// Compute the 64-bit magic-number fields of r for the signed divisor d_in.
// Writes r.l, r.m1, r.d_sign, r.sh, sets r.d_32 to 1 and clears r.m_32;
// the remaining fields of r are left untouched.
// NOTE(review): negating d_in overflows for the most negative int64, and
// d_in == 0 would divide by zero below -- callers are presumably expected to
// filter those out; confirm.
//
void magic(int64 d_in, MAGIC& r)
{
	uint64 d = d_in<0 ? -d_in: d_in ;//absolute of d_in ;
	int64 dd = (int64)d ;
	double log_d = log((double)dd) ;

	// l = max(ceil(log2(|d|)), 1)
	unsigned l_ceil = my_ceil(log_d,ln2) ;
	unsigned l = (l_ceil > 1)? l_ceil : 1 ;

	uint64 m = 0 ;
	uint64 t1 = z64%d ;		// 2^63 mod |d|
	if(t1 >= __UINT64_C(0x100000000)){ //i.e. l>=32 , d_in >= 32-bit
		// The remainder does not fit in 32 bits, so "t1<<l" would lose bits:
		// shift and divide in two steps (by l1 bits, then by l2 bits, l1+l2 == l).
		uint64 q1 = z64/d ;
		unsigned l1 = 64-l ;
		unsigned l2 = l - (64-l) ;//l>32, st. l2>0. And l1,l2 < 32
		uint64 q2 = (((t1<<l1)/d)<<l2) + (((t1<<l1)%d)<<l2)/d ;
		m = (q1<<l) + q2 + 1 ;

	}else{
		// Remainder fits in 32 bits: quotient part and remainder part are
		// each shifted by l in one step.
		m = ((z64/d)<<l) + (t1<<l)/d + 1 ;
	}
	// ~m + 1 is the two's-complement negation of m, so m1 is m reinterpreted
	// as a signed value; r.m1 below stores the negation of that.
	int64 m1 = -(int64)(~m + 1) ;
	int d_sign = d_in>=0 ? 0 : -1 ;	// 0 for a non-negative divisor, all ones otherwise
	unsigned sh = l - 1 ;

	r.l = (unsigned)l ;
	r.m1 = -m1 ; // m1 < 0
	r.d_sign = d_sign ;
	r.sh = sh ;
	r.d_32 = 1 ;	// magic_32u() resets this to 0 when it is run afterwards

	r.m_32 = 0 ;
}

void magic_32u(unsigned d, MAGIC& r)
{
	double log_d = log(d) ;
	//unsigned l = (unsigned)ceil(log_d/ln2) ;
	unsigned l = my_ceil(log_d,ln2) ;
	unsigned m = (unsigned)((__UINT64_C(0x100000000) * (uint64)((1<<l) - d))/(uint64)d + 1) ;
	assert(m) ;
	unsigned sh1 = l<1? l: 1 ;
	unsigned sh2 = l-1 > 0? l-1: 0 ;

	r.m_32 = m ;
	r.sh1 = sh1 ;
	r.sh2 = sh2 ;
	r.d_32 = 0 ;
}

void magic_rem(unsigned d, MAGIC& r)
{
	double log_d = log(d) ;
	//unsigned l = 1 + (unsigned)floor(log_d/ln2) ;
	unsigned l = 1 + my_floor(log_d,ln2) ;
	uint64 mm1 = ((uint64)1<<l) - (uint64)d ;
	uint64 mm2 = mm1 * ((uint64)1<<32) ;
	uint64 mm3 = mm2 - 1 ;
	uint64 mm = mm3/(uint64)d ;
	//uint64 mm = ((((UINT64_C(1)<<l)-(UINT64)d)<<32)-UINT64_C(1))/(UINT64)d ;	
	unsigned m = (unsigned)mm ;
	unsigned d_norm = (unsigned)(d<<(32-l)) ;

	r.d_norm = d_norm ;
	r.l_rem = l ;
	r.m_rem = m ;
}
#endif

//
// Return log2(v) when v is a positive power of two, -1 otherwise
// (zero and negative values are never treated as powers of two).
//
int is_power2(int v) {
    // Exactly one bit may be set, and it must not be the sign bit.
    if (v <= 0 || (v & (v - 1)) != 0)
        return -1;

    // Count how many times v can be halved before it vanishes.
    int log2_v = 0;
    while ((v >>= 1) != 0)
        log2_v++;
    return log2_v;
}

//
// Operand width that matches the JIT type of an instruction:
// byte/boolean -> 8 bits, char/short -> 16 bits, anything else -> 32 bits.
//
static X86_Opnd_Size opnd_size(Inst *inst)
{
    switch (inst->type())
    {
    case JIT_TYPE_BYTE:
    case JIT_TYPE_BOOLEAN:
        return opnd_8;
    case JIT_TYPE_CHAR:
    case JIT_TYPE_SHORT:
        return opnd_16;
    default:
        return opnd_32;
    }
}

//
// Print the constant using the representation that matches its type;
// a long is printed as its high and low halves separated by a comma.
//
void Const_Operand::print(ostream& cout) {
    if (type == JAVA_TYPE_FLOAT) {
        cout << val.f;
        return;
    }
    if (type == JAVA_TYPE_DOUBLE) {
        cout << val.d;
        return;
    }
    if (type == JAVA_TYPE_CLASS) {
        cout << val.addr;
        return;
    }
    if (type == JAVA_TYPE_LONG) {
        cout << (void*)val.l.hi << "," << (void*)val.l.lo;
        return;
    }
    // every remaining type is printed as an int
    cout << val.i;
}

//
// The "small" type that decides signedness and width is the source type when
// widening, or the expression type when the source is an int (narrowing).
//
Widen_Inst::Widen_Inst(Operand *src, Exp *e, Inst *inst_head) :
    Inst(src,e,inst_head){
    const O3_Jit_Type small_ty = (src->type == JIT_TYPE_INT) ? e->type   // narrowing
                                                             : src->type; // widening
    // char is the only unsigned small type
    _is_signed = (small_ty != JIT_TYPE_CHAR);
    // anything that is not byte-sized is handled as a half word
    _is_half   = !IS_BYTE_SIZE(small_ty);
}

bool  Inst::is_64bit() {return IS_64BIT(type());}

// The type of an instruction is the type of the expression it computes.
O3_Jit_Type Inst::type() {
    return exp->type;
}

//
// FIX_HI_OPND_FOR_CONST(y): if source operand y is a Const operand, replace
// it in _srcs by its low half (con->lo()) and attach the high half
// (con->hi()) to that replacement as its hi operand.
// Must be used inside an Inst member function (it touches _srcs).
// Note: the macro body ends with a line continuation, so the blank line that
// follows it is part of the definition -- keep it.
//
#define FIX_HI_OPND_FOR_CONST(y) { \
    if (_srcs[y]->kind == Operand::Const) { \
        Const_Operand *con = (Const_Operand *)_srcs[y]; \
        _srcs[y] = con->lo(); \
        _srcs[y]->set_hi_opnd(con->hi()); \
        } } \

//
// GET_HI_OPND(x,y): store the hi operand of source y into x (splitting a
// Const source first, see above) and assert that it is non-NULL.
//
#define GET_HI_OPND(x,y) {         \
    FIX_HI_OPND_FOR_CONST(y) \
    x = _srcs[y]->hi_opnd();       \
    assert(x); }
    //if (x == NULL) x = _srcs[0]; } 

//
// Expand this instruction (at most once) and return the first instruction of
// the resulting sequence.  Outgoing-argument assignments and instructions
// that were already expanded are returned unchanged.
//
Inst *Inst::expand(Expressions& exprs) {
    if (_been_expanded || is_outgoing_arg_assignment())
        return this;
    _been_expanded = 1;

    // Neighbours are captured before the list is modified; the successor
    // bounds the bc_index propagation below (PRINTABLE_O3 only).
    Inst *before = prev(), *after = next();

    Inst *first = this;
    if (need_special_expansion())
        first = special_expansion(exprs);
    else if (type() == JIT_TYPE_LONG)
        first = default_long_expansion(exprs.mem);

    //
    // The returned instruction is not revisited by the caller, so expand it
    // here when it is new (e.g. a freshly created long instruction).
    //
    if (!first->_been_expanded)
        first = first->expand(exprs);

#ifdef PRINTABLE_O3
    //
    // every generated instruction inherits the bytecode index of its origin
    //
    Inst *cur;
    for (cur = first; cur != after; cur = cur->next())
        cur->bc_index = bc_index;
#endif // PRINTABLE_O3
    return first;
}

//
// Make dst_hi the destination of the expanded (high-half) instruction and,
// when dst_hi is a temp register, give it the attributes of dst_lo.
//
static void fixup_expanded_inst(Inst *epnd, Operand *dst_hi, Operand *dst_lo) {
    epnd->set_dst(dst_hi);
    if (!dst_hi->is_temp_reg())
        return;
    Temp_Reg *hi_reg = (Temp_Reg*)dst_hi;
    hi_reg->copy_attr_from((Temp_Reg*)dst_lo);
}

//
// Default expansion of a long instruction: create, right after this one, a
// twin instruction (via expand_inst) that works on the hi operands of the
// sources and writes the hi operand of the destination.  Returns this.
//
Inst *Inst::default_long_expansion(Mem_Manager& mem) {
    if (_dst == NULL)
        return this;

    Operand *dst_hi = _dst->hi_opnd();
    if (dst_hi == NULL)     // no hi destination: nothing (left) to expand
        return this;

    assert(n_srcs > 0 && n_srcs <= 3);
    Operand *hi0, *hi1, *hi2;
    Inst *twin;

    GET_HI_OPND(hi0,0);
    if (n_srcs == 1) {
        twin = expand_inst(mem,dst_hi,hi0,exp,next());
    } else {
        GET_HI_OPND(hi1,1);
        if (n_srcs == 2) {
            twin = expand_inst(mem,dst_hi,hi0,hi1,exp,next());
        } else {
            GET_HI_OPND(hi2,2);
            twin = expand_inst(mem,dst_hi,hi0,hi1,hi2,exp,next());
        }
    }
    fixup_expanded_inst(twin,dst_hi,_dst);
    return this;
}

//
// get format string of the instruction and then print out the inst
//
//
// Print the instruction by interpreting its format string.  Plain characters
// are copied to the stream; '$' introduces a directive:
//   $t     destination operand
//   $y     type of the instruction (printed as a char)
//   $x     escape()
//   $<n>   n-th source operand
//
void Inst::print(ostream& cout) {
    const char *fmt = fmt_string();
    const char *c = fmt;
    assert(c) ;
    for (; *c != '\0'; c++) {
        if (*c != '$') {
            cout << (*c);
            continue;
        }
        c++;    // step onto the directive character
        switch (*c) {
        case 't':
            _dst->print(cout);
            break;
        case 'y':
            cout << (char) type();
            break;
        case 'x':
            escape(cout);
            break;
        default: {
            int nth_opnd = *c - '0';
            //
            // A plain "n_srcs > nth_opnd" assertion fails for
            // readbarrier+getstatic: with a single source the format may
            // still name operand 1, which is tolerated (and prints nothing)
            // for read_barrier formats only.
            //
            if (n_srcs == 1 && n_srcs == nth_opnd) {
                assert(strncmp(fmt,"read_barrier",12)==0) ;
            } else {
                assert(n_srcs > nth_opnd && *c >= '0' && *c <= '9');
                _srcs[nth_opnd]->print(cout);
            }
            break;
        }
        }
    }
}


//
// A temp register that is not a global register-allocation candidate is
// marked for local register assignment when its use requires a register.
//
// 'marker' is currently unused: the disabled variant of this routine used it
// to promote the register to a global candidate when _no != marker, i.e.
// when the definition lives in a different block (possible after inlining).
//
void Temp_Reg::find_local_reg_cand(int src_in_reg, unsigned marker) {
    if (global_reg_alloc_cand())
        return;
    //
    // the source must sit in a register, so let local register allocation
    // try to find one
    //
    if (src_in_reg)
        set_assign_local_reg();
}

bool Temp_Reg::is_foldable() {

    assert(_inst != NULL);

//#ifdef INLINE_NATIVE
    if(!get_foldable()) return false ;
//#endif
    //
    // we do folding if the temp register has only one definition
    //
    if (yyy._temp_reg_has_multiple_defs) return false;

    if (IS_FP_DBL_TYPE(type)) return false;

    if(_inst->is_imm_assignment() || _inst->is_const_assignment()) return true;

    if (_inst->is_reg_assignment()) {
        Operand *src = _inst->src(0);
        //
        // E.g. we cannot propagate fp_stk(0) to two different places because
        // fp_stk(0) will be popped after the first access.
        //
        if (yyy._global_reg_alloc_cand && src->is_fp_stk())
            return false;

        if (IS_FP_DBL_TYPE(src->type) &&
            _inst->dst()->type != src->type)
            return false;
        // 
        // If we fold ret or arg to temp regs that are live across blocks,
        // we extends the live ranges of the ret and args.  When arg and ret 
        // are passed in registers, long live ranges of arg and ret take up 
        // registers for too long and may cause unnecessary spilling.
        //
        if ( !yyy._global_reg_alloc_cand || 
            (src->is_single_def_temp_reg() && !src->is_ret() && !src->is_arg())) 
            return true;
    }
    if (_inst->n_srcs == 0 || _inst->src(0) == NULL)
        return false;
    return (!yyy._global_reg_alloc_cand && !yyy._assign_local_reg);
}

//
// return true, if the assignment is "i = (i + imm1) & imm2;
//
bool Assign_Inst::is_iinc_and() {
    if (!exp->is_inst_exp() || exp->op != Exp::Assign) return false;
    Exp* r_exp = ((Inst_Exp*)exp)->rght_child();
    //
    // check (x + y) & imm
    //
    if (r_exp->op != Exp::And || 
        ((Inst_Exp*)r_exp)->left_child()->op != Exp::Add ||
        ((Inst_Exp*)r_exp)->rght_child()->is_imm_exp() == false)
        return false;

    //
    // check i = (i + imm1) & imm2
    //
    Exp *l_exp = ((Inst_Exp*)r_exp)->left_child();
    return ((Inst_Exp*)l_exp)->left_child() == ((Inst_Exp*)exp)->left_child() &&
           ((Inst_Exp*)l_exp)->rght_child()->is_imm_exp();
}

//
// return true, if the assignment exp is "a.x = a.x + imm"
//
bool Assign_Inst::is_field_inc() {
    if (!exp->is_inst_exp() || exp->op != Exp::Assign) return false;
    Exp* r_exp = ((Inst_Exp*)exp)->rght_child();
    //
    // check x = x +/- imm
    //
    if ((r_exp->op != Exp::Add && r_exp->op != Exp::Sub) ||
        ((Inst_Exp*)exp)->left_child() != ((Inst_Exp*)r_exp)->left_child() ||
        ((Inst_Exp*)r_exp)->rght_child()->is_imm_exp() == false)
        return false;
    return true;
}

bool Assign_Inst::need_special_expansion() {
    if (_srcs[0]->is_arg() && _srcs[0]->type == JIT_TYPE_LONG)
        return true;
    if (_srcs[0]->is_ret() && IS_FP_DBL_TYPE(_srcs[0]->type))
        return true;
    if (_dst->is_ret() && IS_FP_DBL_TYPE(_dst->type))
        return true;
    if (_aastore_call)
        return true;
    return false;
}

//
// Expand the assignments flagged by need_special_expansion():
//   - copy of a long argument   : add the assignment of the other 32-bit half
//   - store to an FP/double ret : retarget the destination to fp_stk(0)
//   - load from an FP/double ret: retarget the source to fp_stk(0)
//   - aastore                   : replace by pushes + a call to the helper
// Returns the first instruction of the expanded sequence.
// Note: the cases are tested in a different order than in
// need_special_expansion(); the dst-is-ret case is checked before the
// src-is-ret case here.
//
Inst *Assign_Inst::special_expansion(Expressions& exprs) {
    Inst *epnd = this, *nxt = next();
    if (_srcs[0]->is_arg() && _srcs[0]->type == JIT_TYPE_LONG) {
        if (_srcs[0]->assigned_preg() == n_reg) {
            // n_reg: no physical register assigned, i.e. the argument lives in the frame
            //
            // t2 = arg2   ===>    t2 = arg3 (frame layout arg2 contains hi 32)
            //                     t3 = arg2
            epnd = new(exprs.mem) Assign_Inst(_dst->hi_opnd(), _srcs[0], exp, nxt);
            _srcs[0] = _srcs[0]->hi_opnd();
            epnd->set_expanded();
        } else {
            //
            // if arg2 is passed in reg, then arg2 contains low-32 value
            //
            (new (exprs.mem) Assign_Inst(_dst->hi_opnd(),_srcs[0]->hi_opnd(),exp,nxt))->set_expanded();
        }
    } else if (_dst->is_ret() && IS_FP_DBL_TYPE(_dst->type)) {
        //
        // Ret.D = t   ===>  fp_stk(0) = t 
        // (float/double) are returned via fp stack.
        ((Ret_Operand*)_dst)->set_fp_return_loc(exprs.fp_stk_opnd(0));
        _dst = exprs.fp_stk_opnd(0);
    } else if (_srcs[0]->is_ret() && IS_FP_DBL_TYPE(_srcs[0]->type)) {
        // t = Ret.D  ===>  t = fp_stk(0)
        _srcs[0] = exprs.fp_stk_opnd(0);
        //dont_eliminate();  // XXX- hack.  If we want to eliminate this stmt, replace it with fstp st0 instead
    } else if (_aastore_call) {
        // Use the ORP helper function for aastore.
        // Push src, push index, push array, call helper.
        // (argarray[0..2] hold the array, index and value pushes respectively;
        // all of them and the call are created in front of nxt.)
        Inst **argarray = (Inst**)exprs.mem.alloc(3 * sizeof(*argarray));
        Array_Exp *array_exp = (Array_Exp*)((Inst_Exp*)exp)->left_child();
        argarray[0] = new(exprs.mem) Push_Inst(_dst->base(), array_exp->base, nxt);
        argarray[1] = new(exprs.mem) Push_Inst(_dst->index(), array_exp->indx, nxt);
        argarray[2] = new(exprs.mem) Push_Inst(_srcs[0], ((Inst_Exp*)exp)->rght_child(), nxt);
        Call_Inst *cll = new(exprs.mem) Call_Inst(Call_Inst::aastore_call, exp,
            NULL, false, nxt);
        cll->set_args(argarray, 3, NULL);
        // the call instruction (not the first push) is what gets returned here
        epnd = cll;
        epnd->set_expanded();
        // the original store is replaced by the helper call
        unlink();
    } else assert(0);
    return epnd;
}

//
// Replace an outgoing-argument assignment by the push(es) of its source.
// On return new_arg1 is the push of the argument; for a long argument
// new_arg1/new_arg2 are the last two instructions generated, in list order.
// 'mh' is not used here.
//
void Assign_Inst::arg_expansion(Expressions& exprs,
                                Method_Handle mh,
                                Inst*& new_arg1,
                                Inst*& new_arg2) {
    assert(_dst->is_arg());
    Mem_Manager& mem = exprs.mem;

    new_arg1 = new_arg2 = NULL;
    Inst *tail = next();
    //
    // arguments are pushed onto the stack: the push is created in front of
    // 'tail' and expanded at once
    //
    Inst *push = new (mem) Push_Inst(_srcs[0],exp,tail);
    push->expand(exprs);

    // everything generated between this instruction and 'tail' is final
    Inst *cur;
    for (cur = next(); cur != tail; cur = cur->next())
        cur->set_expanded();
    unlink();

    Inst *last = tail->prev();
    if (_srcs[0]->type == JIT_TYPE_LONG) {
        new_arg2 = last;
        new_arg1 = new_arg2->prev();
    } else {
        new_arg1 = last;
    }
}

//
//                              push t4
//                              push t3
//                              push t2
//                              push t1
//    t5 =.J  t1 mul t3  ===>   call lmul_helper
//                              t5 = Ret.lo
//                              t6 = Ret.hi 
//
//
// Replace the long instruction i (two long sources) by pushes of both
// sources, a call to the helper 'call' and a copy of the long return value
// into i's destination.  Non-long instructions are returned untouched.
// Returns the first instruction of the generated sequence.
//
static Inst *long_mul_div_rem(Expressions& exprs, Inst *i, Call_Inst::Kind call) {
    if (i->type() != JIT_TYPE_LONG)
        return i;

    Mem_Manager& mem = exprs.mem;
    Inst *tail = i->next();     // every new instruction goes in front of this one

    Operand_Exp *ret_exp = exprs.lookup_ret_exp(JIT_TYPE_LONG);
    Inst **pushes = (Inst**)mem.alloc(4 * sizeof(*pushes));
    //
    // src(1) first, then src(0).  Each push is created for one source and
    // its special_expansion() result fills the neighbouring slot
    // (presumably the push of the other 32-bit half -- see Push_Inst).
    //
    pushes[1] = new (mem) Push_Inst(i->src(1),i->exp,tail);
    pushes[0] = pushes[1]->special_expansion(exprs);
    pushes[3] = new (mem) Push_Inst(i->src(0),i->exp,tail);
    pushes[2] = pushes[3]->special_expansion(exprs);
    int k;
    for (k = 0; k < 4; k++)
        pushes[k]->set_expanded();

    Call_Inst *helper = new (mem) Call_Inst(call,i->exp,NULL,false,tail);
    Inst *result_copy = new (mem) Assign_Inst(i->dst(),ret_exp->opnd,i->exp,tail);

    // everything up to (not including) the result copy is final
    Inst *cur;
    for (cur = i->next(); cur != result_copy; cur = cur->next())
        cur->set_expanded();
    helper->set_args(pushes, 4, result_copy);

    i->unlink();    // the original instruction leaves the list
    return pushes[0];
}

//
// Replace a float/double remainder by pushes of both operands, a call to
// the frem/drem helper and a copy of the return value into rem's
// destination.  Returns the first push.
//
static Inst *fp_rem(Expressions& exprs, Inst *rem)
{
    Mem_Manager& mem = exprs.mem;
    Inst *tail = rem->next();   // every new instruction goes in front of this one
    O3_Jit_Type ty = rem->type();

    Operand_Exp *ret_exp = exprs.lookup_ret_exp(ty);
    // four slots are reserved although only the first two are handed to the call
    Inst **pushes = (Inst**)mem.alloc(4 * sizeof(*pushes));
    pushes[0] = new (mem) Push_Inst(rem->src(0),rem->exp,tail);
    pushes[1] = new (mem) Push_Inst(rem->src(1),rem->exp,tail);
    pushes[2] = pushes[3] = NULL;

    Call_Inst::Kind call;
    if (ty == JIT_TYPE_DOUBLE)
        call = Call_Inst::drem_call;
    else
        call = Call_Inst::frem_call;
    Call_Inst *helper = new (mem) Call_Inst(call,rem->exp,NULL,false,tail);
    Inst *result_copy = new (mem) Assign_Inst(rem->dst(),ret_exp->opnd,rem->exp,tail);

    // everything up to (not including) the result copy is final
    Inst *cur;
    for (cur = rem->next(); cur != result_copy; cur = cur->next())
        cur->set_expanded();
    helper->set_args(pushes, 2, result_copy);

    rem->unlink();  // the original instruction leaves the list
    return pushes[0];
}

//
// generate long helper function for lmul, ldiv and lrem
//
//
// smul and float/double multiplies need no expansion; every other multiply
// handed to this routine is turned into a call to the long-multiply helper
// (long_mul_div_rem leaves non-long instructions untouched).
// A specialised helper for small positive constant multipliers
// (lmul_const_multiplier_call) used to be selected here; that path is
// disabled.
//
Inst *Mul_Inst::special_expansion(Expressions& exprs) {
    if (kind == Mul_Inst::smul)
        return this;
    if (IS_FP_DBL_TYPE(type()))
        return this;
    return long_mul_div_rem(exprs, this, Call_Inst::lmul_call);
}

//
//                              push magic_number_offset
//                              push dividend.low 	(t1.low)
//                              push dividend.high	(t1.high)
//    t5 =.J  t1 div const ===> call const_ldiv_helper
//                              t5 = Ret.lo
//                              t6 = Ret.hi 
//
//
// Replace the long division/remainder i, whose divisor is the constant
// 'divisor', by a call to the helper 'call'.  A MAGIC record is allocated
// and filled in for the divisor and its address is pushed as the first
// argument, followed by the pushes of the dividend.
// Returns the first instruction of the generated sequence.
//
static Inst *const_long_div(Expressions& exprs, Inst *i, Call_Inst::Kind call, __int64 divisor) {
    assert(i->type() == JIT_TYPE_LONG) ;
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = i->next();   // every new instruction is created in front of nxt

    Operand_Exp *ret = exprs.lookup_ret_exp(JIT_TYPE_LONG);
    Inst **argarray = (Inst**)mem.alloc(3 * sizeof(*argarray));
	//////////////////////////////////////////////////////
	//Count the magic number first
	//Magic* m = magic(divisor) ; //(*m) contains the magic numbers
	// The record is not allocated from the JIT's Mem_Manager because its
	// address is embedded in the generated code (see the push below).
	MAGIC* m = (MAGIC*)gc_malloc_fixed_code_for_class_loading(sizeof(MAGIC)) ;
	m->divisor = divisor ;
	m->d_32 = 0 ; //means we have not magic number of fast 64/32
	magic(divisor,*m) ;
	// Negative divisors compare as huge unsigned values, so only divisors in
	// [0, 2^32) get the additional 32-bit constants.
	if((uint64)divisor < __UINT64_C(0x100000000)){ // Calculate 32-bit magic number
		//m->d_32 = (unsigned)divisor ;
		magic_32u((unsigned)divisor,*m) ;
		magic_rem((unsigned)divisor,*m) ;
	}
	//push m as the first argument -- argarray[0]
	// NOTE(review): the pointer is truncated to 'unsigned', which only works
	// with 32-bit addresses.
	Inst* imm = exprs.lookup_imm((unsigned)m,JIT_TYPE_INT,nxt) ;
	argarray[0] = new (mem) Push_Inst(imm->src(0),imm->exp,nxt) ; ;
    // push of the dividend; its special_expansion() result fills slot 1
    // (presumably the push of the other 32-bit half -- see Push_Inst)
    argarray[2] = new (mem) Push_Inst(i->src(0),i->exp,nxt);
    argarray[1] = argarray[2]->special_expansion(exprs);
    int j;
    for (j = 0; j < 3; j++) argarray[j]->set_expanded();
    Call_Inst *cll = new (mem) Call_Inst(call,i->exp,NULL,false,nxt);
    Inst *retinst = new (mem) Assign_Inst(i->dst(),ret->opnd,i->exp,nxt);
    // everything generated so far, up to (not including) the copy of the
    // return value, is final
    Inst *a;
    for (a = i->next(); a != retinst; a = a->next()) a->set_expanded();
    cll->set_args(argarray, 3, retinst);
    i->unlink(); // remove i from the instruction list
    return argarray[0];
}

//
// Expand a division or remainder:
//   long      : helper call; a specialised helper is used when the divisor
//               is a known constant other than 0, 1 and -1
//   int       : shift sequence for division by a power of two, otherwise
//               cdq/idiv on eax/edx
//   fp/double : fdiv is left alone, frem becomes a helper call
// Returns the first instruction of the expanded sequence.
//
Inst *Div_Inst::special_expansion(Expressions& exprs) {
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();
    if (type() == JIT_TYPE_LONG) {

		//if the divisor is a constant,we could do more optimization
		// divisor64 stays 0 when the divisor is not a known constant.
		// lower/higher alias its two 32-bit halves (assumes a little-endian
		// layout).
		int64 divisor64 = 0 ;
		unsigned* lower = (unsigned*)&divisor64 ;
		unsigned* higher = &lower[1]	;
		if (_srcs[1]->kind == Operand::Immediate) {
			*lower = ((Imm_Operand*)_srcs[1])->imm();
			assert(_srcs[1]->hi_opnd()) ;
			*higher = ((Imm_Operand*)_srcs[1]->hi_opnd())->imm()	;
		} else if (_srcs[1]->is_single_def_temp_reg()) {
			Inst *asgn = ((Temp_Reg*)_srcs[1])->inst();
			if (asgn->is_imm_assignment()){ 
				*lower = ((Imm_Operand*)asgn->src(0))->imm();
				// NOTE(review): unlike the branch above, hi_opnd() is not
				// checked for NULL here.
				*higher = ((Imm_Operand*)asgn->src(0)->hi_opnd())->imm();
			}
		}

		if(divisor64 != __INT64_C(0) && divisor64 != __INT64_C(1) && divisor64 != __INT64_C(-1)){
			if(kind==div)
				return const_long_div(exprs,this,Call_Inst::const_ldiv_call, divisor64) ;
			else if(kind==rem && (uint64)divisor64 < __UINT64_C(0x100000000))//divisor should be 32-bit
				return const_long_div(exprs,this,Call_Inst::const_lrem_call, divisor64) ;
		}

        // general case: helper taking both long operands
        if (kind == div){
            return long_mul_div_rem(exprs, this, Call_Inst::ldiv_call);
        }else if (kind == rem)
            return long_mul_div_rem(exprs, this, Call_Inst::lrem_call);
    } else if (type() == JIT_TYPE_INT) {
        assert(kind == div || kind == rem);
        //
        // determine if it is division by power of 2
        //
        // val stays -1 (not a power of two) when the divisor is not a known
        // constant
        int val = -1;
        if (_srcs[1]->kind == Operand::Immediate) {
            val = ((Imm_Operand*)_srcs[1])->imm();
        } else if (_srcs[1]->is_single_def_temp_reg()) {
            Inst *asgn = ((Temp_Reg*)_srcs[1])->inst();
            if (asgn->is_imm_assignment()) 
                val = ((Imm_Operand*)asgn->src(0))->imm();
        } 
        int n_shift = is_power2(val);
        Operand *eax = exprs.lookup_reg_exp(eax_reg,JIT_TYPE_INT,0)->opnd;
        Operand *edx = exprs.lookup_reg_exp(edx_reg,JIT_TYPE_INT,0)->opnd;
        //
        // generate     mov  eax, x
        //              cdq
        //              and  edx, 1f
        //              add  eax, edx
        //              sar  eax, 5
        //
        // (the example above is for a division by 32: cdq leaves edx all
        // ones for a negative dividend, so val-1 is added before the shift
        // only in that case)
        Inst *epnd;
        if (kind == div && n_shift != -1) { // a power of 2
            Operand_Exp *imm     = exprs.lookup_imm_exp(n_shift,JIT_TYPE_INT);
            if (n_shift == 1)
            {
                // division by 2 uses a shorter add/adc sequence
                // eax = add  src0, 0x80000000
                // adc  eax, 0x80000000
                // sar  eax, 1
                Operand_Exp *imm80 = exprs.lookup_imm_exp(0x80000000, JIT_TYPE_INT);
                epnd = new (mem) Add_Inst(Add_Inst::add, _srcs[0], imm80->opnd, exp, nxt);
                epnd->set_dst(eax);
                (new (mem) Add_Inst(Add_Inst::adc, eax, imm80->opnd, exp, nxt))->set_dst(eax);
            }
            else
            {
                epnd = new (mem) Assign_Inst(eax,_srcs[0],exp,nxt);
                (new (mem) Native_Inst(Native_Inst::cdq,eax,exp,nxt))->set_dst(edx);
                Operand_Exp *imm_exp = exprs.lookup_imm_exp(val-1,JIT_TYPE_INT);
                (new (mem) Bitwise_Inst(Bitwise_Inst::k_and,edx,imm_exp->opnd,exp,nxt))->set_dst(edx);
                (new (mem) Add_Inst(Add_Inst::add, eax, edx, exp, nxt))->set_dst(eax);
            }
            // the final arithmetic shift writes the real destination
            (new (mem) Bitwise_Inst(Bitwise_Inst::sar,eax,imm->opnd,exp,nxt))->set_dst(_dst);
        } else {
            epnd = new (mem) Assign_Inst(eax,_srcs[0],exp,nxt);
            (new (mem) Native_Inst(Native_Inst::cdq,eax,exp,nxt))->set_dst(edx);
            //
            //                               eax = t2
            //                               edx = cdq eax  --- sign extend of eax
            //   t1 =.I  div t2, t3  ====>   idiv t3     --- quotient/remainder in eax/edx
            //                               t1  = eax/edx
            //
            Inst *i = new (mem) Native_Inst(Native_Inst::idiv,edx,eax,_srcs[1],exp,nxt);
            // div takes the quotient (eax), rem the remainder (edx)
            i->set_dst((kind == div)? eax : edx);
            new (mem) Assign_Inst(_dst,i->dst(),exp,nxt);
        }
        unlink();
        return epnd;
    } else if (IS_FP_DBL_TYPE(type())) {
        if (kind == fdiv)
            return this;
        else if (kind == frem) 
            return fp_rem(exprs, this);;
    }
    // reached only for a kind/type combination that is not handled above
    assert(0);
    return this;
}

//
// Expand a negation.
//   long      : this instruction stays as the negation of the low half; an
//               adc and a neg on the high half are added after it.
//   fp/double : replaced by a native fchs.
//   otherwise : unchanged.
//
Inst *Neg_Inst::special_expansion(Expressions& exprs) {
    O3_Jit_Type ty = type();
    Inst *tail = next();        // every new instruction goes in front of this one
    Mem_Manager& mem = exprs.mem;

    if (ty == JIT_TYPE_LONG) {
        //
        //                          t3 = neg t1      ---- low 32
        //  t3 =.J  neg  t1  ===>   t4 = adc t2,0    ---- high 32
        //                          t4 = neg t4
        //
        Operand_Exp *zero = exprs.lookup_imm_exp(0, JIT_TYPE_INT);
        Operand *src_hi = _srcs[0]->hi_opnd();

        Inst *carry_in = new (mem) Add_Inst(Add_Inst::adc,src_hi,zero->opnd,exp,tail);
        carry_in->set_dst(_dst->hi_opnd());
        carry_in->set_expanded();

        Inst *neg_hi = new (mem) Neg_Inst(Neg_Inst::neg,_dst->hi_opnd(),exp,tail);
        neg_hi->set_dst(_dst->hi_opnd());
        neg_hi->set_expanded();
        return this;
    }

    if (IS_FP_DBL_TYPE(ty)) {
        //
        //                           fp_stk = t2
        // t1 =.F  fneg  t2   ===>   fp_stk = fchs  --- change sign
        //                           t1 = fp_stk
        //
        Inst *change_sign = new (mem) Native_Inst(Native_Inst::fchs,_srcs[0],exp,tail);
        change_sign->set_dst(_dst);
        unlink();
        return change_sign;
    }

    return this;
}

//
// Expand a bitwise instruction:
//   - long, not a shift      : default long expansion (twin on the hi halves)
//   - non-long variable shift: the shift amount is first copied into ecx
//   - long shift             : replaced by a call to llsh/lrsz/lrsh with the
//                              operands set up in eax/edx/ecx
//   - everything else        : unchanged
//
Inst *Bitwise_Inst::special_expansion(Expressions& exprs) {
    bool is_shift = (kind == shl || kind == shr || kind == sar);
    if (type() == JIT_TYPE_LONG && _dst->hi_opnd() != NULL && !is_shift)
        return default_long_expansion(exprs.mem); 

    //
    // shift amount must be loaded into ecx, unless it's an immediate
    //
    // (a single-def temp register that was assigned an immediate counts as
    // an immediate here)
    if (type() != JIT_TYPE_LONG &&
        is_shift &&
        _srcs[1]->kind != Operand::Immediate &&
        !(_srcs[1]->is_single_def_temp_reg() &&
        ((Temp_Reg *)_srcs[1])->inst()->is_imm_assignment())) {
        Operand_Exp *ecx = exprs.lookup_reg_exp(ecx_reg,JIT_TYPE_INT,0);
        // the copy into ecx is created in front of this instruction
        Inst *epnd = new (exprs.mem) Assign_Inst(ecx->opnd,_srcs[1],exp,this);
        _srcs[1] = ecx->opnd;
        return epnd;
    }
    if (type() != JIT_TYPE_LONG || !is_shift) return this;
    // only long shifts get this far
    Call_Inst::Kind call;
    switch (kind) {
    case shl: 
        call = Call_Inst::llsh_call;  break;
    case shr:
        call = Call_Inst::lrsz_call;  break;
    case sar: 
        call = Call_Inst::lrsh_call;  break;
    default: assert(0);   // NOTE(review): 'call' stays uninitialized here when asserts are compiled out
    }
    //
    //                             eax = t3   --- lo_32
    //  t7 =.L shl t3, t5   ===>   edx = t4   --- hi_32
    //                             ecx = t5   --- shift_num
    //                             call [llsh/lrsz/lrsh]
    //                             t7 = Ret.lo
    //                             t8 = Ret.hi
    // 
    Inst *nxt = next();
    Operand_Exp *eax = exprs.lookup_reg_exp(eax_reg,JIT_TYPE_INT,0);
    Operand_Exp *ecx = exprs.lookup_reg_exp(ecx_reg,JIT_TYPE_INT,0);
    Operand_Exp *edx = exprs.lookup_reg_exp(edx_reg,JIT_TYPE_INT,0);
    Operand_Exp *ret = exprs.lookup_ret_exp(JIT_TYPE_LONG);
    // hi half of the shifted value; a Const source is split into lo/hi first
    Operand *hi;
    GET_HI_OPND(hi,0);
    //
    // Ken: Do we need to create expressions at this point?
    //
    Inst **argarray = (Inst**)exprs.mem.alloc(3 * sizeof(*argarray));
    Inst *eax_asgn = new (exprs.mem) Assign_Inst(eax->opnd,_srcs[0],exp,nxt);
    Inst *edx_asgn = new (exprs.mem) Assign_Inst(edx->opnd,hi,exp,nxt);
    Inst *ecx_asgn = new (exprs.mem) Assign_Inst(ecx->opnd,_srcs[1],exp,nxt);
    Call_Inst *cll = new (exprs.mem) Call_Inst(call,exp,NULL,false,nxt);
    Inst *ret_asgn = new (exprs.mem) Assign_Inst(dst(),ret->opnd,exp,nxt);
    argarray[0] = eax_asgn; argarray[1] = edx_asgn; argarray[2] = ecx_asgn;
    // the register set-up and the call are final; the copy of the return
    // value is not marked
    Inst *a;
    for (a = next(); a != ret_asgn; a = a->next()) a->set_expanded();
    cll->set_args(argarray,3,ret_asgn);
    unlink(); // remove this instruction from the list
    return eax_asgn;
}

//
//                         spill_loc = t2
// t1 =.F i2f t2   ====>   fp_stk = spill_loc  --- fld spill_loc
//                         t1 = fp_stk         --- fst
//
// For the new FP register allocation, we make a change:
//                          spill_loc = t2
//                          t1 = spill_loc
//
//
// int/long -> float/double conversion through memory: the source is stored
// to a temporary spill location and the destination is loaded from it.
// The two assignments are created in front of i's successor; i itself is
// left in the list (the caller unlinks it).  Returns the store.
//
static Inst *expand_li2df(Expressions& exprs, Inst *i) {
    Mem_Manager& mem = exprs.mem;
    Inst *tail = i->next();

    // the temporary is requested with a flag telling whether the source is a long
    unsigned slot_id = exprs.reg_map.get_tmp_reg_id(i->src(0)->type == JIT_TYPE_LONG);
    Spill_Operand *slot = create_spill_opnd(mem,slot_id,i->src(0)->type);

    Inst *store = new (mem) Assign_Inst(slot,i->src(0),i->exp,tail);   // spill_loc = src
    new (mem) Assign_Inst(i->dst(),slot,i->exp,tail);                   // dst = spill_loc
    return store;
}

//
// Expand a type conversion.  The expansion is selected by the source type
// (byte/char/boolean/short sources are handled like int) and then by the
// destination type:
//   int   -> long          : copy + sign extension of the hi half (sar 31)
//   int   -> float/double  : through a spill location (expand_li2df)
//   int   -> byte/char/short: Widen_Inst
//   fp    -> int/long      : helper call (f2i/f2l/d2i/d2l)
//   fp    -> fp            : plain assignment (unless SAFE_D2F)
//   long  -> int           : plain assignment (the low half)
//   long  -> float/double  : through a spill location (expand_li2df)
// Returns the first instruction of the expanded sequence.
//
Inst *Convt_Inst::special_expansion(Expressions& exprs) {
    Operand *dst_hi/*, *src_hi*/;
    Mem_Manager& mem = exprs.mem;
    Inst *epnd, *nxt = next();
    O3_Jit_Type dst_ty = _dst->type;
    O3_Jit_Type src_ty = _srcs[0]->type;
    //
    //  i2b           After eliminating loads after stores, we propagate the
    //  istore_2      value of i2b.  The i2s has been turned into b2s during
    //  iload_2       building IR.
    //  i2s
    //
    if (src_ty == JIT_TYPE_BYTE || 
        src_ty == JIT_TYPE_CHAR || 
        src_ty == JIT_TYPE_BOOLEAN || 
        src_ty == JIT_TYPE_SHORT)
        src_ty = JIT_TYPE_INT;
    switch (src_ty) {
    case JIT_TYPE_INT:
        switch (dst_ty) {
        case JIT_TYPE_LONG:   // i2l     mov  ecx, eax
        {                     //         sar  ecx, 31
            dst_hi = _dst->hi_opnd();
            Operand_Exp *imm = exprs.lookup_imm_exp(31,JIT_TYPE_INT);
            // low half: plain copy of the int
            epnd = new (mem) Assign_Inst(_dst,_srcs[0],exp,nxt);
            epnd->set_expanded();
            // high half: copy of the int, then arithmetic shift by 31
            new (mem) Assign_Inst(dst_hi, _srcs[0], exp, nxt);
            Inst *sh = new (mem) Bitwise_Inst(Bitwise_Inst::sar,dst_hi,imm->opnd,exp,nxt);
            fixup_expanded_inst(sh,dst_hi,_dst);
            sh->set_expanded();
            unlink();
            break;
        }
        case JIT_TYPE_FLOAT:   // i2f
        case JIT_TYPE_DOUBLE:  // i2d
            epnd = expand_li2df(exprs,this);
            unlink();
            break;
        case JIT_TYPE_BYTE:    // i2b
        case JIT_TYPE_CHAR:    // i2c
        case JIT_TYPE_SHORT:   // i2s
            epnd = new (mem) Widen_Inst(_srcs[0],exp,nxt);
            epnd->set_dst(_dst);
            unlink();
            break;
        default: assert(0);
        }
        break;
    case JIT_TYPE_FLOAT:
    case JIT_TYPE_DOUBLE:
        //
        // convert float to int/long (generate helper function)
        //
        if (dst_ty == JIT_TYPE_INT || dst_ty == JIT_TYPE_LONG) {
            // predecessor, remembered to locate the first generated
            // instruction once this one has been unlinked
            Inst *p = prev();
            Call_Inst::Kind call;
            unsigned sz = (src_ty == JIT_TYPE_FLOAT) ? 1 : 2;
            Inst **argarray = (Inst**)mem.alloc(sz * sizeof(*argarray));
            Operand_Exp *ret = exprs.lookup_ret_exp(dst_ty);
            (new (mem) Push_Inst(_srcs[0],exp,nxt))->expand(exprs);
            if (src_ty == JIT_TYPE_FLOAT) {
                call = (dst_ty == JIT_TYPE_INT)? Call_Inst::f2i_call : Call_Inst::f2l_call;
                argarray[0] = nxt->prev();
            } else { // src_ty == JIT_TYPE_DOUBLE
                call = (dst_ty == JIT_TYPE_INT)? Call_Inst::d2i_call : Call_Inst::d2l_call;
                // NOTE(review): the first two stores are overridden -- slot 0
                // ends up holding the last generated instruction and only one
                // argument is handed to the call (sz = 1).  Presumably the
                // double is pushed by a single instruction; confirm.
                argarray[1] = nxt->prev();
                argarray[0] = nxt->prev()->prev();
                argarray[0] = nxt->prev();
                sz = 1;
            }
            Call_Inst *cll = new (mem) Call_Inst(call,exp,NULL,false,nxt);
            Inst *retinst = new (mem) Assign_Inst(_dst,ret->opnd,exp,nxt);
            // pushes and call are final; the copy of the return value is not marked
            Inst *a;
            for (a = next(); a != retinst; a = a->next()) a->set_expanded();
            cll->set_args(argarray, sz, retinst);
            unlink();
            epnd = p->next();
        } else if (IS_FP_DBL_TYPE(dst_ty)) {  // d2f or f2d
#ifdef SAFE_D2F
            // Note: This code isn't very useful.  Code like "fstore_2; fload_2"
            // also implicitly does a d2f, but our FP spill code doesn't try to
            // reload from memory.

            // t1 =.F d2f t2
            //   ====>
            // spill_loc =.F t2
            // t1 =.F spill_loc
            if (dst_ty == JIT_TYPE_FLOAT)
            {
                unsigned spill_id = exprs.reg_map.get_tmp_reg_id(false);
                Spill_Operand *spill = create_spill_opnd(mem, spill_id, JIT_TYPE_FLOAT);
                epnd = new (mem) Assign_Inst(spill, src(0), exp, nxt);
                new (mem) Assign_Inst(dst(), spill, exp, nxt);
            }
            else
#endif // SAFE_D2F
            {
                // the conversion becomes a plain assignment
                epnd = new(mem) Assign_Inst(_dst, _srcs[0], exp, nxt);
                unlink();
            }
        } 
        else assert(0);
        break;
    case JIT_TYPE_LONG:
        if (dst_ty == JIT_TYPE_INT) {
            // l2i: plain assignment from the long source operand
            epnd = new (mem) Assign_Inst(_dst,_srcs[0],exp,nxt);
            unlink();
        }
        else if (IS_FP_DBL_TYPE(dst_ty)) {  // l2f or l2d
            epnd = expand_li2df(exprs,this);
            unlink();
        } else assert(0);
        break;
    default: assert(0);
    }
    return epnd;
}

//
// Replaces this instruction with a call to the getstring helper:
//     push  _srcs[0]      (index, per the original note)
//     push  _srcs[1]      (class_handle, per the original note)
//     call  getstring
//     dst = <return-value operand of type JIT_TYPE_CLASS>
// Every new instruction is created with 'this' as its position argument,
// after which 'this' is unlinked.  Returns the first push.
//
Inst *String_Inst::special_expansion(Expressions& exprs) {
    Mem_Manager& mem = exprs.mem;
    Inst_Exp *iexp = (Inst_Exp*)exp;

    Inst **pushes = (Inst**)mem.alloc(2 * sizeof(Inst*));
    pushes[0] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), this);
    pushes[1] = new (mem) Push_Inst(_srcs[1], iexp->rght_child(), this);

    Call_Inst *call = new (mem) Call_Inst(Call_Inst::getstring_call, exp, NULL, false, this);
    Operand_Exp *ret = exprs.lookup_ret_exp(JIT_TYPE_CLASS);
    Inst *result = new (mem) Assign_Inst(_dst, ret->opnd, exp, this);

    // Flag everything from the first push up to, but not including, the
    // result assignment as already expanded.
    Inst *cur = pushes[0];
    while (cur != result) {
        cur->set_expanded();
        cur = cur->next();
    }

    call->set_args(pushes, 2, result);
    unlink();
    return pushes[0];
}

//
// Replaces this instruction with:
//     push  _srcs[0]      (field_handle, per the original note)
//     call  classinit
// Both new instructions are flagged as expanded; the call has no result
// instruction.  Returns the push.
//
Inst *Classinit_Inst::special_expansion(Expressions& exprs) {
    Mem_Manager& mem = exprs.mem;
    Inst **args = (Inst**)mem.alloc(sizeof(Inst*));

    Inst *push = new (mem) Push_Inst(_srcs[0], exp, this);
    args[0] = push;

    Call_Inst *call = new (mem) Call_Inst(Call_Inst::classinit_call, exp, NULL, false, this);
    push->set_expanded();
    call->set_expanded();
    call->set_args(args, 1, NULL);

    unlink();
    return push;
}

// Expands a GC write-barrier pseudo instruction into a call to the
// writebarrier helper and unlinks the pseudo instruction.
//
// Default sequence (as described by the original note):
//   ecx = lea(_srcs[0])
//   edx = _srcs[1]
//   call ORP_RT_WRITE_BARRIER_FASTCALL
// NOTE(review): the live non-JIT_SAPPHIRE code below only assigns the base of
// the field operand _srcs[0] to ecx before the call; it emits neither a lea
// nor an edx assignment -- confirm which description is current.
// New and improved sequence from Rick (disabled; see the #if 0 section):
//   t99 = shr.A _srcs[0]->base(), GC_CARD_SHIFT_COUNT
//   [t99+p_virtual_global_card_table] =.B 0xff
//
// Returns argarray[0], the first instruction created for the call.
Inst *Writebarrier_Inst::special_expansion(Expressions &exprs)
{
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();

    // (mjc 990712)  The code here violates the GC-ORE interface.  I'm not sure
    // what the right solution should be, but currently it's broken.  I've added
    // the extra #ifdef to be able to build the VM w/o the GC.

#ifdef BUILDING_ORP
// If we are creating a dll for the gc, flip the meaning of GCExport since the
// .h file convention is that if we aren't building the GC use GCImport.
#ifdef USE_GC_DLL
#ifdef BUILDING_GC
#error // dsp file broken we aren't building the GC. #define GCExport __declspec(dllexport)
#else
	// If we aren't building the gc flip the meaning of GCExport.
#define GCExport __declspec(dllimport)
#endif // BUILDING_GC
#else  // We aren't building a dll for the gc.
#define GCExport extern
#endif

#if 0
    // Disabled fast path: mark the card for the written object directly
    // instead of calling the helper.
    extern bool O3_fast_wb;
    GCExport byte *p_virtual_global_card_table;
#define GC_CARD_SHIFT_COUNT 12
    // This code no longer works and should not be used.
    if (O3_fast_wb)
    {
        Inst *epnd;
        // We create the temp reg as a reference type.  This would be a problem if the
        // shifted result were enumerated at GC time, but it won't happen because the
        // instruction is marked as GC-unsafe.
        Reg_Operand *card = exprs.create_new_temp_reg(JIT_TYPE_CLASS);
        assert(src(0)->kind == Operand::Field);
        Operand *base = src(0)->base();
        Operand *shift_count = exprs.lookup_imm_exp(GC_CARD_SHIFT_COUNT, JIT_TYPE_INT)->opnd;
        Inst *shift_inst = new(mem) Bitwise_Inst(Bitwise_Inst::shr, base, shift_count, exp, nxt);
        epnd = shift_inst;
        shift_inst->set_dst(card);
        shift_inst->set_gc_unsafe();
        Operand *field = new(mem) Field_Operand(card, // no null pointer exception
            (unsigned)p_virtual_global_card_table, JIT_TYPE_BYTE, 0,false); 
        Operand *oxff = exprs.lookup_imm_exp(0xff, JIT_TYPE_INT)->opnd;
        Inst *mark = new(mem) Assign_Inst(field, oxff, exp, nxt);
        mark->set_gc_unsafe();
        unlink();
        return epnd;
    }
#endif
#endif // BUILDING_ORP

#ifndef JIT_SAPPHIRE
    // Register-argument variant: ecx = base of the field operand, then call.
    // Only the call is flagged as expanded; both new instructions are
    // flagged GC-unsafe.
    Inst **argarray = (Inst**)mem.alloc(1 * sizeof(*argarray));
    Operand *ecxreg = exprs.lookup_reg_exp(ecx_reg, JIT_TYPE_CLASS, 0)->opnd;
    Operand *addr = src(0);
    assert(addr->kind == Operand::Field);
    Field_Operand *faddr = (Field_Operand *) addr;
    argarray[0] = new(mem) Assign_Inst(ecxreg, faddr->base(), exp, nxt);
    Call_Inst *cll = new(mem) Call_Inst(Call_Inst::writebarrier_call, exp, NULL, false, nxt);
    cll->set_args(argarray, 1, NULL);
    cll->set_expanded();
    argarray[0]->set_gc_unsafe();
    cll->set_gc_unsafe();
    unlink();
#else
	// Stack-argument variant: push the operands, call, then pop them by
	// adding esp_adjust to esp.
	assert(exp->is_inst_exp());
	int esp_adjust = 0;
    Inst **argarray = (Inst**)mem.alloc(n_srcs * sizeof(*argarray));
	if (n_srcs == 2) { // putstatic
		argarray[0] = new (mem) Push_Inst(_srcs[1],((Inst_Exp*)exp)->rght_child(),nxt); // push value
		argarray[1] = new (mem) Push_Inst(_srcs[0],((Inst_Exp*)exp)->left_child(),nxt); // push base
		esp_adjust = 8;
	} else {
		Exp *imm = exprs.lookup_imm_exp(0, JIT_TYPE_INT); // imm exp is purely used for creating push
		argarray[0] = new (mem) Push_Inst(_srcs[2],((Inst_Exp*)exp)->rght_child(),nxt); // push value
		argarray[1] = new (mem) Push_Inst(_srcs[1],imm,nxt); // push off
		argarray[2] = new (mem) Push_Inst(_srcs[0],((Inst_Exp*)exp)->left_child(),nxt); // push base
		esp_adjust = (IS_64BIT(wb_kind)) ? 16 : 12; // long/double needs 2 pushes
	}
    Call_Inst *cll = new(mem) Call_Inst(Call_Inst::writebarrier_call, exp, NULL, false, nxt);
    cll->set_args(argarray, n_srcs, NULL);
    cll->set_expanded();
	cll->wb_kind = wb_kind;
	//
    // caller-pop; add esp_adjust (8, 12 or 16 as computed above) back to esp.
    // Note this is not always n_srcs*4: a 64-bit value takes two pushes.
	//
    Operand_Exp *esp = exprs.lookup_reg_exp(esp_reg,JIT_TYPE_INT,0);
    Operand_Exp *imm = exprs.lookup_imm_exp(esp_adjust, JIT_TYPE_INT);
    Inst_Exp *add = exprs.lookup_inst_exp(Exp::Add,esp,imm,JIT_TYPE_INT);
    Inst *i = new (mem) Add_Inst(Add_Inst::add,esp->opnd,imm->opnd,add,nxt);
    i->set_dst(esp->opnd);
    i->dont_eliminate();
    unlink();
#endif
    return argarray[0];
}

#ifdef STAT_INDIRECT_CALL
//
// Indirect-branch statistics: replaces this pseudo instruction with a call
// to the stat_indirect_call helper.
//   n_srcs == 1:  push _srcs[0]                   (method handle only)
//   n_srcs == 2:  push _srcs[1]; push _srcs[0]    (offset, then base)
// The call is followed by "add esp, 4|8" to drop the pushed arguments
// (caller-pop).  Any other operand count trips an assert.
// Returns the first push.
//
Inst *StatIndirectCall_Inst::special_expansion(Expressions &exprs)
{
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();

    assert(exp->is_inst_exp());
    Inst_Exp *iexp = (Inst_Exp*)exp;
    Inst **args = (Inst**)mem.alloc(n_srcs * sizeof(Inst*));
    int pushed_bytes = 0;

    switch (n_srcs) {
    case 1:     // only a method handle
        args[0] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), nxt);
        pushed_bytes = 4;
        break;
    case 2: {   // method handle and offset
        // The immediate expression exists only so the push has an expression.
        Exp *placeholder = exprs.lookup_imm_exp(0, JIT_TYPE_INT);
        args[0] = new (mem) Push_Inst(_srcs[1], placeholder, nxt);          // push off
        args[1] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), nxt);   // push base
        pushed_bytes = 8;
        break;
    }
    default:    // unsupported operand count
        assert(0);
        break;
    }

    Call_Inst *call = new(mem) Call_Inst(Call_Inst::stat_indirect_call, exp, NULL, false, nxt);
    call->set_args(args, n_srcs, NULL);
    call->set_expanded();
    args[0]->set_gc_unsafe();
    call->set_gc_unsafe();

    // esp = esp + pushed_bytes; must survive dead-code elimination.
    Operand_Exp *sp = exprs.lookup_reg_exp(esp_reg, JIT_TYPE_INT, 0);
    Operand_Exp *bytes = exprs.lookup_imm_exp(pushed_bytes, JIT_TYPE_INT);
    Inst_Exp *sum = exprs.lookup_inst_exp(Exp::Add, sp, bytes, JIT_TYPE_INT);
    Inst *restore = new (mem) Add_Inst(Add_Inst::add, sp->opnd, bytes->opnd, sum, nxt);
    restore->set_dst(sp->opnd);
    restore->dont_eliminate();

    unlink();
    return args[0];
}
#endif

#ifdef O3_VTune_Support
//
// VTune support: replaces this pseudo instruction with a call to the
// vtune_method_call helper.
//     push  _srcs[0]        (method handle)
//     call  vtune_method_call
//     add   esp, 4          (caller-pop)
// Only n_srcs == 1 is supported.  Any other count leaves argarray[0] unset,
// so it is rejected with an assert, as StatIndirectCall_Inst does for its
// unsupported counts.
// Returns the push.
//
Inst *VTune_Call_Inst::special_expansion(Expressions &exprs)
{
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();

	assert(exp->is_inst_exp());
    Inst **argarray = (Inst**)mem.alloc(n_srcs * sizeof(*argarray));
	int esp_adjust = 0;
	if (n_srcs == 1) { // Only a Method Handle
		argarray[0] = new (mem) Push_Inst(_srcs[0],((Inst_Exp*)exp)->left_child(),nxt); 
		esp_adjust = 4;
	} else { // argarray[0] would be used uninitialised below
		assert(0) ;
	}
    Call_Inst *cll = new(mem) Call_Inst(Call_Inst::vtune_method_call/*Call_Inst::readbarrier_call*/, exp, NULL, false, nxt); //##
    cll->set_args(argarray, n_srcs, NULL);
    cll->set_expanded();
	argarray[0]->set_gc_unsafe() ;
	cll->set_gc_unsafe() ;

    // caller-pop: esp = esp + esp_adjust; must not be eliminated.
    Operand_Exp *esp = exprs.lookup_reg_exp(esp_reg,JIT_TYPE_INT,0);
    Operand_Exp *imm = exprs.lookup_imm_exp(esp_adjust, JIT_TYPE_INT);
    Inst_Exp *add = exprs.lookup_inst_exp(Exp::Add,esp,imm,JIT_TYPE_INT);
    Inst *i = new (mem) Add_Inst(Add_Inst::add,esp->opnd,imm->opnd,add,nxt);

	i->set_dst(esp->opnd);
    i->dont_eliminate();

    unlink();

	return argarray[0];
}
#endif

#ifdef JIT_SAPPHIRE
//::
// READ BARRIER, similar to the write barrier.
// Replaces this pseudo instruction with a call to the readbarrier helper:
//   n_srcs == 1 (getstatic):  push _srcs[0]
//   otherwise:                push _srcs[1] (off); push _srcs[0] (base)
// followed by the call and "add esp, 4|8" (caller-pop).
// Returns the first push.
//::
Inst *Readbarrier_Inst::special_expansion(Expressions &exprs)
{
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();

    assert(exp->is_inst_exp());
    Inst_Exp *iexp = (Inst_Exp*)exp;
    int pushed_bytes = 0;
    Inst **args = (Inst**)mem.alloc(n_srcs * sizeof(Inst*));

    const bool base_only = (n_srcs == 1);   // getstatic
    if (!base_only) {
        // The immediate expression exists only so the push has an expression.
        Exp *placeholder = exprs.lookup_imm_exp(0, JIT_TYPE_INT);

        // A disabled experiment saved eax with an extra push at this point:
        //   Operand_Exp *eax = exprs.lookup_reg_exp(eax_reg,JIT_TYPE_INT,0);
        //   new (mem) Push_Inst(eax->opnd,imm,nxt) ;

        args[0] = new (mem) Push_Inst(_srcs[1], placeholder, nxt);          // push off
        args[1] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), nxt);   // push base
        pushed_bytes = 8;
    } else {
        args[0] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), nxt);   // push base
        pushed_bytes = 4;
    }

    Call_Inst *call = new(mem) Call_Inst(Call_Inst::readbarrier_call, exp, NULL, false, nxt);
    call->set_args(args, n_srcs, NULL);
    call->set_expanded();
    call->wb_kind = wb_kind;

    // The matching disabled experiment restored eax after the call:
    //   if(n_srcs > 1){
    //       Operand_Exp *eax2 = exprs.lookup_reg_exp(eax_reg,JIT_TYPE_INT,0);
    //       Exp *imm2 = exprs.lookup_imm_exp(0, JIT_TYPE_INT);
    //       new (mem) Pop_Inst(eax2->opnd,imm2,nxt) ;
    //   }

    // caller-pop: esp = esp + pushed_bytes; must not be eliminated.
    Operand_Exp *sp = exprs.lookup_reg_exp(esp_reg, JIT_TYPE_INT, 0);
    Operand_Exp *bytes = exprs.lookup_imm_exp(pushed_bytes, JIT_TYPE_INT);
    Inst_Exp *sum = exprs.lookup_inst_exp(Exp::Add, sp, bytes, JIT_TYPE_INT);
    Inst *restore = new (mem) Add_Inst(Add_Inst::add, sp->opnd, bytes->opnd, sum, nxt);
    restore->set_dst(sp->opnd);
    restore->dont_eliminate();

    unlink();
    return args[0];
}
#endif //JIT_SAPPHIRE

//
// A push of a long whose source has a separate high operand becomes
//     push  src_hi      (new instruction, flagged as expanded)
//     push  src_lo      (this instruction)
// In that case the new high push is returned; otherwise the instruction is
// left alone and 'this' is returned.
//
Inst *Push_Inst::special_expansion(Expressions& exprs) {
    Operand *src = _srcs[0];
    if (src->type != JIT_TYPE_LONG)
        return this;

    Operand *hi = src->hi_opnd();
    if (hi == NULL)
        return this;

    Inst *hi_push = new (exprs.mem) Push_Inst(hi, exp, this);
    hi_push->set_expanded();
    return hi_push;
}

//
// Branch selection table used when a long compare is split into a high-word
// compare and a low-word compare (see Compare_Inst::special_expansion).
// Row:      kind of the branch that follows the compare; the rows are
//           annotated with the bytecode they stand for (ifeq .. ifle).
// Column 0: branch created after the high-word compare, not marked
//           fallthrough.
// Column 1: branch created after the high-word compare and marked
//           fallthrough.
// Column 2: not read by Compare_Inst::special_expansion; presumably the
//           branch kind for the low-word compare -- TODO confirm at the
//           use site.
// An entry of Branch_Inst::n_br means "emit no branch" for that slot.
//
const Branch_Inst::Kind Compare_Inst::br_lcmp_map[Branch_Inst::n_br][3] = {
{Branch_Inst::n_br, Branch_Inst::bne,  Branch_Inst::beq},  // if (x != y) ifeq
{Branch_Inst::bne,  Branch_Inst::n_br, Branch_Inst::bne},  // if (x == y) ifne
{Branch_Inst::blt,  Branch_Inst::bgt,  Branch_Inst::blt},  // if (x >= y) iflt
{Branch_Inst::bgt,  Branch_Inst::blt,  Branch_Inst::bge},  // if (x < y)  ifge
{Branch_Inst::bgt,  Branch_Inst::blt,  Branch_Inst::bgt},  // if (x <= y) ifgt
{Branch_Inst::blt,  Branch_Inst::bgt,  Branch_Inst::ble},  // if (x > y)  ifle
};

extern bool O3_is_PPro;
extern bool O3_unsafe_fcmp;
//
// Expands a compare that needs more than a single cmp instruction.
//
// Floating point / double:
//   If the next instruction is not a branch, nothing is done.  Otherwise,
//   unless O3_is_PPro is set, "fnstsw eax; sahf" is inserted ahead of the
//   branch to move the FP status into the flags.  Unless O3_unsafe_fcmp is
//   set, an extra parity branch (bp) is added for some branch kinds so that
//   comparisons with NaN are handled.  Returns 'this'.
//
// Long:
//   A compare of the high operands is inserted ahead of this instruction,
//   followed by up to two branches chosen from br_lcmp_map.  Returns the new
//   high-word compare.
//
// Any other type: returns 'this' unchanged.
//
Inst *Compare_Inst::special_expansion(Expressions& exprs) {
    assert(_dst->is_status_flags());
    Mem_Manager& mem = exprs.mem;
    if (IS_FP_DBL_TYPE(type())) {
        //  fld    [eax + 4]   --- load src1 unto fp stack
        //  fcomp  [eax + 8]   --- compare src1, src2
        //  fnstsw eax         --- store fp status word to eax without checking
        //                         unmasked floating-point error condition
        //  sahf               --- loads SF, ZF, AF, PF, and CF flags with 
        //                         values from the eax (AH)
        Inst *br = next();
        if (!br->is_branch()) return this;  // XXX- hack!
        assert(br->is_branch());
        Inst *epnd = this;
        if (!O3_is_PPro)
        {
            Operand *eax = exprs.lookup_reg_exp(eax_reg,JIT_TYPE_INT,0)->opnd;
            (new (mem) Native_Inst(Native_Inst::fnstsw,_dst,exp,br))->set_dst(eax);
            (new (mem) Native_Inst(Native_Inst::sahf,eax,exp,br))->set_dst(_dst);
        }
        // Half the time, we need to add an extra branch instruction to handle
        // comparisons with NaN.
        if (!O3_unsafe_fcmp)
        {
            Branch_Inst *branch = (Branch_Inst *)br;
            Branch_Inst *newbr = NULL;
            switch (branch->kind())
            {
            case Branch_Inst::beq:
                // Add a "jp <fallthrough>" before the existing branch.
                newbr = new(mem) Branch_Inst(Branch_Inst::bp, false, _dst, exp, br);
                newbr->set_fallthrough();
                break;
            case Branch_Inst::bne:
                // Add a "jp <target>" after the existing branch.
                newbr = new(mem) Branch_Inst(Branch_Inst::bp, false, _dst, exp, br->next());
                break;
            case Branch_Inst::blt:
            case Branch_Inst::ble:
                if (is_cmpg)
                {
                    // Add a "jp <fallthrough>" before the existing branch.
                    newbr = new(mem) Branch_Inst(Branch_Inst::bp, false, _dst, exp, br);
                    newbr->set_fallthrough();
                }
                break;
            case Branch_Inst::bge:
            case Branch_Inst::bgt:
                if (is_cmpg)
                {
                    // Add a "jp <target>" after the existing branch.
                    newbr = new(mem) Branch_Inst(Branch_Inst::bp, false, _dst, exp, br->next());
                }
                break;
            }
            if (newbr != NULL)
                newbr->dont_commute();
        }
        return epnd;
    } else if (type() == JIT_TYPE_LONG) {
        //
        // for long comparison, we compare hi 32 bits then low 32 bits
        // e.g.,  if (x > y) {   bytecode: lload(x)
        //           ...                   lload(y)
        //        } else {                 lcmp
        //           ...                   ifle  --> else_label
        //        }
        //
        // Assume that Xh and Xl are high and low 32 bit of X, respectively.
        //        cmp Xh, Yh
        //        jl  --> else_label
        //        jg  --> if_label
        //        cmp Xl, Yl
        //        jle --> else_label
        //
        Operand *s0_hi = _srcs[0]->hi_opnd();
        Operand *s1_hi = _srcs[1]->hi_opnd();
        assert(s0_hi != NULL && s1_hi != NULL);
        Inst *epnd = new (mem) Compare_Inst(cmp,false,s0_hi,s1_hi,exp,false,this);
        epnd->set_dst(_dst);
        epnd->set_expanded();
        assert(next()->is_branch());
        Branch_Inst::Kind br = ((Branch_Inst*)next())->kind();
        // tmp tracks the most recently created branch; it starts out NULL so
        // that a table row with no branch in either slot is caught by the
        // assert below instead of dereferencing an uninitialised pointer.
        Branch_Inst *tmp = NULL;
        if (br_lcmp_map[br][0] != Branch_Inst::n_br)
            tmp = new (mem) Branch_Inst(br_lcmp_map[br][0],true,_dst,exp,this); 
        if (br_lcmp_map[br][1] != Branch_Inst::n_br)
        {
            tmp = new (mem) Branch_Inst(br_lcmp_map[br][1],true,_dst,exp,this);
            tmp->set_fallthrough();
        }
        assert(tmp != NULL);
        // Only the last branch created is marked (unchanged behaviour).
        tmp->dont_commute();
        return epnd;
    }
    return this;
}

//
// Turns a bounds-check compare into "cmp; branch-to-exception-block".
// Does nothing unless gen_branch is set and the compare is still live; the
// compare is marked dead instead when bounds checks are disabled.
// The branch target is a shared block holding a single call to the bounds
// helper.  One such block is cached per exception-handler edge, or on the
// flow graph when the node has no handler edge, and is created on demand.
//
void Compare_Inst::bounds_expansion(Expressions& exprs, Cfg_Node *node, Flow_Graph *fg) {
    assert(_dst->is_status_flags());
    Mem_Manager& mem = exprs.mem;
    if (!gen_branch)
        return;

#ifdef TURN_OFF_BOUNDS
    _dead = true;
#endif
#if 1
    if (node->flowgraph->remove_all_bounds_checks)
        _dead = true;
#endif
    if (_dead)
        return;

    // XXX- Potential problem: we create one bounds exception block per Eh_Node
    // and one for the flow graph.  However, if two instructions branch to the
    // same node, but with a different number of pushed arguments, the
    // esp_adjustment will be wrong, and stack unwinding will be done
    // incorrectly.  Question: does our IR allow bounds checking after pushing
    // arguments?
    Cfg_Node *handler = NULL;
    if (node->eh_out_edge() == NULL)
        handler = fg->bounds_exception();
    else
        handler = node->eh_out_edge()->bounds_exception();

    if (handler == NULL)
    {
        // First use: build the block, put the helper call in it, cache it.
        handler = fg->create_bounds_exception_block(node);
        new(mem) Call_Inst(Call_Inst::bounds_call, exp, NULL, false,
            handler->IR_instruction_list());
        if (node->eh_out_edge() != NULL)
            node->eh_out_edge()->set_bounds_exception(handler);
        else
            fg->set_bounds_exception(handler);
    }

    // Default form:
    //   cmp index, A.length
    //   jae <handler>
    // i.e. condition "ge" with an unsigned test.
    Branch_Inst::Kind cond = Branch_Inst::bge;
    Operand *first = _srcs[0];
    // KKK
    if (first->is_single_def_temp_reg() &&
        ((Temp_Reg*)first)->inst()->is_imm_assignment()) {
        // The first operand is a temp defined once by an immediate
        // assignment: swap the operands, use that assignment's source in
        // place of the temp, and branch on "le" instead.
        _srcs[0] = _srcs[1];
        _srcs[1] = ((Temp_Reg*)first)->inst()->src(0);
        cond = Branch_Inst::ble;
    }

    Branch_Inst *check = new(mem) Branch_Inst(cond, false, _dst, exp, next(), handler);
    check->set_bound_branch();  // this is a bound checking branch
    check->set_expanded();
}

//
// Expands an object-information query and unlinks this instruction.
//   length t1       ===>  dst = [t1 + array_length_offset()]
//   vtable t1       ===>  dst = [t1]
//   intfcvtable     ===>  push _srcs[1] (ch); push _srcs[0] (object);
//                         call resintfc; dst = <JIT_TYPE_ADDR return operand>
// Returns the first instruction of the expansion.  An unrecognised kind
// trips an assert and yields NULL rather than an uninitialised pointer.
//
Inst *Obj_Info_Inst::special_expansion(Expressions& exprs) {
    Inst *epnd = NULL;
    Mem_Manager &mem = exprs.mem;
    O3_Jit_Type ty = type();
    Field_Operand *fld;
    if (kind == length) {        // length t1  ===>  [t1 + length offset]
		//::Change the hard code '4' to a VM interface array_length_offset()
		unsigned length_offset = array_length_offset() ;
	    fld = new (mem) Field_Operand((Reg_Operand*)_srcs[0],/*4*/length_offset,ty,0,true);
	    epnd = new (mem) Assign_Inst(_dst,fld,exp,next());
    } else if (kind == vtable) { // vtable t1 ===> [t1]
	    fld = new (mem) Field_Operand((Reg_Operand*)_srcs[0],0,ty,0,true);
	    epnd = new (mem) Assign_Inst(_dst,fld,exp,next());
    } else if (kind == intfcvtable) {
        Inst *nxt = next();
        Inst **argarray = (Inst**)mem.alloc(2 * sizeof(*argarray));
        argarray[0] = new (mem) Push_Inst(_srcs[1],((Inst_Exp*)exp)->rght_child(),nxt); // push ch
        argarray[1] = new (mem) Push_Inst(_srcs[0],((Inst_Exp*)exp)->left_child(),nxt); // push object
        Call_Inst *call = new(mem) Call_Inst(Call_Inst::resintfc_call, exp, NULL, false, nxt);
        Operand_Exp *ret = exprs.lookup_ret_exp(JIT_TYPE_ADDR);
        Inst *result_assn = new(mem) Assign_Inst(_dst, ret->opnd, exp, nxt);
        epnd = argarray[0];
        // Flag the pushes and the call, but not the result assignment.
        Inst *a;
        for (a = epnd; a != result_assn; a = a->next()) a->set_expanded();
        call->set_args(argarray, 2, result_assn);
    } else {
        assert(0);  // unknown kind: nothing was expanded
    }
    unlink();
    return epnd;
}

//
// Expands a checkcast or instanceof into a runtime helper call:
//     push  _srcs[0]
//     push  _srcs[1]
//     call  checkcast | instanceof
//     dst = <return operand: JIT_TYPE_CLASS for cast, JIT_TYPE_INT otherwise>
// 'this' is unlinked afterwards.  Returns the first push.
//
Inst *Type_Inst::special_expansion(Expressions& exprs) {
    assert (kind == cast || kind == instance);
    Mem_Manager& mem = exprs.mem;
    Inst_Exp *iexp = (Inst_Exp*)exp;

    Inst **pushes = (Inst**)mem.alloc(2 * sizeof(Inst*));
    pushes[0] = new (mem) Push_Inst(_srcs[0], iexp->left_child(), this);
    pushes[1] = new (mem) Push_Inst(_srcs[1], iexp->rght_child(), this);

    const bool is_cast = (kind == cast);
    Operand_Exp *ret = exprs.lookup_ret_exp(is_cast ? JIT_TYPE_CLASS : JIT_TYPE_INT);
    Call_Inst::Kind helper = is_cast ? Call_Inst::checkcast_call
                                     : Call_Inst::instanceof_call;
    Call_Inst *call = new (mem) Call_Inst(helper, exp, NULL, false, this);
    Inst *result = new (mem) Assign_Inst(_dst, ret->opnd, exp, this);

    // Flag the pushes and the call, but not the result assignment.
    Inst *cur = pushes[0];
    while (cur != result) {
        cur->set_expanded();
        cur = cur->next();
    }

    call->set_args(pushes, 2, result);
    unlink(); // remove this instruction from the instruction list
    return pushes[0];
}

//
// Finds the smallest and largest value in matches[0 .. size-1].
//   matches  array of values to scan
//   size     number of entries to scan
//   low      receives the minimum
//   high     receives the maximum
// When size <= 0 nothing is scanned: low is left at the largest and high at
// the smallest 32-bit int.
//
static void find_low_high(int *matches, int size, int& low, int& high) {
    low  = 0x7FFFFFFF;      // biggest positive number
    // Smallest negative number.  Written as a signed expression because the
    // literal 0x80000000 has unsigned type, and converting it to int is
    // implementation-defined.
    high = -0x7FFFFFFF - 1;
    int i;
    for (i = 0; i < size; i++) {
        if (matches[i] < low)
            low = matches[i];
        if (matches[i] > high)
            high = matches[i];
    }
}

//
// O3 statistics: emits "counter += 1" for the next free inner counter of
// node's flow graph.
//   node   node whose flow graph owns the counters
//   exprs  expression table / allocator used to build the instruction
//   nxt    position argument passed to the new add instruction
// Each call claims entry inner_counter[inner_counter_num*2] and then bumps
// inner_counter_num.  The add is done as a long when O3_PROF_COUNTER is
// 64 bits wide and as an int otherwise, and is expanded immediately.
//
void O3_inner_bb_instrument(Cfg_Node* node , Expressions& exprs, Inst* nxt)
{
    assert(Inner_O3_statistics) ;
    assert(node->flowgraph) ;
    assert(node->flowgraph->inner_counter_num < O3_MIN_INNER_BRANCH_SIZE-1) ;

    // Claim the next counter slot.
    void *slot = (void*)&node->flowgraph->inner_counter[node->flowgraph->inner_counter_num*2];
    ++node->flowgraph->inner_counter_num;

    // Build the constant 1 with the same width as the counter.
    O3_Jit_Type counter_ty;
    Operand_Exp *one;
    if (sizeof(O3_PROF_COUNTER) != sizeof(uint64)) {
        counter_ty = JIT_TYPE_INT;
        one = exprs.lookup_imm_exp(1, counter_ty);
    } else {
        counter_ty = JIT_TYPE_LONG;
        Value v;
        v.l.hi = 0;
        v.l.lo = 1;
        one = exprs.lookup_const_exp(&v, counter_ty);
    }

    // counter = counter + 1
    Operand_Exp *counter = exprs.lookup_static_exp(slot, counter_ty, slot, NULL);
    Exp *sum = exprs.lookup_inst_exp(Exp::Add, counter, one, counter_ty);
    Exp *store = exprs.lookup_inst_exp(Exp::Assign, counter, sum, counter_ty);
    Inst *bump = new (exprs.mem) Add_Inst(Add_Inst::add, counter->opnd, one->opnd, store, nxt);
    bump->set_dst(counter->opnd);
    bump->expand(exprs);
}

//
// Expands a lookupswitch into a chain of compares and branches:
//               cmp _srcs[0], imm1
//               beq ...
//               cmp _srcs[0], imm2
//               beq ...
//               ...
// Runs of at least three consecutive match values that share one target are
// collapsed into a two-compare range test (see the diagram in the body).
//   exprs  expression table / allocator
//   node   passed to O3_inner_bb_instrument when Inner_O3_statistics is set
//   sw     match values (sw->matches), their targets (sw->offsets, entry 0
//          being the default) and the entry count (sw->size)
// Returns next() as it stands after the new instructions have been created
// (the first cmp).
//
Inst *Switch_Inst::expand_lookupswitch(Expressions& exprs, 
                                       Cfg_Node *node,
                                       lookupswitch_info *sw) {
    Mem_Manager& mem = exprs.mem;
    Inst *cmp, *nxt = next();
    Operand_Exp *imm;
    Branch_Inst *br;
    // One status-flags operand is shared by every compare/branch pair.
    Status_Flags *status =
    new (mem) Status_Flags(exprs.reg_map.get_tmp_reg_id(0),type());
    int i,j;
    for (i = 0; i < sw->size; i++) {
        // Advance j to the end of the run that starts at i: consecutive
        // match values whose targets all equal the target of entry i.
        for (j = i+1; j < sw->size; j++) 
            if (sw->matches[i]+(j-i) != sw->matches[j] ||
                //::node->out_edges(j+1) != node->out_edges(i+1)) break;
				//::To avoid redundant out_edges, now we use sw->offsets[] to determine the targets
				sw->offsets[j+1] != sw->offsets[i+1]) break ;
        //
        // collapse   cmp _srcs[0], 1          
        //            beq ttt 
        //            cmp _srcs[0], 2             cmp _srcs[0], 9
        //            beq ttt                     bgt yyy
        //               ...            ===>      cmp _srcs[0], 1 
        //            cmp _srcs[0], 9             bge ttt
        //            beq ttt 
        //     yyy    cmp _srcs[0], 13     yyy    cmp _srcs[0], 13
        //            beq zzz                     beq zzz
        //
        if (j > i+2) {// run of at least 3 consecutive numbers (j - i >= 3)
            // Upper bound test against the last value of the run.
            imm = exprs.lookup_imm_exp(sw->matches[j-1],JIT_TYPE_INT);
            cmp = new (mem) Compare_Inst(Compare_Inst::cmp,false,_srcs[0],
                                         imm->opnd,exp,false,nxt);
            cmp->set_dst(status);
            br = new (mem) Branch_Inst(Branch_Inst::bgt,true,status,exp,nxt); 
            // If the run reaches the end of the table, a value above it goes
            // to the default target; otherwise it skips the next branch.
            if (j == sw->size)
                br->set_cfg_idx(0);
            else
                br->set_jump_over_next_branch();

			br->dont_commute() ;//::Don't commute/swap the edges!

			//
			// for O3 statistics, inner bb instrument
			//
			if(Inner_O3_statistics)
				O3_inner_bb_instrument(node , exprs, nxt) ; 

            // Lower bound test against the first value of the run.
            imm = exprs.lookup_imm_exp(sw->matches[i],JIT_TYPE_INT);
            cmp = new (mem) Compare_Inst(Compare_Inst::cmp,false,_srcs[0],
                                         imm->opnd,exp,false,nxt);
            cmp->set_dst(status);
            br = new (mem) Branch_Inst(Branch_Inst::bge,true,status,exp,nxt); 

            br->set_cfg_idx(sw->offsets[j]); //::j
			br->dont_commute() ;//::Don't commute/swap the edges!
			//
			// for O3 statistics, inner bb instrument
			//
			if(Inner_O3_statistics)
				O3_inner_bb_instrument(node , exprs, nxt) ; 

            // Resume after the run; the loop's i++ moves on to entry j.
            i = j-1;
        } else {
            // Single entry: plain equality test.
            imm = exprs.lookup_imm_exp(sw->matches[i],JIT_TYPE_INT);
            cmp = new (mem) Compare_Inst(Compare_Inst::cmp,false,_srcs[0],
                                         imm->opnd,exp,false,nxt);
            cmp->set_dst(status);
            br = new (mem) Branch_Inst(Branch_Inst::beq,false,status,exp,nxt); 
            br->set_cfg_idx(sw->offsets[i+1]); //::i+1 // first one is default
			br->dont_commute() ;//::Don't commute/swap the edges!
			//
			// for O3 statistics, inner bb instrument
			//
			if(Inner_O3_statistics)
				O3_inner_bb_instrument(node , exprs, nxt) ; 

        }
    }
    return next();  // first cmp
}

//
// Expands a tableswitch into a range check plus an indexed jump:
//     cmp  src, low
//     jlt  default
//     cmp  src, high
//     jgt  default
//     tmp = src - low
//     tmp = tmp + tmp      ; two doublings scale the index by 4
//     tmp = tmp + tmp
//     jmp  [tmp + 0]       ; jump_switch through a field operand on tmp
//   exprs  expression table / allocator
//   node   passed to O3_inner_bb_instrument when Inner_O3_statistics is set
//   low    lowest case value
//   high   highest case value
// Returns the first compare.
//
Inst *Switch_Inst::expand_tableswitch(Expressions& exprs, 
                                      Cfg_Node *node,
                                      int low,
                                      int high) {
    Mem_Manager& mem = exprs.mem;
    Inst *nxt = next();
    // One status-flags operand is shared by both range checks.
    Status_Flags *flags =
        new (mem) Status_Flags(exprs.reg_map.get_tmp_reg_id(0), type());

    // src < low  -->  default (cfg index 0)
    Operand_Exp *lo_imm = exprs.lookup_imm_exp(low, JIT_TYPE_INT);
    Inst *first = new (mem) Compare_Inst(Compare_Inst::cmp, false, _srcs[0],
                                         lo_imm->opnd, exp, false, nxt);
    first->set_dst(flags);
    Branch_Inst *below = new (mem) Branch_Inst(Branch_Inst::blt, true, flags, exp, nxt);
    below->set_cfg_idx(0);

    // for O3 statistics, inner bb instrument
    if (Inner_O3_statistics)
        O3_inner_bb_instrument(node, exprs, nxt);

    // src > high  -->  default (cfg index 0)
    Operand_Exp *hi_imm = exprs.lookup_imm_exp(high, JIT_TYPE_INT);
    Inst *second = new (mem) Compare_Inst(Compare_Inst::cmp, false, _srcs[0],
                                          hi_imm->opnd, exp, false, nxt);
    second->set_dst(flags);
    Branch_Inst *above = new (mem) Branch_Inst(Branch_Inst::bgt, true, flags, exp, nxt);
    above->set_cfg_idx(0);

    // for O3 statistics, inner bb instrument
    if (Inner_O3_statistics)
        O3_inner_bb_instrument(node, exprs, nxt);

    // index = src - low, held in a fresh temp
    Temp_Reg *index = exprs.create_new_temp_reg(lo_imm->type);
    Inst *rebase = new (mem) Sub_Inst(Sub_Inst::sub, _srcs[0], lo_imm->opnd, exp, nxt);
    rebase->set_dst(index);

    // index *= 4, done as two self-additions
    int pass;
    for (pass = 0; pass < 2; pass++) {
        Inst *dbl = new (mem) Add_Inst(Add_Inst::add, index, index, exp, nxt);
        dbl->set_dst(index);
    }

    // indexed jump through the table
    Field_Operand *slot = new (mem) Field_Operand(index, 0, JIT_TYPE_INT, 0, false);
    new (mem) Jump_Inst(Jump_Inst::jump_switch, slot, exp, nxt);
    return first;
}

//
// Expands a switch according to its kind and unlinks this instruction.
//   lookup  -> expand_lookupswitch, with sw_info read as lookupswitch_info
//   table   -> expand_tableswitch, with low/high taken from sw_info read as
//              tableswitch_info
// Returns the first instruction of the expansion.  An unknown kind trips an
// assert; when asserts are compiled out NULL is returned rather than an
// uninitialised pointer.
//
Inst *Switch_Inst::special_expansion(Expressions& exprs, Cfg_Node *node) {
    Inst *epnd = NULL;
    if (kind == lookup) {
        lookupswitch_info *sw = (lookupswitch_info*)sw_info;
        epnd = expand_lookupswitch(exprs, node, sw);
    } else if (kind == table) {
        tableswitch_info *sw = (tableswitch_info*)sw_info;
        epnd = expand_tableswitch(exprs, node, sw->low, sw->high);
    } else assert(0);
    unlink();
    return epnd;
}

//
// Expands the argument assignments of a call in place.
// Each of the _n_args entries in _args is an assignment that arg_expansion
// turns into one or two instructions.  The results are written back into
// _args from the end of the array towards the front, so the array (capacity
// _sz_of_arg_array) must end up exactly full.  _n_args is then set to
// _sz_of_arg_array.
// Returns 'this' when the call has no arguments, otherwise _args[0].
//
Inst *Call_Inst::special_expansion(Expressions& exprs) {
    set_expanded();
    if (_n_args == 0)
        return this;

    Inst *first_out, *second_out;       // filled in by arg_expansion
    unsigned write = _sz_of_arg_array;  // one past the next slot to fill
    unsigned read = _n_args;            // one past the next argument to expand
    while (read > 0) {
        Assign_Inst *arg = (Assign_Inst*)_args[read-1];
        assert(arg->is_assignment());
        arg->arg_expansion(exprs, get_mhandle(), first_out, second_out);
        if (second_out != NULL) {
            // The argument needed two instructions; store the second first.
            assert(write > read);
            write--;
            _args[write] = second_out;
        }
        assert(write >= read);
        write--;
        _args[write] = first_out;
        read--;
    }
    assert(write == 0);
    _n_args = _sz_of_arg_array;
    return _args[0];
}

//
// routines for emitting code (Imm_Operand)
//
// Emits a mov of this immediate into register 'dst'.
// The Imm_Opnd is a named local: taking the address of a temporary
// (&Imm_Opnd(_imm)) is ill-formed in standard C++ and only accepted as a
// compiler extension.
void Imm_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
     const R_Opnd *const dst) {
    Imm_Opnd imm(_imm);
    emitter.emit_mov(dst,&imm);
}
// Emits a mov of this immediate into register 'dst'.
// When ok_with_carry is true the generic emit_mov is used; otherwise
// emit_mov_imm32 is used.
// The Imm_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
     const R_Opnd *const dst,bool ok_with_carry) {
    if (ok_with_carry) {
        Imm_Opnd imm(_imm);
        emitter.emit_mov(dst,&imm);
    }
    else
        emitter.emit_mov_imm32(dst,_imm);
}
// Emits a store of this immediate to memory operand 'dst' using operand
// size 'sz'.
// The Imm_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_mov_to_mem(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
         const M_Opnd *const dst, X86_Opnd_Size sz=opnd_32){
    Imm_Opnd imm(_imm);
    emitter.emit_mov(dst,&imm,sz);
}
// Emits a push of this immediate.
// The Imm_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_push(O3_Emitter& emitter,X86_Opnd_Pool& xopnds){
    Imm_Opnd imm(_imm);
    emitter.emit_push(&imm);
}
// Emits "dst = dst <opc> imm" with this immediate as the source operand.
// The last emit_alu argument is true unless dst has type JIT_TYPE_LONG.
// The Imm_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_alu_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                       X86_ALU_Opcode opc, Operand*dst) {
    Imm_Opnd imm(_imm);
    emitter.emit_alu(opc,x86_opnds.get_rm_opnd(dst),&imm, (dst->type != JIT_TYPE_LONG));
}
// Emits a multiply of dst by this immediate.
// When is_power2(_imm) reports a shift count (anything but -1) the multiply
// is replaced by a left shift.  Otherwise dst must have a physical register
// and an imul with the immediate is emitted.
// Operands are named locals because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_imul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                 Operand*dst){
    //
    // replace with shift 
    //
    int v;
    if ((v = is_power2(_imm)) != -1) {
        Imm_Opnd count(v);
        emitter.emit_shift(shl_opc,x86_opnds.get_rm_opnd(dst),&count);
    }
    else if (dst->assigned_preg() != n_reg) {
        R_Opnd dst_reg(dst->assigned_preg());
        Imm_Opnd imm(_imm);
        emitter.emit_imul(&dst_reg,&imm);
    }
    else assert(0);
}
// Emits a shift of dst by this immediate count.
// The Imm_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Imm_Operand::emit_shift(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                             X86_Shift_Opcode opc,Operand*dst) {
    Imm_Opnd count(_imm);
    emitter.emit_shift(opc,x86_opnds.get_rm_opnd(dst),&count);
}

//
// routines for emitting code (Reg_Operand)
//
// Emits a mov of this operand into register 'dst'.  The source is the
// assigned physical register when there is one, otherwise the operand's
// memory location.
// The R_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Reg_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                                  const R_Opnd *const dst) {
    if (assigned_preg() != n_reg) {
        R_Opnd src(assigned_preg());
        emitter.emit_mov(dst,&src);
    }
    else 
        emitter.emit_mov(dst,x86_opnds.get_m_opnd(this));
}
// Emits a conditional move (condition cc, signedness is_signed) of this
// operand into register 'dst'.  The source is the assigned physical register
// when there is one, otherwise the operand's memory location.
// The R_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Reg_Operand::emit_cmov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                                   const R_Opnd *const dst, X86_CC cc, unsigned is_signed) {
    if (assigned_preg() != n_reg) {
        R_Opnd src(assigned_preg());
        emitter.emit_cmov(dst,&src, cc, is_signed);
    }
    else 
        emitter.emit_cmov(dst,x86_opnds.get_m_opnd(this), cc, is_signed);
}
// Emits a store of this operand to memory operand 'dst' using operand size
// 'sz'.  The operand must have a physical register; anything else asserts.
// The R_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Reg_Operand::emit_mov_to_mem(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
     const M_Opnd *const dst, X86_Opnd_Size sz=opnd_32){
    if (assigned_preg() != n_reg) {
        R_Opnd src(assigned_preg());
        emitter.emit_mov(dst,&src,sz);
    }
    else assert(0);
}
// Emits "dst = dst <opc> this".
//   dst in a physical register:  reg <- reg/mem form
//   dst is a reg or mem operand without a physical register:
//                                mem <- reg form; this operand must then be
//                                in a physical register
//   anything else asserts.
// The R_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Reg_Operand::emit_alu_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                                X86_ALU_Opcode opc, Operand*dst) {
    if (dst->assigned_preg() != n_reg)
        emitter.emit_alu(opc,x86_opnds.get_r_opnd(dst),x86_opnds.get_rm_opnd(this));
    else if (dst->is_reg() || dst->is_mem()) {
        assert(assigned_preg() != n_reg);
        R_Opnd src(assigned_preg());
        emitter.emit_alu(opc,x86_opnds.get_m_opnd(dst),&src);
    }
    else assert(0);
}
//#ifdef INLINE_NATIVE
// Emits a mul whose operand is this operand in reg/mem form.
// 'dst' is not used here.  The second emit_mul argument is passed as 0;
// its meaning is defined by O3_Emitter::emit_mul (not visible in this file).
void Reg_Operand::emit_mul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                 Operand*dst){
    emitter.emit_mul(x86_opnds.get_rm_opnd(this),0);
}
//#endif
// Emits "imul dst, this".  dst must have a physical register; anything else
// asserts.
// The R_Opnd is a named local because taking the address of a temporary is
// ill-formed in standard C++.
void Reg_Operand::emit_imul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                 Operand*dst){
    if (dst->assigned_preg() != n_reg) {
        R_Opnd dst_reg(dst->assigned_preg());
        emitter.emit_imul(&dst_reg,x86_opnds.get_rm_opnd(this));
    }
    else assert(0);
}
//
// Emit a three-operand imul: the register assigned to 'dst' receives this
// operand (register-or-memory form) times the immediate held by 'imm'.
// 'imm' is cast to Imm_Operand without a check; the caller must pass an
// immediate operand.  Asserts if 'dst' has no physical register.
//
void Reg_Operand::emit_imul_inst_3(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                   Operand*dst, Operand *imm){
    if (dst->assigned_preg() != n_reg) {
        // Named locals instead of &R_Opnd(...) / &Imm_Opnd(...): address of
        // a temporary is not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        Imm_Opnd factor(((Imm_Operand *)imm)->imm());
        emitter.emit_imul(&dst_reg,x86_opnds.get_rm_opnd(this),&factor);
    }
    else assert(0);
}
//
// Emit a push of this operand: from its physical register when one is
// assigned, otherwise from the memory operand the pool returns for it.
//
void Reg_Operand::emit_push(O3_Emitter& emitter,X86_Opnd_Pool& xopnds){
    if (assigned_preg() != n_reg) {
        // Named local instead of &R_Opnd(...): address of a temporary is
        // not valid ISO C++.
        R_Opnd src_reg(assigned_preg());
        emitter.emit_push(&src_reg);
    } else {
        emitter.emit_push(xopnds.get_m_opnd(this));
    }
}
//
// Emit a pop into this operand's physical register.
// Unlike emit_push there is no memory fallback here, so a physical register
// must be assigned; this is asserted, matching the n_reg checks in the
// sibling Reg_Operand emit routines.
//
void Reg_Operand::emit_pop(O3_Emitter& emitter,X86_Opnd_Pool& xopnds){
    assert(assigned_preg() != n_reg);
    // Named local instead of &R_Opnd(...): address of a temporary is not
    // valid ISO C++.
    R_Opnd dst_reg(assigned_preg());
    emitter.emit_pop(&dst_reg);
}
//
// Emit "test reg,reg" on this operand's physical register.
// Asserts (and emits nothing) when no physical register is assigned.
//
void Reg_Operand::emit_test(O3_Emitter& emitter, X86_Opnd_Pool& xopnds){
    R_Opnd self_reg(assigned_preg());
    if (assigned_preg() == n_reg) {
        assert(0);
        return;
    }
    emitter.emit_test(&self_reg,&self_reg);
}
//
// Emit the shift 'opc' on 'dst' (register-or-memory form) with this operand
// as the shift count.
// The count must have been assigned ecx (asserted); no count operand is
// passed to the emitter.
//
void Reg_Operand::emit_shift(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
               X86_Shift_Opcode opc,Operand*dst) {
    assert(assigned_preg() == ecx_reg);
    emitter.emit_shift(opc,x86_opnds.get_rm_opnd(dst));
}
//
// Emit an fld of this float/double operand (type is asserted).
// An operand that is an FP-stack entry is loaded by stack index; any other
// operand is loaded from its memory operand, with a flag that is true for
// JIT_TYPE_DOUBLE.
//
void Reg_Operand::emit_fld(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    assert(IS_FP_DBL_TYPE(type));
    if (is_fp_stk()) {
        Fp_Stk *const as_stk = (Fp_Stk *)this;
        emitter.emit_fld(as_stk->stk);
        return;
    }
    const bool is_double = (type == JIT_TYPE_DOUBLE);
    emitter.emit_fld(x86_opnds.get_m_opnd(this), is_double);
}
//
// Emit an fst that stores into this operand's memory operand.
// The operand must be float/double (asserted); 'pop' is forwarded to the
// emitter unchanged.
//
void Reg_Operand::mov_fp_stk_to(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, unsigned pop) {
    assert(IS_FP_DBL_TYPE(type));
    M_Opnd *const dst_mem = x86_opnds.get_m_opnd(this);
    const bool is_double = (type == JIT_TYPE_DOUBLE);
    emitter.emit_fst(dst_mem,is_double,pop);
}
//
// Emit the FP operation 'opc' with this operand as a memory operand.
// Asserted preconditions: 'dst' is FP-stack slot 0, this operand is
// float/double, and no pop of slot 0 is requested.
//
void Reg_Operand::emit_fp_op(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                             X86_FP_Opcode opc,bool pop_stk0,Operand* dst) {
    assert(dst->is_fp_stk() && ((Fp_Stk*)dst)->stk == 0 && IS_FP_DBL_TYPE(type));
    assert(!pop_stk0);
    M_Opnd *const rhs_mem = x86_opnds.get_m_opnd(this);
    const bool is_double = (type == JIT_TYPE_DOUBLE);
    emitter.emit_fp_op_mem(opc,rhs_mem,is_double);
}

//
// routines for emitting code (Mem_Operand)
//
//
// Emit a load of this memory operand into the register 'dst'.
//
void Mem_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                         const R_Opnd *const dst) {
    M_Opnd *const src_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_mov(dst,src_mem);
}
//
// Emit a conditional move of this memory operand into the register 'dst',
// using condition 'cc' and the signedness flag 'is_signed'.
//
void Mem_Operand::emit_cmov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                          const R_Opnd *const dst, X86_CC cc, unsigned is_signed) {
    M_Opnd *const src_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_cmov(dst,src_mem, cc, is_signed);
}
//
// Emit the ALU instruction 'opc' with the register operand of 'dst' as
// destination and this memory operand as source.
//
void Mem_Operand::emit_alu_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                   X86_ALU_Opcode opc,Operand*dst) {
    R_Opnd *const dst_reg = x86_opnds.get_r_opnd(dst);
    M_Opnd *const src_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_alu(opc,dst_reg,src_mem);
}
//
// Emit a two-operand imul: the register assigned to 'dst' is multiplied by
// this memory operand.  Asserts if 'dst' has no physical register.
//
void Mem_Operand::emit_imul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                 Operand*dst){
    if (dst->assigned_preg() != n_reg) {
        // Named local instead of &R_Opnd(...): address of a temporary is
        // not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        emitter.emit_imul(&dst_reg,x86_opnds.get_m_opnd(this));
    }
    else assert(0);
}
//
// Emit a three-operand imul: the register assigned to 'dst' receives this
// memory operand times the immediate held by 'imm'.
// 'imm' is cast to Imm_Operand without a check.  Asserts if 'dst' has no
// physical register.
//
void Mem_Operand::emit_imul_inst_3(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                   Operand*dst, Operand *imm){
    if (dst->assigned_preg() != n_reg) {
        // Named locals instead of &R_Opnd(...) / &Imm_Opnd(...): address of
        // a temporary is not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        Imm_Opnd factor(((Imm_Operand *)imm)->imm());
        emitter.emit_imul(&dst_reg,x86_opnds.get_m_opnd(this),&factor);
    }
    else assert(0);
}
//
// Emit a push of this memory operand.
//
void Mem_Operand::emit_push(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const src_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_push(src_mem);
}
//
// Emit a pop into this memory operand.
//
void Mem_Operand::emit_pop(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const dst_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_pop(dst_mem);
}
//
// Load this memory operand onto the FP stack.
// Float/double operands use fld (flag true for JIT_TYPE_DOUBLE); all other
// types use fild (flag true for JIT_TYPE_LONG).
//
void Mem_Operand::emit_fld(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const src_mem = x86_opnds.get_m_opnd(this);
    if (IS_FP_DBL_TYPE(type)) {
        emitter.emit_fld(src_mem, type == JIT_TYPE_DOUBLE);
        return;
    }
    emitter.emit_fild(src_mem, type == JIT_TYPE_LONG);
}
//
// Store from the FP stack into this memory operand.
// Float/double operands use fst with 'pop' forwarded; all other types use
// the popping fist form (flag true for JIT_TYPE_LONG), where 'pop' is not
// consulted.
//
void Mem_Operand::mov_fp_stk_to(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, unsigned pop) {
    M_Opnd *const dst_mem = x86_opnds.get_m_opnd(this);
    if (IS_FP_DBL_TYPE(type)) {
        emitter.emit_fst(dst_mem,type == JIT_TYPE_DOUBLE,pop);
        return;
    }
    emitter.emit_fist_pop(dst_mem,type == JIT_TYPE_LONG);
}
//
// Emit the FP operation 'opc' with this memory operand as the source.
// Asserted preconditions: 'dst' is FP-stack slot 0, this operand is
// float/double, and no pop of slot 0 is requested.
//
void Mem_Operand::emit_fp_op(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                             X86_FP_Opcode opc,bool pop_stk0,Operand* dst) {
    assert(dst->is_fp_stk() && ((Fp_Stk*)dst)->stk == 0 && IS_FP_DBL_TYPE(type));
    assert(!pop_stk0);
    M_Opnd *const rhs_mem = x86_opnds.get_m_opnd(this);
    const bool is_double = (type == JIT_TYPE_DOUBLE);
    emitter.emit_fp_op_mem(opc,rhs_mem,is_double);
}

//
// routines for emitting code (Spill_Operand)
//
//
// Emit a load of this spill operand's memory location into register 'dst'.
//
void Spill_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                         const R_Opnd *const dst) {
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_mov(dst,spill_mem);
}
//
// Emit a conditional move of this spill operand's memory location into the
// register 'dst', using condition 'cc' and the signedness flag 'is_signed'.
//
void Spill_Operand::emit_cmov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                          const R_Opnd *const dst, X86_CC cc, unsigned is_signed) {
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_cmov(dst,spill_mem, cc, is_signed);
}
//
// Emit the ALU instruction 'opc' with the register operand of 'dst' as
// destination and this spill operand's memory location as source.
//
void Spill_Operand::emit_alu_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                   X86_ALU_Opcode opc,Operand*dst) {
    R_Opnd *const dst_reg = x86_opnds.get_r_opnd(dst);
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_alu(opc,dst_reg,spill_mem);
}
//
// Emit a two-operand imul: the register assigned to 'dst' is multiplied by
// this spill operand's memory location.
// Asserts if 'dst' has no physical register.
//
void Spill_Operand::emit_imul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                 Operand*dst){
    if (dst->assigned_preg() != n_reg) {
        // Named local instead of &R_Opnd(...): address of a temporary is
        // not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        emitter.emit_imul(&dst_reg,x86_opnds.get_m_opnd(this));
    }
    else assert(0);
}
//
// Emit a three-operand imul: the register assigned to 'dst' receives this
// spill operand's memory location times the immediate held by 'imm'.
// 'imm' is cast to Imm_Operand without a check.  Asserts if 'dst' has no
// physical register.
//
void Spill_Operand::emit_imul_inst_3(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, 
                                     Operand*dst, Operand *imm){
    if (dst->assigned_preg() != n_reg) {
        // Named locals instead of &R_Opnd(...) / &Imm_Opnd(...): address of
        // a temporary is not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        Imm_Opnd factor(((Imm_Operand *)imm)->imm());
        emitter.emit_imul(&dst_reg,x86_opnds.get_m_opnd(this),&factor);
    }
    else assert(0);
}
//
// Emit a push of this spill operand's memory location.
//
void Spill_Operand::emit_push(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_push(spill_mem);
}
//
// Emit a pop into this spill operand's memory location.
//
void Spill_Operand::emit_pop(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    emitter.emit_pop(spill_mem);
}
//
// Load this spill operand onto the FP stack.
// Float/double operands use fld (flag true for JIT_TYPE_DOUBLE); all other
// types use fild (flag true for JIT_TYPE_LONG).
//
void Spill_Operand::emit_fld(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    if (IS_FP_DBL_TYPE(type)) {
        emitter.emit_fld(spill_mem, type == JIT_TYPE_DOUBLE);
        return;
    }
    emitter.emit_fild(spill_mem, type == JIT_TYPE_LONG);
}
//
// Store from the FP stack into this spill operand's memory location.
// Float/double operands use fst with 'pop' forwarded; all other types use
// the popping fist form (flag true for JIT_TYPE_LONG), where 'pop' is not
// consulted.
//
void Spill_Operand::mov_fp_stk_to(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, unsigned pop) {
    M_Opnd *const spill_mem = x86_opnds.get_m_opnd(this);
    if (IS_FP_DBL_TYPE(type)) {
        emitter.emit_fst(spill_mem,type == JIT_TYPE_DOUBLE,pop);
        return;
    }
    emitter.emit_fist_pop(spill_mem,type == JIT_TYPE_LONG);
}
//
// Emit the FP operation 'opc' with this spill operand's memory location as
// the source.
// Asserted preconditions: 'dst' is FP-stack slot 0, this operand is
// float/double, and no pop of slot 0 is requested.
//
void Spill_Operand::emit_fp_op(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                               X86_FP_Opcode opc,bool pop_stk0,Operand* dst) {
    assert(dst->is_fp_stk() && ((Fp_Stk*)dst)->stk == 0 && IS_FP_DBL_TYPE(type));
    assert(!pop_stk0);
    M_Opnd *const rhs_mem = x86_opnds.get_m_opnd(this);
    const bool is_double = (type == JIT_TYPE_DOUBLE);
    emitter.emit_fp_op_mem(opc,rhs_mem,is_double);
}

//
// routines for emitting code (Const_Operand)
//
//
// Emit a load of this constant into the register 'dst'.
// emit_data() first makes sure the constant has a data label; the load then
// reads from the absolute address of that label.
//
void Const_Operand::emit_mov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
                                    const R_Opnd *const dst) {
    emit_data(emitter.data_emitter);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_mov(dst,&const_mem);
}
//
// Emit a conditional move of this constant into the register 'dst', using
// condition 'cc' and the signedness flag 'is_signed'.
// emit_data() first makes sure the constant has a data label; the source is
// the absolute address of that label.
//
void Const_Operand::emit_cmov_to_reg(O3_Emitter& emitter,X86_Opnd_Pool& xopnds,
                                     const R_Opnd *const dst, X86_CC cc, unsigned is_signed) {
    emit_data(emitter.data_emitter);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_cmov(dst,&const_mem, cc, is_signed);
}
//
// Emit the ALU instruction 'opc' with the register operand of 'dst' as
// destination and this constant (read from its data label) as source.
//
void Const_Operand::emit_alu_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                   X86_ALU_Opcode opc, Operand*dst) {
    emit_data(emitter.data_emitter);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_alu(opc,x86_opnds.get_r_opnd(dst),&const_mem);
}
//
// Emit a two-operand imul: the register assigned to 'dst' is multiplied by
// this constant, read from its data label.
// The constant's data is emitted (if not already) before the register check.
// Asserts if 'dst' has no physical register.
//
void Const_Operand::emit_imul_inst(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                                   Operand*dst) {
    emit_data(emitter.data_emitter);
    if (dst->assigned_preg() != n_reg) {
        // Named locals instead of &R_Opnd(...) / &M_Opnd(...): address of a
        // temporary is not valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        M_Opnd const_mem((unsigned)_data_label);
        emitter.emit_imul(&dst_reg,&const_mem);
    }
    else assert(0);
}
//
// Emit a three-operand imul: the register assigned to 'dst' receives this
// constant (read from its data label) times the immediate held by 'imm'.
// 'imm' is cast to Imm_Operand without a check.  Asserts if 'dst' has no
// physical register.
//
void Const_Operand::emit_imul_inst_3(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                                     Operand*dst, Operand *imm) {
    emit_data(emitter.data_emitter);
    if (dst->assigned_preg() != n_reg) {
        // Named locals instead of addresses of temporaries, which are not
        // valid ISO C++.
        R_Opnd dst_reg(dst->assigned_preg());
        M_Opnd const_mem((unsigned)_data_label);
        Imm_Opnd factor(((Imm_Operand *)imm)->imm());
        emitter.emit_imul(&dst_reg,&const_mem,&factor);
    }
    else assert(0);
}
//
// Emit a push of this constant, read from its data label (emitted first if
// it does not exist yet).
//
void Const_Operand::emit_push(O3_Emitter& emitter,X86_Opnd_Pool& xopnds){
    emit_data(emitter.data_emitter);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_push(&const_mem);
}
//
// Emit a pop whose destination is this constant's data label (emitted first
// if it does not exist yet).
// NOTE(review): this writes into the constant's data slot; confirm whether
// any caller relies on it.
//
void Const_Operand::emit_pop(O3_Emitter& emitter,X86_Opnd_Pool& xopnds) {
    emit_data(emitter.data_emitter);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_pop(&const_mem);
}
//
// Load this float/double constant onto the FP stack (type is asserted).
//  - ORP_NT builds: positive zero is loaded with fldz.
//  - All builds: the value 1.0 is loaded with fld1.
//  - Everything else is loaded with fld from the constant's data label,
//    which is emitted on demand.
//
void Const_Operand::emit_fld(O3_Emitter& emitter,X86_Opnd_Pool& xopnds) {
    assert(IS_FP_DBL_TYPE(type));

#ifdef ORP_NT
    if ((type == JIT_TYPE_FLOAT && _fpclass(val.f) == _FPCLASS_PZ) ||
        (type == JIT_TYPE_DOUBLE && _fpclass(val.d) == _FPCLASS_PZ))
        emitter.emit_fldz();

    // Parenthesized to make the &&/|| grouping explicit.
    else if ((type == JIT_TYPE_FLOAT && val.f == 1.0 && !_isnan(val.f)) ||
        (type == JIT_TYPE_DOUBLE && val.d == 1.0 && !_isnan(val.d)))
        emitter.emit_fld1();
    else
    {
        emit_data(emitter.data_emitter);
        // Named local instead of &M_Opnd(...): address of a temporary is
        // not valid ISO C++.
        M_Opnd const_mem((unsigned)_data_label);
        emitter.emit_fld(&const_mem,type == JIT_TYPE_DOUBLE);
    }
#else
    // I'm not sure how to test for positive 0 on Linux,
    // so we don't emit fldz.
    // Would it be OK to use the following?
    // if(!signbit(v) && fpclassify(v) == FP_ZERO)
    if ((type == JIT_TYPE_FLOAT && val.f == 1.0 && !isnan(val.f)) ||
        (type == JIT_TYPE_DOUBLE && val.d == 1.0 && !isnan(val.d)))
        emitter.emit_fld1();
    else
    {
        emit_data(emitter.data_emitter);
        // Named local instead of &M_Opnd(...): address of a temporary is
        // not valid ISO C++.
        M_Opnd const_mem((unsigned)_data_label);
        emitter.emit_fld(&const_mem,type == JIT_TYPE_DOUBLE);
    }
#endif
}
//
// Emit the FP operation 'opc' with this constant, read from its data label,
// as the memory source.
// The constant's data is emitted (if not already) before the checks.
// Asserted preconditions: 'dst' is FP-stack slot 0, this operand is
// float/double, and no pop of slot 0 is requested.
//
void Const_Operand::emit_fp_op(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                               X86_FP_Opcode opc,bool pop_stk0,Operand* dst) {
    emit_data(emitter.data_emitter);
    assert(dst->is_fp_stk() && ((Fp_Stk*)dst)->stk == 0 && IS_FP_DBL_TYPE(type));
    assert(!pop_stk0);
    // Named local instead of &M_Opnd(...): address of a temporary is not
    // valid ISO C++.
    M_Opnd const_mem((unsigned)_data_label);
    emitter.emit_fp_op_mem(opc,&const_mem,type == JIT_TYPE_DOUBLE);
}
//
// Make sure this constant has a data label, emitting its value through
// 'data_emitter' if none exists yet.  Does nothing when a label is already
// set.  Only float and double constants are supported; other types assert.
//
// A double is represented by a low and a high operand that share one
// emitted 8-byte value: the low label is the start of the value and the
// high label is 4 bytes past it.  A push of a constant reaches the high
// operand first, while an fld only ever reaches the low operand, so either
// order has to work.
//
void Const_Operand::emit_data(O3_Data_Emitter& data_emitter) {
    if (_data_label != NULL) return;

    if (type == JIT_TYPE_FLOAT) {
        _data_label = data_emitter.emit_float(val.f);
        return;
    }
    if (type != JIT_TYPE_DOUBLE) {
        assert(0);
        return;
    }

    if (is_hi()) {
        // Had the low operand been seen first, it would already have set
        // this operand's label and the early return above would have fired.
        _data_label = 4 + data_emitter.emit_double(val.d);
        return;
    }

    Const_Operand *const hi_half = (Const_Operand *)hi_opnd();
    if (hi_half->data_label() != NULL) {
        // The high half was emitted first: derive the low label from it.
        _data_label = hi_half->data_label() - 4;
    } else {
        // Neither half emitted yet: emit once and label both halves.
        _data_label = data_emitter.emit_double(val.d);
        hi_half->set_data_label(_data_label + 4);
    }
}

//
// routines for emitting code (Fp_Stk)
//
//
// Emit an fstp that stores ST(0) into this FP-stack slot ('stk').
// Only the popping form is supported, so 'pop' must be non-zero (asserted).
//
void Fp_Stk::mov_fp_stk_to(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds, unsigned pop) {
    //
    // move ST(0) to the current stk
    //
    assert(pop);  // don't have the non-pop version in x86.cpp
    emitter.emit_fstp(stk);
}
//
// Emit the FP operation 'opc' between two FP-stack slots.
// 'dst' must be an FP-stack operand and this operand float/double
// (asserted).
//  - dst is slot 0: operate on this slot's index; a pop of slot 0 is not
//    allowed (asserted).
//  - dst is another slot: this operand must be slot 0 (asserted); operate on
//    dst's index and forward 'pop_stk0'.
//
void Fp_Stk::emit_fp_op(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds,
                        X86_FP_Opcode opc,bool pop_stk0,Operand* dst) {
    // dst being slot 0 is deliberately not required here.
    assert(dst->is_fp_stk() && IS_FP_DBL_TYPE(type));
    Fp_Stk *const dst_slot = (Fp_Stk*)dst;
    if (dst_slot->stk != 0) {
        assert(stk == 0);
        emitter.emit_fp_op(opc, dst_slot->stk, pop_stk0);
        return;
    }
    assert(!pop_stk0);
    emitter.emit_fp_op(opc,stk);
}

//
// instruction emit routines
//
//
// Emit a widening move of source 0 (register-or-memory form) into the
// register assigned to the destination, forwarding the _is_signed and
// _is_half flags.  Asserts if the destination has no physical register.
//
void Widen_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (_dst->assigned_preg() != n_reg) {
        // Named local instead of &R_Opnd(...): address of a temporary is
        // not valid ISO C++.
        R_Opnd dst_reg(_dst->assigned_preg());
        emitter.emit_widen(&dst_reg,
                x86_opnds.get_rm_opnd(_srcs[0]),_is_signed,_is_half);
    }
    else assert(0);
}

//
// Emit code for an assignment.  The cases are tried in this order:
//  1. destination has a physical register: mov / cmov / immediate mov
//  2. destination is an FP-stack slot and this is not an FP pop: fld/fild
//  3. source is the FP-stack top: store it to the destination
//  4. destination is an array element: sized store to memory
//  5. destination is a reg/mem operand without a register: store to memory
//  anything else asserts.
//
void Assign_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (_dst->assigned_preg() != n_reg) {
        R_Opnd *dst_reg = x86_opnds.get_r_opnd(_dst);
        if (_srcs[0]->kind == Operand::Immediate) {
            ((Imm_Operand*)_srcs[0])->emit_mov_to_reg(emitter,x86_opnds,dst_reg,_ok_with_carry);
        } else if (_cmov) {
            _srcs[0]->emit_cmov_to_reg(emitter,x86_opnds,dst_reg,
                Branch_Inst::_br_cc[_condition], _is_signed_cmov);
        } else {
            _srcs[0]->emit_mov_to_reg(emitter,x86_opnds,dst_reg);
        }
        return;
    }

    if (_dst->is_fp_stk() && !is_fp_pop()) {  // emit fld/fild
        _srcs[0]->emit_fld(emitter,x86_opnds);
        return;
    }

    if (_srcs[0]->is_fp_stk()) {  // emit fst
        assert(((Fp_Stk*)_srcs[0])->stk == 0); // store top of fp stack
        _dst->mov_fp_stk_to(emitter,x86_opnds,_do_pop);
        return;
    }

    if (_dst->is_array()) {
        // JMS- if it's an xastore, we need to pass the right
        // operand width to emit_mov_to_mem().
        _srcs[0]->emit_mov_to_mem(emitter,x86_opnds,x86_opnds.get_m_opnd(_dst), opnd_size(this));
        return;
    }

    if (_dst->is_reg() || _dst->is_mem()) {
        if (_dst->was_originally_array_opnd()) {
            _srcs[0]->emit_mov_to_mem(emitter,x86_opnds,x86_opnds.get_m_opnd(_dst), opnd_size(this));
        } else if (_dst->kind == Operand::Field && _dst->type == JIT_TYPE_BYTE && is_gc_unsafe()) {
            // Hack for the fast write barrier.  Make sure we write a byte
            // into the card table, not a word.
            _srcs[0]->emit_mov_to_mem(emitter,x86_opnds,x86_opnds.get_m_opnd(_dst), opnd_8);
        } else {
            _srcs[0]->emit_mov_to_mem(emitter,x86_opnds,x86_opnds.get_m_opnd(_dst));
        }
        return;
    }

    assert(0);
}

//
// Emit a conditional branch.
//
// Target selection, in priority order: explicit _target, the out edge
// _cfg_idx of the current node, the node's fallthrough, none (when this
// branch jumps over the next branch), else the node's branch target.
//
// Three emission forms:
//  - _jump_over_next_br: an 8-bit branch with displacement 0, recorded in
//    emitter.patch_switch_br; the instruction after the next branch is
//    flagged as its target.
//  - target already emitted (code_offset() != -1): branch with the known
//    displacement.
//  - otherwise: a 32-bit branch with displacement 0 plus a Branch_Patch for
//    the last 4 bytes, to be filled in later.
//
void Branch_Inst::emit(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
    Cfg_Node *target;
    if (_target != NULL)
        target = _target;
    else if (_cfg_idx != -1)
		target = emitter.curr_node->out_edges(_cfg_idx);
    else if (_branch_fallthrough)
        target = emitter.curr_node->get_fallthrough();
    else if (_jump_over_next_br)
        target = NULL;
    else
        target = emitter.curr_node->get_branch_target();
    assert(_jump_over_next_br || target != NULL);
    //
    // if target has been emitted, then this is backward branch
    //
    if (_jump_over_next_br) {
        assert(emitter.patch_switch_br == NULL);
        // Named local instead of &Imm_Opnd(0): address of a temporary is
        // not valid ISO C++.
        Imm_Opnd zero_disp(0);
        emitter.emit_branch8(_br_cc[_kind],&zero_disp,is_signed);
        emitter.patch_switch_br = emitter.get_next() - 1;
        //
        // find target of the switch branch
        //
        Inst *n_br = next();
        while (!n_br->is_branch()) n_br = n_br->next();
        n_br->next()->set_target_of_optimized_switch_br();
    } else if (target->code_offset() != -1) {
		int disp = target->code_offset() - emitter.get_offset();
        emitter.emit_branch(_br_cc[_kind], disp, is_signed);
    } else { // forward branch
        Imm_Opnd zero_disp(0);
        emitter.emit_branch32(_br_cc[_kind],&zero_disp,is_signed);
        //
        // create a branch so later we can fill in the right offset
        //
        unsigned patch_offset = emitter.get_offset() - 4;
        emitter.code_patch = 
            new(emitter.mem) Branch_Patch(emitter.code_patch,patch_offset,target);
    }
}

//
// change branch condition
//
//
// Replace _kind with its paired condition:
//   blt <-> bge,  bgt <-> ble,  beq <-> bne.
// Requires can_commute() (asserted); any other kind asserts.
//
// NOTE(review): each kind is mapped to its logical opposite (e.g. blt ->
// bge), not to the operand-swapped form (blt -> bgt) - confirm this is
// what callers of "commute" expect.
//
void Branch_Inst::commute_condition() {
    assert(can_commute());
    // NOTE(review): this local enum hides any class-level enumerators of the
    // same names inside this function.  It presumably mirrors the order of
    // the class's own kind enumeration - confirm against the header, since
    // a mismatch would silently select the wrong kinds.
    enum Kind {beq,bne,blt,bge,bgt,ble,n_br};
    switch (_kind) {
    case blt:
        _kind = bge; break;
    case bgt:
        _kind = ble; break;
    case ble:
        _kind = bgt; break;
    case bge:
        _kind = blt; break;
    case beq:
        _kind = bne; break;
    case bne:
        _kind = beq; break;  
    default:
        assert(0);
    }
}

//
// Emit the indirect jump of a switch together with its jump table.
//
// The displacement of the jump's memory operand is set to the data
// emitter's current label, and the table is emitted starting there.  Every
// table slot is emitted as a 0 and gets a Table_Switch_Patch recording the
// slot's label and target node, so the slot can be filled in later.
//
//  - _sw == NULL (regular tableswitch): one slot per out edge 1..n-1 of the
//    current node.
//  - _sw != NULL (lookupswitch turned into a tableswitch): a dense table
//    covering [low, high] of the match values; slots without a match use
//    out edge 0 as the default target.
//
void Jump_Inst::emit_jump_switch(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds){
    Cfg_Node *sw_node = emitter.curr_node;
    assert(_srcs[0]->kind == Operand::Field); // indirect jump
    M_Opnd *jmp_mem = x86_opnds.get_m_opnd(_srcs[0]);
    jmp_mem->disp.value = (int)emitter.data_emitter.get_label();
    emitter.emit_jump(jmp_mem);

    if (_sw != NULL) {
        //
        // lookupswitch: build a dense target table from the match list
        //
        int low, high;
        find_low_high(_sw->matches, _sw->size, low, high);
        int n_slots = high-low+1;
        Mem_Manager mm(n_slots*sizeof(Cfg_Node*));
        Cfg_Node **targets = (Cfg_Node**)mm.alloc(n_slots*sizeof(Cfg_Node*));
        Cfg_Node *fallback = sw_node->out_edges(0);
        int k;
        for (k = 0; k < n_slots; k++)
            targets[k] = fallback;
        for (k = 0; k < _sw->size; k++) {
            int slot = _sw->matches[k]-low;
            targets[slot] = sw_node->out_edges(k+1);
        }
        //
        // emit switch table
        //
        for (k = 0; k < n_slots; k++) {
            char *slot_label = emitter.data_emitter.get_label();
            emitter.table_switch =
                new (emitter.mem)Table_Switch_Patch(emitter.table_switch,slot_label,targets[k]);
            emitter.data_emitter.emit_int(0);
        }
        return;
    }

    //
    // regular tableswitch: targets are the out edges of the node
    //
    Cfg_Int edge;
    for (edge=1; edge<sw_node->out_edge_size(); edge++) {
        Cfg_Node *edge_target = sw_node->out_edges(edge);
        char *slot_label = emitter.data_emitter.get_label();
        emitter.table_switch =
            new (emitter.mem)Table_Switch_Patch(emitter.table_switch,slot_label,edge_target);
        emitter.data_emitter.emit_int(0);
    }
}

//
// Emit an unconditional jump to the current node's fallthrough (when
// _branch_fallthrough is set) or to its branch target.
// If the target already has a code offset, the jump is emitted with the
// known displacement.  Otherwise a 32-bit jump with displacement 0 is
// emitted and a Branch_Patch is recorded for its last 4 bytes.
//
void Jump_Inst::emit_jump(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
    Cfg_Node *target;
    if (_branch_fallthrough)
        target = emitter.curr_node->get_fallthrough();
    else
        target = emitter.curr_node->get_branch_target();
    assert(target != NULL);
    //
    // if target has been emitted, then this is backward branch
    //
    if (target->code_offset() != -1) {
		int disp = target->code_offset() - emitter.get_offset();
        emitter.emit_jump(disp);
    } else { // forward branch
        // Named local instead of &Imm_Opnd(0): address of a temporary is
        // not valid ISO C++.
        Imm_Opnd zero_disp(0);
        emitter.emit_jump32(&zero_disp);
        //
        // create a branch so later we can fill in the right offset
        //
		unsigned patch_offset = emitter.get_offset() - 4;
		emitter.code_patch = 
        new(emitter.mem) Branch_Patch(emitter.code_patch,patch_offset,target);
    }
}

//
// Emit an indirect jump through source 0: through its physical register
// when one is assigned, otherwise through its memory operand.
//
void Jump_Inst::emit_jump_s(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
  if (src(0)->assigned_preg() != n_reg) {
    // Named local instead of &R_Opnd(...): address of a temporary is not
    // valid ISO C++.
    R_Opnd target_reg(src(0)->assigned_preg());
    emitter.emit_jump(&target_reg);
  } else {
    emitter.emit_jump(x86_opnds.get_m_opnd(src(0)));
  }
}

//
// Dispatch on the jump kind to the matching emit routine; an unknown kind
// asserts.
//
void Jump_Inst::emit(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
  if (kind == jump_switch)
    emit_jump_switch(emitter,x86_opnds);
  else if (kind == jump)
    emit_jump(emitter,x86_opnds);
  else if (kind == jump_s)
    emit_jump_s(emitter,x86_opnds);
  else
    assert(0);
}

//
// Emit a move of a placeholder return-address immediate into the
// destination (into its register when one is assigned, otherwise into its
// memory operand), then record a NextPC_Patch for the last 4 emitted bytes
// together with the current node's jsr successor.
//
void NextPC_Inst::emit(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
    assert(_dst->is_reg());
    Imm_Operand placeholder(0x12345678, JIT_TYPE_RETADDR);
    // The emit routines are called through an Operand pointer.
    Operand *src = &placeholder;
    if (_dst->assigned_preg() != n_reg)
        src->emit_mov_to_reg(emitter, x86_opnds, x86_opnds.get_r_opnd(_dst));
    else
        src->emit_mov_to_mem(emitter, x86_opnds, x86_opnds.get_m_opnd(_dst));
    // The immediate occupies the last 4 bytes just emitted.
    unsigned imm_offset = emitter.get_offset() - 4;
    emitter.code_patch =
        new(emitter.mem) NextPC_Patch(emitter.code_patch, imm_offset,
        emitter.curr_node->get_jsr_succ());
}

//
// Emit a test or a compare.
//  - test: delegated to source 0.
//  - cmp on non-FP types: a cmp ALU instruction with source 0 as the
//    destination-side operand and source 1 as the source.
//  - cmp on float/double: source 0 must be FP-stack slot 0 (asserted).
//    When O3_is_PPro is set, fcomip against source 1's stack slot is used;
//    otherwise fcompp when _two_pops is set (source 1 must be slot 1), else
//    an fcomp through source 1.
//  any other kind asserts.
//
void Compare_Inst::emit(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
    if (kind == test) {
        _srcs[0]->emit_test(emitter,x86_opnds);
        return;
    }
    if (kind != cmp) {
        assert(0);
        return;
    }

    // integer compare
    if (!IS_FP_DBL_TYPE(type())) {
        _srcs[1]->emit_alu_inst(emitter,x86_opnds,cmp_opc,_srcs[0]);
        return;
    }

    // floating-point compare
    assert(_srcs[0]->is_fp_stk() && ((Fp_Stk*)_srcs[0])->stk == 0);
    if (O3_is_PPro) {
        emitter.emit_fcomip(((Fp_Stk*)_srcs[1])->stk);
        return;
    }
    // XXX- so far, no way to produce "fcompp".
    if (!_two_pops) {
        _srcs[1]->emit_fp_op(emitter,x86_opnds,fcomp_opc,false,_srcs[0]);
        return;
    }
    assert(_srcs[1]->is_fp_stk() && ((Fp_Stk*)_srcs[1])->stk == 1);
    emitter.emit_fcompp();
}

//
// Emit an add, adc or fadd.
//  - add: when lea is not forced and the destination shares a register with
//    one of the sources, a two-operand add is emitted (operands commuted if
//    the destination matches source 1).  Otherwise an lea computes the sum
//    into the destination register, and _must_use_lea is set.
//  - adc: two-operand adc with source 1 into the destination.
//  - fadd: emitted through whichever source is not the implied FP-stack
//    operand; at least one source must be on the FP stack.
//  any other kind asserts.
//
void Add_Inst::emit(O3_Emitter& emitter,X86_Opnd_Pool& x86_opnds) {
    //if (kind != fadd) assert(SAME_DST_SRC_REG(_dst,_srcs[0]));

    if (kind == add) 
    {
        if (!_must_use_lea && SAME_DST_SRC_REG(_dst,_srcs[0]))
            _srcs[1]->emit_alu_inst(emitter,x86_opnds,add_opc,_dst);
        else if (!_must_use_lea && SAME_DST_SRC_REG(_dst,_srcs[1]))
            _srcs[0]->emit_alu_inst(emitter,x86_opnds,add_opc,_dst);  // commute
        else  // use lea instruction
        {
            assert(_dst->assigned_preg() != n_reg);
            // Named local instead of &R_Opnd(...): address of a temporary
            // is not valid ISO C++.
            R_Opnd dst_reg(_dst->assigned_preg());
            emitter.emit_lea(&dst_reg,
                x86_opnds.get_m_opnd_lea(_srcs[0], _srcs[1], false));
            _must_use_lea = true; // prevent following test inst being eliminated
        }
    }
    else if (kind == adc) 
        _srcs[1]->emit_alu_inst(emitter,x86_opnds,adc_opc,_dst);
    else if (kind == fadd) {
        if (_srcs[0]->is_fp_stk() && _srcs[1]->is_fp_stk())
        {
            if (_srcs[1] == _dst)
                _srcs[0]->emit_fp_op(emitter, x86_opnds, fadd_opc, _fp_pop_after_compute, _dst);
            else
                _srcs[1]->emit_fp_op(emitter, x86_opnds, fadd_opc, _fp_pop_after_compute, _dst);
        }
        else if (_srcs[0]->is_fp_stk() && ((Fp_Stk*)_srcs[0])->stk == 0)
            _srcs[1]->emit_fp_op(emitter,x86_opnds,fadd_opc,_fp_pop_after_compute,_dst);
        else if (_srcs[1]->is_fp_stk() && ((Fp_Stk*)_srcs[1])->stk == 0)
            _srcs[0]->emit_fp_op(emitter,x86_opnds,fadd_opc,_fp_pop_after_compute,_dst);
        else
            assert(0);
    } else assert(0);
}

//
// Emit a sub, sbb or fsub.
//  - sub: when lea is not forced and the destination shares a register with
//    source 0, a two-operand sub is emitted.  Otherwise an lea computes the
//    difference into the destination register; this requires registers for
//    the destination and source 0, and a register or an immediate for
//    source 1 (all asserted), and sets _must_use_lea.
//  - sbb: two-operand sbb with source 1 into the destination.
//  - fsub: emitted through whichever source is not the implied FP-stack
//    operand, using fsubr when the operands are effectively swapped.
//  any other kind asserts.
//
void Sub_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    //if (kind != fsub) assert(SAME_DST_SRC_REG(_dst,_srcs[0]));

    if (kind == sub)
    {
        if (!_must_use_lea && SAME_DST_SRC_REG(_dst,_srcs[0]))
            _srcs[1]->emit_alu_inst(emitter,x86_opnds,sub_opc,_dst);
        else
        {
            assert(_dst->assigned_preg() != n_reg);
            assert(_srcs[0]->assigned_preg() != n_reg);
            assert(_srcs[1]->assigned_preg() != n_reg || _srcs[1]->kind == Operand::Immediate);
            // Named local instead of &R_Opnd(...): address of a temporary
            // is not valid ISO C++.
            R_Opnd dst_reg(_dst->assigned_preg());
            emitter.emit_lea(&dst_reg,
                x86_opnds.get_m_opnd_lea(_srcs[0], _srcs[1], true));
            _must_use_lea = true; // prevent following test inst being eliminated
        }
    }
    else if (kind == sbb)
        _srcs[1]->emit_alu_inst(emitter,x86_opnds,sbb_opc,_dst);
    else if (kind == fsub) {
        if (_srcs[0]->is_fp_stk() && _srcs[1]->is_fp_stk())
        {
            if (_srcs[1] == _dst)
                _srcs[0]->emit_fp_op(emitter, x86_opnds, fsubr_opc, _fp_pop_after_compute, _dst);
            else
                _srcs[1]->emit_fp_op(emitter, x86_opnds, fsub_opc, _fp_pop_after_compute, _dst);
        }
        else if (_srcs[0]->is_fp_stk() && ((Fp_Stk*)_srcs[0])->stk == 0)
            _srcs[1]->emit_fp_op(emitter,x86_opnds,fsub_opc,_fp_pop_after_compute,_dst);
        else if (_srcs[1]->is_fp_stk() && ((Fp_Stk*)_srcs[1])->stk == 0)
            _srcs[0]->emit_fp_op(emitter,x86_opnds,fsubr_opc,_fp_pop_after_compute,_dst);
        else
            assert(0);
    } else assert(0);
}

//
// Emit a multiply.
//  - smul: a mul through source 1.
//  - mul: a two-operand imul through source 1 when the destination shares a
//    register with source 0; otherwise a three-operand imul through source 0
//    with source 1 as the immediate (asserted).
//  - fmul: emitted through whichever source is not the implied FP-stack
//    operand; at least one source must be on the FP stack.
//  any other kind asserts.
//
void Mul_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (kind == smul) {
        _srcs[1]->emit_mul_inst(emitter,x86_opnds,_dst);
        return;
    }

    if (kind == mul) {
        if (SAME_DST_SRC_REG(_dst,_srcs[0])) {
            _srcs[1]->emit_imul_inst(emitter,x86_opnds,_dst);
            return;
        }
        assert(_srcs[1]->kind == Operand::Immediate);
        _srcs[0]->emit_imul_inst_3(emitter,x86_opnds,_dst, _srcs[1]);
        return;
    }

    if (kind != fmul) {
        assert(0);
        return;
    }

    // Pick the source the fmul is emitted through (-1: no valid form).
    const bool src0_on_stk = _srcs[0]->is_fp_stk();
    const bool src1_on_stk = _srcs[1]->is_fp_stk();
    int via = -1;
    if (src0_on_stk && src1_on_stk)
        via = (_srcs[1] == _dst) ? 0 : 1;
    else if (src0_on_stk && ((Fp_Stk*)_srcs[0])->stk == 0)
        via = 1;
    else if (src1_on_stk && ((Fp_Stk*)_srcs[1])->stk == 0)
        via = 0;

    if (via < 0) {
        assert(0);
        return;
    }
    _srcs[via]->emit_fp_op(emitter,x86_opnds,fmul_opc,_fp_pop_after_compute,_dst);
}

//
// Emit an FP divide or FP remainder.
//  - fdiv: emitted through whichever source is not the implied FP-stack
//    operand, using fdivr when the operands are effectively swapped; at
//    least one source must be on the FP stack.
//  - frem: source 0 must be FP-stack slot 0 and source 1 slot 1 (asserted).
//  any other kind asserts.
//
void Div_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (kind == frem) {
        assert(_srcs[0]->is_fp_stk() && ((Fp_Stk*)_srcs[0])->stk == 0 &&
               _srcs[1]->is_fp_stk() && ((Fp_Stk*)_srcs[1])->stk == 1);
        emitter.emit_frem();
        return;
    }
    if (kind != fdiv) {
        assert(0);
        return;
    }

    // Pick the source the divide is emitted through (-1: no valid form) and
    // whether the reversed opcode is needed.
    const bool src0_on_stk = _srcs[0]->is_fp_stk();
    const bool src1_on_stk = _srcs[1]->is_fp_stk();
    int via = -1;
    bool reversed = false;
    if (src0_on_stk && src1_on_stk) {
        reversed = (_srcs[1] == _dst);
        via = reversed ? 0 : 1;
    } else if (src0_on_stk && ((Fp_Stk*)_srcs[0])->stk == 0) {
        via = 1;
    } else if (src1_on_stk && ((Fp_Stk*)_srcs[1])->stk == 0) {
        via = 0;
        reversed = true;
    }

    if (via < 0) {
        assert(0);
        return;
    }
    _srcs[via]->emit_fp_op(emitter,x86_opnds,reversed ? fdivr_opc : fdiv_opc,
                           _fp_pop_after_compute,_dst);
}

//
// Emit a neg on source 0 (register-or-memory form).
// Only the integer 'neg' kind reaches code emission; any other kind asserts.
//
void Neg_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (kind != neg) {
        assert(0); // fneg has been expanded
        return;
    }
    emitter.emit_neg(x86_opnds.get_rm_opnd(_srcs[0]));
}

//
// Emit a bitwise or shift instruction through source 1 with the destination
// as the target.  The destination must share a register with source 0
// (asserted).  and/or/xor are emitted as ALU instructions, shl/shr/sar as
// shifts; any other kind asserts.
//
void Bitwise_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    assert(SAME_DST_SRC_REG(_dst,_srcs[0]));

    // First classify the kind, then emit once.
    bool is_shift = false;
    X86_ALU_Opcode alu_opc = and_opc;
    X86_Shift_Opcode shift_opc = shl_opc;
    switch (kind) {
    case k_and: alu_opc = and_opc; break;
    case k_or:  alu_opc = or_opc;  break;
    case k_xor: alu_opc = xor_opc; break;
    case shl:   is_shift = true; shift_opc = shl_opc; break;
    case shr:   is_shift = true; shift_opc = shr_opc; break;
    case sar:   is_shift = true; shift_opc = sar_opc; break;
    default:
        assert(0);
        return;
    }

    if (is_shift)
        _srcs[1]->emit_shift(emitter,x86_opnds,shift_opc,_dst);
    else
        _srcs[1]->emit_alu_inst(emitter,x86_opnds,alu_opc,_dst);
}

//
// Emit a push of source 0, then record the push in the frame and in the
// emitter's count of pushed words.
//
void Push_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    // The operand is emitted before the frame bookkeeping is updated.
    _srcs[0]->emit_push(emitter,x86_opnds);
    x86_opnds.frame->push();
    emitter.n_words_pushed++;
}

//
// Emit a pop into the destination, then record the pop in the frame and in
// the emitter's count of pushed words.
//
void Pop_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    // The operand is emitted before the frame bookkeeping is updated.
    _dst->emit_pop(emitter,x86_opnds);
    x86_opnds.frame->pop();
    emitter.n_words_pushed--;
}

//
// Emit a return.  With no arguments in the frame a plain ret is emitted;
// otherwise a ret with an immediate of n_args * sizeof(int) bytes.
//
void Return_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (x86_opnds.frame->n_args == 0) {
        emitter.emit_ret();
    } else {
        // Named local instead of &Imm_Opnd(...): address of a temporary is
        // not valid ISO C++.
        Imm_Opnd arg_bytes(x86_opnds.frame->n_args*sizeof(int));
        emitter.emit_ret(&arg_bytes);
    }
}

#ifdef STAT_INDIRECT_CALL
//
// Emit a call to the ORP_RT_STAT_INDIRECT_CALL runtime support routine and
// record a Call_Patch for the last 4 bytes of the emitted call.
//
void Stat_Call_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
	char* stat_stub = (char*)orp_get_rt_support_addr(ORP_RT_STAT_INDIRECT_CALL);
	assert(stat_stub);
	emitter.emit_call(stat_stub);
	// The call's address field is the last 4 bytes just emitted.
	unsigned addr_offset = emitter.get_offset() - 4;
	emitter.code_patch = new (emitter.mem)
		Call_Patch(emitter.code_patch, addr_offset, stat_stub);
}
#endif

#ifdef O3_VTune_Support
//
// Emit a call to the VTune method-entered hook (when m_enter is set) or to
// the method-exited hook, and record a Call_Patch for the last 4 bytes of
// the emitted call.
// The hooks are called directly rather than through
// orp_get_rt_support_addr(ORP_RT_VTUNE_METHOD_CALL), and the frame /
// n_words_pushed bookkeeping is left untouched.
//
void VTune_Call_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
	void* hook = m_enter ? (void*)iJIT_MethodEntered
	                     : (void*)iJIT_MethodExited;
	assert(hook) ;
	char* hook_addr = (char*)hook;
	emitter.emit_call(hook_addr);
	// The call's address field is the last 4 bytes just emitted.
	unsigned addr_offset = emitter.get_offset() - 4;
	emitter.code_patch = new (emitter.mem)
		Call_Patch(emitter.code_patch, addr_offset, hook_addr);
}
#endif

void Call_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    void *call_addr = NULL;
    switch (kind) {
    case virtual_call:
    case interface_call:
        emitter.emit_call(x86_opnds.get_rm_opnd(_srcs[0])); break;
    case static_call:
    case special_call: {
        assert(_srcs[0]->kind == Operand::Const);
        void *addr = method_get_indirect_address((Method_Handle)((Const_Operand *)_srcs[0])->val.addr);
        emitter.emit_call(&M_Opnd((unsigned)addr)); break; }
    case new_call:
        assert(get_chandle() != NULL);
        if (class_has_non_default_finalizer(get_chandle()))
            call_addr = orp_get_rt_support_addr(ORP_RT_NEW_WITH_FINALIZER_RESOLVED);
        else
            call_addr = orp_get_rt_support_addr(ORP_RT_NEW_RESOLVED);
        break;
    case newarray_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_NEWARRAY); break;
    case anewarray_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_ANEWARRAY_RESOLVED);break;
    case multinew_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_MULTIANEWARRAY_RESOLVED);break;
    case monenter_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_MONITOR_ENTER);break;
    case monexit_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_MONITOR_EXIT);break;
    case monenter_static_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_MONITOR_ENTER_STATIC);break;
    case monexit_static_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_MONITOR_EXIT_STATIC);break;
    case athrow_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_ATHROW); break;
    case athrow_lazy_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_ATHROW_LAZY); break;
    case llsh_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LSHL); break;
    case lrsh_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LSHR); break;
    case lrsz_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LUSHR); break;
    case f2i_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_F2I); break;
    case d2i_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_D2I); break;
    case f2l_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_F2L); break;
    case d2l_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_D2L); break;
    case frem_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_FREM); break;
    case drem_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_DREM); break;
    case lmul_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LMUL); break;
#ifdef ORP_LONG_OPT
	case lmul_const_multiplier_call:
		call_addr = orp_get_rt_support_addr(ORP_RT_LMUL_CONST_MULTIPLIER); break;
#endif
    case ldiv_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LDIV); break;
	case const_ldiv_call:
		call_addr = orp_get_rt_support_addr(ORP_RT_CONST_LDIV) ;break ;
	case const_lrem_call:
		call_addr = orp_get_rt_support_addr(ORP_RT_CONST_LREM) ;break ;
    case lrem_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LREM); break;
    case getstring_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_LDC_STRING); break;
    case bounds_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_IDX_OUT_OF_BOUNDS); break;
    case checkcast_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_CHECKCAST); break;
    case instanceof_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_INSTANCEOF); break;
    case classinit_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_INITIALIZE_CLASS); break;
    case resintfc_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_GET_INTERFACE_VTABLE_VER0); break;
    case aastore_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_AASTORE); break;
#ifndef JIT_SAPPHIRE
    case writebarrier_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_WRITE_BARRIER_FASTCALL); break;
#else
    case writebarrier_call:
		if (_n_args == 3) { // base, off, and value
			switch(wb_kind) // based on value's type
			{
			case JIT_TYPE_BYTE:
			case JIT_TYPE_BOOLEAN:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_INT8);  break;
			case JIT_TYPE_DOUBLE:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_DOUBLE);break;
			case JIT_TYPE_FLOAT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_FLOAT); break;
			case JIT_TYPE_INT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_INT32); break;
			case JIT_TYPE_LONG:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_INT64); break;
			case JIT_TYPE_SHORT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_INT16); break;
			case JIT_TYPE_CHAR:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_UINT16);break;
			case JIT_TYPE_CLASS:
			case JIT_TYPE_ARRAY:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_REF);   break;
			default:
				assert(0);
			}
		} else {
			assert(_n_args == 2);  /// base and value
			assert(_args[0]->type() == JIT_TYPE_CLASS);
			call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_WRITE_GLOBAL_SLOT); 
		}
		break;
    case acmp_call:
        call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_REF_EQUAL); break;
#endif
//
//GC read_barrier support, similar with write_barrier (JIT_SAPPHIRE)
//
#ifdef JIT_SAPPHIRE
    case readbarrier_call:
		if (_n_args == 2) { // base, off

			switch(wb_kind) // based on base's type
			{
			case JIT_TYPE_BYTE:
			case JIT_TYPE_BOOLEAN:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_INT8);  break;
			case JIT_TYPE_DOUBLE:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_DOUBLE);break;
			case JIT_TYPE_FLOAT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_FLOAT); break;
			case JIT_TYPE_INT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_INT32); break;
			case JIT_TYPE_LONG:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_INT64); break;
			case JIT_TYPE_SHORT:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_INT16); break;
			case JIT_TYPE_CHAR:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_UINT16);break;
			case JIT_TYPE_CLASS:
			case JIT_TYPE_ARRAY:
				call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_REF);   break;
			default:
				assert(0);
			}
		} else {
			assert(_n_args == 1);  //base
			assert(_args[0]->type() == JIT_TYPE_ADDR);
			call_addr = orp_get_rt_support_addr(ORP_RT_GC_HEAP_READ_GLOBAL_SLOT); 
		}
		break;
#endif
#ifdef STAT_INDIRECT_CALL
		case stat_indirect_call:
			//::for debug
			//call_addr = orp_get_rt_support_addr(ORP_RT_GC_READ_BARRIER_DEBUG);  break;
			//::for debug
			call_addr = orp_get_rt_support_addr(ORP_RT_STAT_INDIRECT_CALL) ;
			break ;
#endif
    default: assert(0);
    }
    //
    // create call patch
    //
    if (call_addr != NULL) {
        emitter.emit_call((char*)NULL);
        unsigned patch_offset = emitter.get_offset() - 4;
        emitter.code_patch = new (emitter.mem) 
        Call_Patch(emitter.code_patch, patch_offset, (char*)call_addr);
    }
    x86_opnds.frame->pop(emitter.n_words_pushed);
    emitter.n_words_pushed = 0;
}

// Emits an fxch for this instruction.  Both sources must be FP-stack
// operands: source 0 has to be stack slot 0 and source 1 any other slot;
// the slot number of source 1 is what gets passed to the emitter.
void Fxch_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    Operand *top_src   = _srcs[0];
    Operand *other_src = _srcs[1];
    assert(top_src->is_fp_stk());
    assert(other_src->is_fp_stk());

    Fp_Stk *top   = (Fp_Stk *) top_src;
    Fp_Stk *other = (Fp_Stk *) other_src;
    assert(top->stk == 0);
    assert(other->stk != 0);

    emitter.emit_fxch(other->stk);
}

// Emits the single x87 instruction selected by `kind`.  The operand must
// already be FP-stack slot 0; an unknown kind trips an assertion.
void Math_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds)
{
    Operand *arg = _srcs[0];
    assert(arg->is_fp_stk());
    assert(((Fp_Stk *) arg)->stk == 0);

    if (kind == sin)
        emitter.emit_fsin();
    else if (kind == cos)
        emitter.emit_fcos();
    else if (kind == sqrt)
        emitter.emit_fsqrt();
    else if (kind == rndint)
        emitter.emit_frndint();
    else if (kind == abs)
        emitter.emit_fabs();
    else
        assert(0);
}

// Emits a cmp between an immediate zero and the memory location designated
// by source 0.  An immediate source is treated as an absolute address
// (Static_Operand); otherwise the source must be a register operand with an
// assigned physical register, and the location is [reg + 0].
// NOTE(review): presumably used to touch the address (e.g. a null check) --
// confirm against the callers.
void Deref_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds)
{
    Imm_Operand imm(0, JIT_TYPE_ADDR);
    Operand *zero = &imm;
    Operand *addr = _srcs[0];

    if (addr->kind != Operand::Immediate)
    {
        assert(addr->is_reg());
        assert(addr->assigned_preg() != n_reg);
        Field_Operand at_reg((Reg_Operand *)addr, 0, JIT_TYPE_ADDR, 0,true);
        zero->emit_alu_inst(emitter, x86_opnds, cmp_opc, &at_reg);
        return;
    }

    Static_Operand at_abs((void *)((Imm_Operand *)addr)->imm(), JIT_TYPE_ADDR, 0);
    zero->emit_alu_inst(emitter, x86_opnds, cmp_opc, &at_abs);
}

// Emits the raw x86 instruction named by `kind`.  Only idiv takes an
// operand: the r/m operand for source 2, with 1 as the second emit_div
// argument.  Any other kind trips an assertion.
void Native_Inst::emit(O3_Emitter& emitter, X86_Opnd_Pool& x86_opnds) {
    if (kind == cdq) {
        emitter.emit_cdq();
    } else if (kind == idiv) {
        emitter.emit_div(x86_opnds.get_rm_opnd(_srcs[2]),1);
    } else if (kind == fchs) {
        emitter.emit_fchs();
    } else if (kind == fnstsw) {
        emitter.emit_fnstsw();
    } else if (kind == sahf) {
        emitter.emit_sahf();
    } else {
        assert(0);
    }
}

// Print template for Widen_Inst.  "$t" / "$0" are presumably placeholders for
// the destination and source operand 0; the four numeric fields are
// Inst::Info attributes whose meaning is declared outside this chunk.
const Inst::Info Widen_Inst::_info = {"$t = widen $0",0,0,0,0};

// Print template for Assign_Inst.  "$y" is presumably expanded to a type
// suffix by the printer -- TODO confirm; numeric fields are Inst::Info
// attributes declared outside this chunk.
const Inst::Info Assign_Inst::_info = {"$t =.$y $0",0,0,0,0}; 

// Print templates for Obj_Info_Inst, one entry per kind (n_obj_info total):
// array length, vtable, and interface vtable lookups.  Entry order is
// presumably the order of the Obj_Info_Inst kind enum -- keep them in sync.
const Inst::Info Obj_Info_Inst::_info[Obj_Info_Inst::n_obj_info] = {
    {"$t = length $0",1,0,0,0},
    {"$t = vtable $0",1,0,0,0},
    {"$t = interfacevtable $0, $1",1,1,0,0}
};

// Print templates for Compare_Inst, one entry per kind (n_cmp total):
// cmp, cmp_lt, cmp_gt and the single-operand test.  Entry order is
// presumably the order of the Compare_Inst kind enum.
const Inst::Info Compare_Inst::_info[Compare_Inst::n_cmp] = {
    {"$t = cmp.$y  $0, $1",  1, 0, 0, 0},
    {"$t = cmp_lt.$y $0, $1",1, 0, 0, 0},
    {"$t = cmp_gt.$y $0, $1",1, 0, 0, 0},
    {"$t = test.$y   $0",    1, 0, 0, 0} 
};
// Print templates for Branch_Inst, one entry per kind (n_br total).
// The first six correspond to the if{eq,ne,lt,ge,gt,le} int comparisons
// against zero; bp/bnp are the last two entries.  The order here matches
// the condition-code table Branch_Inst::_br_cc.
const Inst::Info Branch_Inst::_info[Branch_Inst::n_br] = {
    {"beq  $0", 0, 0, 0, 0},
    {"bne  $0", 0, 0, 0, 0},
    {"blt  $0", 0, 0, 0, 0},
    {"bge  $0", 0, 0, 0, 0},
    {"bgt  $0", 0, 0, 0, 0},
    {"ble  $0", 0, 0, 0, 0},
    {"bp   $0", 0, 0, 0, 0},
    {"bnp  $0", 0, 0, 0, 0} 
};
// x86 condition code for each Branch_Inst kind, in the same order as the
// entries of Branch_Inst::_info (eq, ne, lt, ge, gt, le, p, np).
const X86_CC Branch_Inst::_br_cc[Branch_Inst::n_br] = {
    cc_eq, cc_ne, cc_lt, cc_ge, cc_gt, cc_le, cc_p, cc_np
};

// Print template for Return_Inst (no operand placeholders).
const Inst::Info Return_Inst::_info = {"return",0,0,0, 0};

// Print templates for Add_Inst, one entry per kind (n_add total); the
// trailing comment on each row names the kind it belongs to.
const Inst::Info Add_Inst::_info[Add_Inst::n_add] = {
    {"$t = add.$y $0, $1",  1, 0, 0, 0}, // add
    {"$t = adc $0, $1",     1, 0, 0, 0}, // adc
    {"$t = fadd.$y $0, $1", 1, 0, 0, 0}  // fadd
};

// Print templates for Sub_Inst, one entry per kind (n_sub total); the
// trailing comment on each row names the kind it belongs to.
const Inst::Info Sub_Inst::_info[Sub_Inst::n_sub] = {
    {"$t = sub.$y  $0, $1", 1, 0, 0, 0}, // sub
    {"$t = sbb $0, $1",     1, 0, 0, 0}, // sbb
    {"$t = fsub.$y $0, $1", 1, 0, 0, 0}  // fsub
};

// Print templates for Mul_Inst, indexed by kind (array size n_mul).
// NOTE(review): only three rows are listed here; if n_mul is larger, the
// remaining entries are zero-initialized -- confirm against the kind enum.
const Inst::Info Mul_Inst::_info[Mul_Inst::n_mul] = {
    {"$t = mul.$y  $0, $1", 1, 0, 0, 0}, // mul
    {"$t = fmul.$y $0, $1", 1, 0, 0, 0}  // fmul
//#ifdef INLINE_NATIVE
	,{"$t = smul.$y $0, $1", 1, 0, 0, 0}  // smul
//#endif
};

// Print templates for Div_Inst, one entry per kind (n_div total); the
// trailing comment on each row names the kind.  Note that div, rem and frem
// set the second numeric field while fdiv does not (field meaning is
// declared with Inst::Info, outside this chunk).
const Inst::Info Div_Inst::_info[Div_Inst::n_div] = {
    {"$t = div.$y  $0, $1", 1, 1, 0, 0}, // div
    {"$t = fdiv.$y $0, $1", 1, 0, 0, 0}, // fdiv
    {"$t = rem.$y  $0, $1", 1, 1, 0, 0}, // rem
    {"$t = frem.$y $0, $1", 1, 1, 0, 0}  // frem
};

// Print templates for Neg_Inst, one entry per kind (n_neg total).
const Inst::Info Neg_Inst::_info[Neg_Inst::n_neg] = {
    {"$t = neg.$y  $0", 1, 0, 0, 1},  // neg
    {"$t = fneg.$y $0", 1, 0, 0, 1}   // fneg
};

// Print templates for Bitwise_Inst, one entry per kind (n_bitwise total);
// the trailing comment on each row names the kind.
const Inst::Info Bitwise_Inst::_info[Bitwise_Inst::n_bitwise] = {
    {"$t = and.$y  $0, $1", 1, 0, 0, 0},  // and
    {"$t = or.$y   $0, $1", 1, 0, 0, 0},  // or
    {"$t = xor.$y  $0, $1", 1, 0, 0, 0},  // xor
    {"$t = shl.$y  $0, $1", 1, 0, 0, 0},  // shl
    {"$t = shr.$y  $0, $1", 1, 0, 0, 0},  // shr
    {"$t = sar.$y  $0, $1", 1, 0, 0, 0}   // sar
};

// Print templates for Call_Inst, one entry per call kind (n_call total).
// NOTE(review): the table is presumably indexed by the Call_Inst kind enum,
// so the row order and the #ifdef guards (ORP_LONG_OPT, JIT_SAPPHIRE,
// STAT_INDIRECT_CALL, O3_VTune_Support) must mirror the enum declaration
// exactly -- verify against the header whenever a kind is added.  The
// const_ldiv / const_lrem rows are placed last, after the optional rows.
const Inst::Info Call_Inst::_info[Call_Inst::n_call] = {
    {"call $0",              0, 0, 0, 0},
    {"call mh_addr[$0]",     0, 0, 0, 0},
    {"call new",             0, 0, 0, 0},
    {"call newarray",        0, 0, 0, 0},
    {"call anewarray",       0, 0, 0, 0},
    {"call multianewarray",  0, 0, 0, 0},
    {"call static[$0]",      0, 0, 0, 0},
    {"call monitorenter",0, 0, 0, 0},
    {"call monitorexit", 0, 0, 0, 0},
    {"call monitorenter_static",0, 0, 0, 0},
    {"call monitorexit_static", 0, 0, 0, 0},
//    {"call athrow[$0]",      0, 0, 0, 0},
    {"call athrow",          0, 0, 0, 0},
    {"call athrow_lazy",     0, 0, 0, 0},
    {"call interface[$0]",   0, 0, 0, 0},
    {"call llsh",            0, 0, 0, 0},
    {"call lrsh",            0, 0, 0, 0},
    {"call lrsz",            0, 0, 0, 0},
    {"call f2i",             0, 0, 0, 0},
    {"call d2i",             0, 0, 0, 0},
    {"call f2l",             0, 0, 0, 0},
    {"call d2l",             0, 0, 0, 0},
    {"call frem",            0, 0, 0, 0},
    {"call drem",            0, 0, 0, 0},
    {"call lmul",            0, 0, 0, 0},
#ifdef ORP_LONG_OPT
    {"call lmul_const",      0, 0, 0, 0},
#endif
    {"call ldiv",            0, 0, 0, 0},
    {"call lrem",            0, 0, 0, 0},
    {"call getstring",       0, 0, 0, 0},
    {"call bounds",          0, 0, 0, 0},
    {"call checkcast",       0, 0, 0, 0}, 
    {"call checkcast_notnull",0, 0, 0, 0}, 
    {"call instanceof",      0, 0, 0, 0},
    {"call classinit",       0, 0, 0, 0},
    {"call resolveinterface",0, 0, 0, 0},
    {"call aastore",         0, 0, 0, 0},
#ifndef JIT_SAPPHIRE
    {"call write_barrier",   0, 0, 0, 0}
#else
    {"call write_barrier",   0, 0, 0, 0},
    {"call read_barrier",   0, 0, 0, 0},
    {"call sapphire_acmp",   0, 0, 0, 0}
#endif
#ifdef STAT_INDIRECT_CALL
	,{"call stat_indirect_call", 0,0,0,0}//Special call for instrument before indirect branches
#endif
#ifdef O3_VTune_Support
	,{"call vtune_method_call", 0,0,0,0} // vtune support
#endif
	, { "call const_ldiv_call", 0,0,0,0}, // long division with constant divisor.
	{"call const_lrem_call", 0,0,0,0}  // long rem with constant divisor.
};

// Print template for Convt_Inst.  "$x" is presumably replaced by the text
// produced by Convt_Inst::escape() (e.g. I2D) -- TODO confirm in the printer.
const Inst::Info Convt_Inst::_info = {"$t = $x $0", 1, 0, 0, 0}; 

// Writes the conversion mnemonic "<from>2<to>" (e.g. I2D, I2J): the source
// operand's type code and the expression's type code, each printed as a
// single character.
void  Convt_Inst::escape(ostream& cout) {
    const char from_code = (char)_srcs[0]->type;
    const char to_code   = (char)exp->type;
    cout << from_code;
    cout << "2";
    cout << to_code;
}

// Print template for NextPC_Inst.
const Inst::Info NextPC_Inst::_info = {"$t = next PC of jump", 0, 0, 0, 0}; 

// Print templates for Jump_Inst, one entry per kind (n_jump total): one form
// without an operand and two that print source operand 0.
const Inst::Info Jump_Inst::_info[Jump_Inst::n_jump] = {
    {"jump",     0, 0, 0, 0},
    {"jump  $0", 0, 0, 0, 0},
    {"jump  $0", 0, 0, 0, 0}
}; 

// Print templates for Switch_Inst, one entry per kind (n_switch total):
// tableswitch and lookupswitch.
const Inst::Info Switch_Inst::_info[Switch_Inst::n_switch] = {
    {"tableswitch  $0", 1, 0, 0, 0},
    {"lookupswitch $0", 1, 0, 0, 0}
}; 

// Print template for String_Inst (getstring with two source operands).
const Inst::Info String_Inst::_info = {"$t = getstring $0 $1", 0, 0, 0, 0};

// Print template for Classinit_Inst.
const Inst::Info Classinit_Inst::_info = {"classinit $0", 0, 0, 0, 0};

// Print template for Writebarrier_Inst.
const Inst::Info Writebarrier_Inst::_info = {"write_barrier $0, $1", 0, 0, 0, 0};

#ifdef JIT_SAPPHIRE
//::
// Read Barrier: print template for Readbarrier_Inst (only compiled when
// JIT_SAPPHIRE is defined).
//::
const Inst::Info Readbarrier_Inst::_info = {"read_barrier $0, $1", 0, 0, 0, 0};
#endif

#ifdef STAT_INDIRECT_CALL
// Print template for StatIndirectCall_Inst (STAT_INDIRECT_CALL builds only).
const Inst::Info StatIndirectCall_Inst::_info = {"stat_indirect_call $0, $1", 0, 0, 0, 0};

// Print template for Stat_Call_Inst (STAT_INDIRECT_CALL builds only).
const Inst::Info Stat_Call_Inst::_info = {"new_stat_indirect_call", 0, 0, 0, 0};
#endif

#ifdef O3_VTune_Support
// Print template for VTune_Call_Inst (O3_VTune_Support builds only).
const Inst::Info VTune_Call_Inst::_info = {"vtune_method_call", 0, 0, 0, 0};
#endif

// Print templates for Type_Inst, one entry per kind (n_type total):
// checkcast first, instanceof second -- the same order as
// Type_Inst::native_code.
const Inst::Info Type_Inst::_info[Type_Inst::n_type] = { 
    {"$t =.$y checkcast  $0, $1", 0, 0, 0, 0},
    {"$t =.$y instanceof $0, $1", 0, 1, 0, 0}
}; 

// Print template for Push_Inst.
const Inst::Info Push_Inst::_info = {"push $0", 0, 0, 0, 0};

// Print template for Pop_Inst.
const Inst::Info Pop_Inst::_info = {"$t = pop", 0, 0, 0, 0};

// Print template for Fxch_Inst (both FP-stack operands are printed).
const Inst::Info Fxch_Inst::_info = {"fxch $0, $1", 0, 0, 0, 0};

// Print templates for Math_Inst, one entry per kind (n_math total).  The
// rows appear in the order sin, cos, sqrt, rint, abs, which is the order the
// kinds are handled in Math_Inst::emit (rndint prints as "rint").
const Inst::Info Math_Inst::_info[Math_Inst::n_math] = {
    {"$t =    sin  $0", 0, 0, 0, 1},
    {"$t =    cos  $0", 0, 0, 0, 1},
    {"$t =    sqrt $0", 0, 0, 0, 1},
    {"$t =    rint $0", 0, 0, 0, 1},
    {"$t =    abs  $0", 0, 0, 0, 1},
};

// Print template for Deref_Inst.
const Inst::Info Deref_Inst::_info = {"deref $0", 1, 0, 0, 0};

// Print templates for Native_Inst, one entry per kind (n_native total), in
// the order cdq, idiv, fchs, fnstsw, sahf -- the same kinds handled by
// Native_Inst::emit.
const Inst::Info Native_Inst::_info[Native_Inst::n_native] = { 
    {"$t =    cdq  $0",         1, 0, 0, 0},
    {"$t =    idiv $0, $1, $2", 1, 1, 1, 0},
    {"$t =    fchs $0",         1, 1, 0, 1},
    {"$t =    fnstsw $0",       0, 0, 0, 0},
    {"$t =    sahf $0",         1, 0, 0, 0}
}; 

//#ifdef INLINE_NATIVE
/**************************************************************************
 * Checkcast: $1--class_h $2--obj $3--offset
 *
 * Inline template for the fast checkcast path, written in the file's
 * textual native-code mini-language (one instruction per string, a leading
 * ".label" marks a branch target, the list ends with the "\0" entry).
 * As written: a null object ($2 == 0) passes straight through; otherwise the
 * word at [obj] is loaded, $3 is added to it, and the word stored there is
 * compared with $1.  On a match the object is copied to the result (#1);
 * otherwise control reaches ".normal", which performs the regular Checkcast.
 * NOTE(review): [obj] is presumably the vtable pointer and $3 the offset of
 * the super-class slot computed in Type_Inst::is_native_inline -- confirm.
 **************************************************************************/
char* checkcast_native_code[] = {
    "Cmp.L  t1 $2 0",
    "Beq.L b3",
    "Mov.L  t2 $2",
    "Mov.A  t3 [t2]",
    "Add.A  t4 t3 $3",
    "Mov.A  t5 [t4]",
    "Cmp.A  t6 t5 $1",
    "Bne.A  normal",
    ".b3 Mov.L #1 $2",
    "Jp.I   end",
    ".normal Checkcast.L    #1 $1 $2",
    //".normal Mov.I #1 10",
    "\0"
} ;

// Inline native-code template for each Type_Inst kind; NULL means the kind
// has no inline template.
char** Type_Inst::native_code[Type_Inst::n_type] = {
    checkcast_native_code,  //checkcast
    NULL                    //instanceof
};

bool Type_Inst::is_native_inline()
{
    bool do_fast_checkcast = false ;

    if(kind==cast){
        //class handle
        Operand *cl = src(0);
        assert(cl->is_single_def_temp_reg() || cl->kind == Operand::Immediate);
        unsigned c;
        if (cl->is_single_def_temp_reg())
            c = ((Imm_Operand*)((Temp_Reg*)cl)->inst()->src(0))->imm();
        else
            c = ((Imm_Operand*)cl)->imm();


        //offset
	    int depth = class_get_depth((Class_Handle)c);
	    offset = vtable_get_super_array_offset() + (depth - 1) * sizeof(void *);
	    do_fast_checkcast = ((!(class_get_flags((Class_Handle)c) & ACC_INTERFACE)) &&
		    class_get_name((Class_Handle)c)[0] != '[' && 
		    (depth < MAX_FAST_INSTOF_DEPTH));

    }

    return do_fast_checkcast;
}

// Builds the argument vector for the inline checkcast template: the
// instruction's own sources followed by one extra operand holding `offset`.
// The offset operand is the destination of an immediate-load instruction
// obtained from exprs.lookup_imm(), using this instruction's predecessor as
// the position argument.  `len` receives the number of arguments.
Operand** Type_Inst::get_native_args(Expressions& exprs, int& len)
{
    Mem_Manager& arena = exprs.mem;
    len = n_srcs + 1;
    Operand** args = (Operand**)arena.alloc(len * sizeof(*args));

    // Original operands keep their positions.
    int k = 0;
    while (k < n_srcs) {
        args[k] = _srcs[k];
        k++;
    }

    // Append "txx = offset" and use its destination as the last argument.
    assert(offset>=0) ;
    Inst* before = prev();
    Inst* load_offset = exprs.lookup_imm(offset,JIT_TYPE_INT,before);
    assert(load_offset->dst()) ;
    args[n_srcs] = load_offset->dst();

    return args;
}

/*********************************************************************************************
 * Define Inlined Native Code for Inst
 * $1 x.lo
 * $2 x.hi
 * $3 y.lo
 * $4 y.hi
 *********************************************************************************************/
// Inline template for 64-bit division by a constant divisor, written in the
// file's textual native-code mini-language (one instruction per string, a
// leading ".label" marks a branch target, the list ends with "\0").
//argument: $1--x.lo $2--x.hi $3--magic $4--x $5--y
// $3 is the address of a MAGIC record.  With the MAGIC layout declared at the
// top of this file (8-byte packing, 4-byte unsigned/int, 8-byte int64) the
// offsets read below are: +12 sh1, +28 m_32, +32 sh2, +40 low word of
// divisor, +48 l_rem, +52 m_rem, +56 d_norm.
// Three paths: unsigned 32/32 (x.hi == 0 and m_32 != 0), unsigned 64/32, and
// ".fast_64_64", which falls back to the generic Div.J on $4/$5.
// NOTE(review): branching to ".l_is_32" skips the instructions that define
// t18, t19 and t55, yet t19/t55 are read afterwards -- confirm intent.
// Div_Inst::is_native_inline currently returns false unconditionally, so
// this template is not selected.
char* div_native_code[] = { 
    "Mov.I  t1 $2",
    "Cmp.I   t2 t1 0",
    "Bne.I  fast_64",
    "Mov.I  t3 $3",
    "Mov.I  t4 [t3+28]",
    "Cmp.I   t5 t4 0",
    "Beq.I  fast_64_2",
    //////////////////////Fast unsigned 32/32
    "Mov.I  t6 $1",
    "Smul.J t7 t6 t4",
    "Sub.I  t8 t6 t7.Hi",
    "Mov.I  t9 [t3+12]",
    "Shr.I  t88 t8 t9",
    "Add.I  t117 t7.Hi t88",
    "Mov.I  t10 [t3+32]",
    "Shr.I  t77 t117 t10",
    "Mov.J  #1.Hi 0",
    "Mov.J  #1.Lo t77",
    "Jp.I   end",
    ".fast_64   Mov.I	t3 $3",
    ".fast_64_2 Mov.I t12 [t3+40]",
    "Cmp.I   t13 t1 0",
    "Ble.I   fast_64_64",
    "Cmp.I   t14 t1 t12",
    "Bge.I   fast_64_64",
    ////////////////////Fast unsigned 64/32
    "Mov.I   t15 [t3+48]",
    "Mov.I   t6 $1",
    "Mov.I   t16 32",
    "Sub.I   t17 t16 t15",
    "Cmp.I   t50 t16 t15",
    "Beq.I   l_is_32",
    "Mov.J   t18 #1",
    "Mov.I   t18.Lo $1",
    "Mov.I   t18.Hi $2",
    
    //"Shl.J   t19 t18 t17",
    "Sub.I   t51 32 t17",
    "Shl.I   t52 t18.Hi t17",
    "Shr.I   t53 t18.Lo t51",
    "Or.I    t54 t52 t53",
    "Shl.I   t55 t18.Lo t17",
    "Mov.J   t19 t18",
    "Mov.I   t19.Lo t55",
    "Mov.I   t19.Hi t54",

    //
    ".l_is_32    Mov.I	t20 31",
    "Mov.I   t21 t19.Hi",
    "Sar.I   t22 t21 t20",
    "Mov.I   t23 [t3+56]",
    "And.I   t24 t23 t22",
    "Add.I   t25 t24 t55",
    "Cmp.I   t26 t19.Hi t22",
    "Beq.I   mul_zero",
    "Sub.I   t27 t19.Hi t22",
    "Mov.I   t28 t27",
    "Mov.I   t29 [t3+52]",
    "Smul.J  t30 t27 t29",
    "Jp.I    next1",
    ".mul_zero   Mov.I  t30.Lo 0",
    "Mov.I t30.Hi 0",
    ".next1  Add.I	t32 t30.Lo t25",
    "Adc.I  t100 t30.Hi 0",
    "Xor.I   t101 t32 t32",    //::
    "Add.I   t102 t100 t101",  //::
    "Add.I   t33 t19.Hi t102",
    "Mov.I   t34 t33",
    "Mov.I   t35 t34",
    //"Not.I   t36 t35",
    "Xor.I   t36 t35 4294967295",
    "Mov.I   t37 [t3+40]",
    "Smul.J  t38 t36 t37",
    "Sub.I   t39 t1 t37",
    "Add.I   t103 t38.Lo t6",
    "Adc.I   t104 t38.Hi t39",
    "Sub.I   t105 t103 t103",     //::
    "Add.I   t106 t104 t105", //::
    "Mov.I   t42 t34",
    //"Not.I   t43 t42",
    "Xor.I   t43 t42 4294967295",
    "Mov.I   t44 t106",
    "Sub.I   t45 t44 t43",
    "Mov.I   #1.Lo t45",
    "Mov.I   #1.Hi 0",
    "Jp.I    end",
    //////////////////////Fast signed 64/64
    ".fast_64_64   Div.J #1 $4 $5",
	"\0"
} ;

// Inline native-code template for each Div_Inst kind; NULL means the kind has
// no inline template.  Only plain div has one.
char** Div_Inst::native_code[Div_Inst::n_div] = {
	div_native_code ,	//div
	NULL,				//fdiv
	NULL,				//rem
	NULL				//frem
} ;

// Inline template for a 64-bit multiply built from 32-bit pieces.
// Arguments as filled in by Mul_Inst::get_native_args: $1/$2 are the low and
// high words of the first source, $3/$4 those of the second.
// The result (#1) starts as lo*lo; when both high words are zero that is the
// whole answer.  Otherwise the low words of the two cross products (lo*hi
// and hi*lo) are added into the high word of the result.
char* mul_native_code[] = {
"		Smul.J	#1		$1		$3		", //32 * 32, lo * lo, 
"		Cmp.I   t11     $2		0		",
"		Bne.I	Next					",
"		Cmp.I	t12		$4		0		",
"		Bne.I   Next					",
"		Jp.I	end						",
".Next	Smul.J	t13		$1		$4		", //lo * hi
"		Add.I   #1.Hi	#1.Hi   t13.Lo	",
"		Smul.J	t14		$2		$3		", //hi * lo
"		Add.I	#1.Hi	#1.Hi	t14.Lo	",
"\0"
};

// Inline template for a 64-bit multiply by a constant, in the same textual
// mini-language as the other templates.  $1 is the 64-bit first source and
// $3 the 32-bit multiplier word.
// NOTE(review): t1.Hi is overwritten with 0 right after t1 is copied from
// $1, so the following "Cmp t1.Hi 0 / Bne Next" can never take the branch
// as written and the ".Next" high-word correction looks unreachable --
// confirm whether the "Mov.I t1.Hi 0" line is intentional.
char* mul_const_native_code[] = {
"		Mov.J	t1		$1				",
"		Mov.I	t1.Hi	0				",
"		Smul.J	#1		t1.Lo	$3		",
"		Cmp.I	t11		t1.Hi	0		",
"		Bne.I	Next					",
"		Jp.I	end						",
".Next	Mov.I	t6      t1.Hi			",
"		Mul.I	t7		t6		$3		",	//hi * lo, keep low 32 bits
"		Add.I	t8		#1.Hi	t7		",
"		Mov.I   #1.Hi	t8				",
"\0"
};


// Inline native-code template for each Mul_Inst kind; NULL means the kind has
// no inline template.
// NOTE(review): four initializers are supplied here while Mul_Inst::_info
// lists only three rows (mul, fmul, smul) -- confirm that n_mul is 4 and
// that the fourth kind is the "mul constant" variant.
char** Mul_Inst::native_code[Mul_Inst::n_mul] = {
	mul_native_code ,		//mul
	NULL,					//fmul
	NULL,					//smul
	mul_const_native_code	//mul constant
} ;

// Inline template for a 64-bit left shift.  Arguments as filled in by
// Bitwise_Inst::get_native_args: $1/$2 are the low/high words of the value,
// $3 is the shift count.  The count is first masked with 63 (t33):
//   t33 == 0       -> result is the unshifted value
//   1 <= t33 < 32  -> hi = (hi << t33) | (lo >> (32 - t33)), lo = lo << t33
//   32 <= t33 < 64 -> lo = 0, hi = lo << (t33 & 31)
// NOTE(review): because t33 is masked with 63, the "t33 >= 64" test can never
// succeed, so ".Next64" looks unreachable -- confirm.
char* shl_native_code[] = {
"			And.I	t33		$3		63  ",
"			Cmp.I	t11		t33		1   ",
"			Bge.I	Next				",
"			Mov.J	#1		$1			",
"			Jp.I    end					",
".Next		Cmp.I	t11		t33		64	",
"			Bge.I	Next64				",
"			Cmp.I	t11		t33		32	",
"			Bge.I	Next32				",
//"			Mov.I   t1      $1		",
"			Shl.I	t3		$2		t33	",
"			Sub.I   t4		32		t33	",
"			Shr.I   t5		$1		t4	",
"			Or.I	t3		t3		t5	",
"			Shl.I	t6		$1		t33	",
"			Mov.I   #1.Hi	t3			",
"			Mov.I	#1.Lo	t6			",
"			Jp.I	end					",
".Next32	Mov.I	#1.Lo	0			",	// cl >= 32 and cl < 64
"			And.I	t10		t33		31	",
"			Shl.I	#1.Hi	$1		t10	",
"			Jp.I	end					",
".Next64	Mov.J	#1.Hi	0			",
"\0"
};

// Inline native-code template for each Bitwise_Inst kind; NULL means the kind
// has no inline template.  Only the fourth slot is populated, which lines up
// with the shl row of Bitwise_Inst::_info (and, or, xor, shl, shr, sar).
char** Bitwise_Inst::native_code[Bitwise_Inst::n_bitwise] = {
	NULL,
	NULL,
	NULL,
	shl_native_code,
	NULL,
	NULL
};

// Tries to recover the compile-time value of a 64-bit operand.
//
// opnd    operand to inspect
// higher  receives the high 32 bits when the value is known
// lower   receives the low 32 bits when the value is known
// returns true when `opnd` is an immediate, or a single-definition temp
//         whose definition (followed through register-to-register copies)
//         is an immediate or constant assignment; false otherwise, in which
//         case `higher` and `lower` are left untouched.
bool get_long_constant_value(Operand* opnd, unsigned& higher, unsigned& lower)
{
	if (opnd->kind == Operand::Immediate) {
		lower = ((Imm_Operand*)opnd)->imm();
		assert(opnd->hi_opnd()) ;
		higher = ((Imm_Operand*)opnd->hi_opnd())->imm()	;
		return true ;
	}

	if (!opnd->is_single_def_temp_reg())
		return false ;

	// Walk back through copies.  Each step is only valid while the copied
	// operand is itself a single-definition temp; anything else cannot be
	// treated as a Temp_Reg with a unique defining instruction, so give up
	// instead of casting blindly.
	Inst *asgn = ((Temp_Reg*)opnd)->inst();
	while (asgn->is_reg_assignment()) {
		Operand *copied = asgn->src(0);
		if (!copied->is_single_def_temp_reg())
			return false ;
		asgn = ((Temp_Reg*)copied)->inst();
	}

	if (asgn->is_imm_assignment()) {
		lower = ((Imm_Operand*)asgn->src(0))->imm();
		higher = ((Imm_Operand*)asgn->src(0)->hi_opnd())->imm();
		return true ;
	}
	if (asgn->is_const_assignment()) {
		lower = ((Const_Operand*)asgn->src(0))->lo()->imm();
		higher = ((Const_Operand*)asgn->src(0))->hi()->imm();
		return true ;
	}

	return false ;
}

// Reports whether this division may use the inline constant-divisor
// template.  Inlining is currently switched off: the function returns false
// immediately.  The retained logic below the return documents the intended
// rule -- a long `div` whose divisor is a known constant other than 0 and
// all-ones, and whose dividend is not a constant.
bool Div_Inst::is_native_inline()
{
	return false ; //TURN-OFF

	// should be ldiv
	if (kind != div)
		return false;
	if (type() != JIT_TYPE_LONG)
		return false;

	// divisor: halves stay all-ones unless a constant is found
	unsigned d_lo = 0xffffffffu ;
	unsigned d_hi = 0xffffffffu ;
	const bool divisor_known = get_long_constant_value(_srcs[1], d_hi, d_lo) ;

	// dividend should not be a constant
	unsigned n_lo = 0xffffffffu ;
	unsigned n_hi = 0xffffffffu ;
	const bool dividend_known = get_long_constant_value(_srcs[0], n_hi, n_lo) ;

	const bool divisor_is_zero     = (d_lo == 0 && d_hi == 0) ;
	const bool divisor_is_all_ones = (d_lo == 0xffffffffu && d_hi == 0xffffffffu) ;
	return !divisor_is_zero && !divisor_is_all_ones && divisor_known && !dividend_known ;
}

// Builds the argument vector for the inline long-division template.
// The magic-number record is generated here and its address becomes
// argument $3.
//
// Layout of the returned array (len = 3 + n_srcs):
//   [0] dividend (low word operand)      [1] dividend high word operand
//   [2] operand holding the address of a freshly allocated MAGIC record
//   [3..] the original source operands, in order
//
// The divisor must be a known constant (asserted).  The MAGIC record is
// allocated with gc_malloc_fixed_code_for_class_loading and filled by
// magic(); when the divisor fits in 32 bits the 32-bit and remainder magic
// numbers are computed as well.
Operand** Div_Inst::get_native_args(Expressions& exprs, int& len)
{
    Mem_Manager& mem = exprs.mem;
	len = 3 + n_srcs;

    Operand** argarray= (Operand**)mem.alloc(len * sizeof(*argarray)); // expand to 3 for a long division
	//dividend is expanded to 2 INT
	argarray[0] = _srcs[0];
	argarray[1] = _srcs[0]->hi_opnd();

	//divisor is changed to the address of magic numbers.
	// The two halves are fetched into separate words and combined with
	// shifts; writing a uint64 through an unsigned* alias and reading it
	// back is undefined behaviour and silently assumes little-endian layout.
	unsigned lower = 0xffffffffu ;
	unsigned higher = 0xffffffffu ;
	bool r_is_constant = get_long_constant_value(_srcs[1], higher, lower) ;
	assert(r_is_constant) ;
	uint64  divisor = (((uint64)higher) << 32) | (uint64)lower ;

	MAGIC* m = (MAGIC*)gc_malloc_fixed_code_for_class_loading(sizeof(MAGIC)) ;
	m->divisor = divisor ;
	m->d_32 = 0 ; //means we have not magic number of fast 64/32
	magic(divisor,*m) ;
	if((uint64)divisor < __UINT64_C(0x100000000)){ // Calculate 32-bit magic number
		magic_32u((unsigned)divisor,*m) ;
		magic_rem((unsigned)divisor,*m) ;
	}
	// The record's address is passed as a 32-bit immediate (IA-32 only).
	Inst* imm = exprs.lookup_imm((unsigned)m,JIT_TYPE_INT,this) ;
	argarray[2] = imm->dst() ;
	assert(argarray[2]) ;

	//The old arguments are appended after the new 3
	int i = 0 ;
	for(i = 0 ; i<n_srcs ; i++)
		argarray[3+i] = _srcs[i] ;

	return argarray ;
}

bool Mul_Inst::is_native_inline()
{
	// lmul
	if( kind != mul || type() != JIT_TYPE_LONG)
		return false;

	// should by a 32-bit constant
	uint64  multiplicator = __UINT64_C(0xffffffffffffffff) ;
	unsigned* lower = (unsigned*)&multiplicator ;
	unsigned* higher = &lower[1]	;
	bool r_is_constant = get_long_constant_value(_srcs[1],*higher, *lower) ;

	// should not be a constants
	uint64  multiplicand = __UINT64_C(0xffffffffffffffff) ;
	unsigned* l_lower = (unsigned*)&multiplicand ;
	unsigned* l_higher = &l_lower[1]	;
	bool l_is_constant = get_long_constant_value(_srcs[0],*l_higher, *l_lower) ;

	is_mul_const = false;

	if(r_is_constant && !l_is_constant && *higher==0) //32-bit
	{
		is_mul_const = true;
		return true;
	}
	else 
		is_mul_const = false;

//	return true;
	return false;
}

// Builds the argument vector for the inline long-multiply templates.
// For every long source i, slots 2*i and 2*i+1 receive its low and high
// operands; sources of any other type leave their two slots unset.  The
// original sources are then stored again starting at slot 4.
// `len` receives 4 + n_srcs.
Operand** Mul_Inst::get_native_args(Expressions& exprs, int& len)
{
    Mem_Manager& arena = exprs.mem;
	len = 4 + n_srcs;

    Operand** args = (Operand**)arena.alloc(len * sizeof(*args));

	int s = 0;
	while (s < n_srcs) {
		Operand *operand = _srcs[s];
		if (operand->type == JIT_TYPE_LONG) {
			args[2*s]     = operand;
			args[2*s + 1] = operand->hi_opnd();
		}
		s++;
	}

	// The old arguments follow the four expanded words.
	for (s = 0; s < n_srcs; s++)
		args[4 + s] = _srcs[s];

	return args;
}

// Reports whether this instruction may use the inline long-shift template:
// it must be a long `shl` whose first source is not a known constant.
// (The shift count is not required to be a constant.)
bool Bitwise_Inst::is_native_inline()
{
	// lshl only
	if (kind != shl)
		return false;
	if (type() != JIT_TYPE_LONG)
		return false;

	// value being shifted must not be a constant
	unsigned value_lo = 0xffffffffu ;
	unsigned value_hi = 0xffffffffu ;
	const bool value_known = get_long_constant_value(_srcs[0], value_hi, value_lo) ;

	return !value_known;
}

// Builds the three-entry argument vector for the inline long-shift template.
// A long source i fills slots 2*i and 2*i+1 with its low and high operands;
// an int source i fills slot 2*i.  `len` receives 3.
// The bounds assertions run before the stores they protect, so a debug build
// stops before writing past the end of the array.
Operand** Bitwise_Inst::get_native_args(Expressions& exprs, int& len)
{
    Mem_Manager& mem = exprs.mem;
	len = 3;

    Operand** argarray= (Operand**)mem.alloc(len * sizeof(*argarray));

	for(int i = 0 ; i<n_srcs ; i++){
		if(_srcs[i]->type == JIT_TYPE_LONG){
			assert(2*i+1 < 3);
			argarray[2*i] = _srcs[i];
			argarray[2*i+1] = _srcs[i]->hi_opnd();
		}
		else if (_srcs[i]->type == JIT_TYPE_INT){
			assert(2*i < 3);
			argarray[2*i] = _srcs[i];
		}
	}

	return argarray ;
}

/*Inst** Mul_Inst::gen_native_args(Inst* head, Expressions& exprs, int& len)
{
    Mem_Manager& mem = exprs.mem;

    Inst** argarray= (Inst**)mem.alloc(2*n_srcs * sizeof(*argarray));

	for(int i = 0 ; i<n_srcs ; i++){
		if(_srcs[i]->type == JIT_TYPE_LONG){
			Temp_Reg* dst_lo = exprs.create_new_temp_reg(JIT_TYPE_INT) ;
		    Operand_Exp* exp_dst_lo = exprs.lookup_temp_reg_exp(dst_lo) ;
			Exp *asgn_lo = exprs.lookup_inst_exp(Exp::Assign, exp_dst_lo, exp, JIT_TYPE_INT) ;
			argarray[2*i] = new (mem) Assign_Inst(dst_lo,_srcs[i],asgn_lo,head);

			Temp_Reg* dst_hi = exprs.create_new_temp_reg(JIT_TYPE_INT) ;
		    Operand_Exp* exp_dst_hi = exprs.lookup_temp_reg_exp(dst_hi) ;
			Exp *asgn_hi = exprs.lookup_inst_exp(Exp::Assign, exp_dst_hi, exp, JIT_TYPE_INT) ;
			argarray[2*i+1] = new (mem) Assign_Inst(dst_hi,_srcs[i]->hi_opnd(),asgn_hi,head);
		}
	}

	len = 2*n_srcs ;//maybe not
	return argarray ;
}*/
//#endif

int Add_Inst::esp_effect()
{
    if (_dst->is_physical_reg() && _dst->bv_position() == esp_reg)
    {
        assert(n_srcs == 2);
        assert(_srcs[1]->kind == Operand::Immediate);
        return (int) ((Imm_Operand *)_srcs[1])->imm();
    }
    return 0;
}

int Sub_Inst::esp_effect()
{
    if (_dst->is_physical_reg() && _dst->bv_position() == esp_reg)
    {
        assert(n_srcs == 2);
        assert(_srcs[1]->kind == Operand::Immediate);
        return -(int) ((Imm_Operand *)_srcs[1])->imm();
    }
    return 0;
}

// Returns 0 for the call kinds listed below and 1 for every other call.
// The zero-effect kinds are multinew_call always, the write/read barrier
// calls under JIT_SAPPHIRE, and stat_indirect_call under STAT_INDIRECT_CALL.
int Call_Inst::esp_effect()
{
    bool no_effect = (kind == multinew_call);
#ifdef JIT_SAPPHIRE
    no_effect = no_effect || kind == writebarrier_call || kind == readbarrier_call;
#endif
#ifdef STAT_INDIRECT_CALL
    no_effect = no_effect || kind == stat_indirect_call;
#endif
    return no_effect ? 0 : 1;
}

// Returns the operand that carries call argument `i`: the destination of the
// argument instruction when it has one, otherwise source 0 of what must be
// a push instruction.
Operand *Call_Inst::get_arg_opnd(unsigned i) {
    assert(i < _n_args && _args[i] != NULL);

    Operand *produced = _args[i]->dst();
    if (produced != NULL)
        return produced;

    assert(_args[i]->is_push());
    return _args[i]->src(0);
}

// Folds replacements into this array operand's base and index registers.
// When the index is a single-definition temp defined by an immediate
// assignment and the folding table has a replacement for it, the immediate
// is stored directly in _index (the spill code is expected to ignore it
// and/or turn it into a field operand) and the defining instruction is
// marked dead unless the temp is a global register-allocation candidate.
// When the defining instruction is not an immediate assignment it is
// un-marked as dead.  For a double that is not itself the high half, the
// high-half operand is folded as well.
void Array_Operand::fold_operand(Folding& fold) {
    fold.replace_reg_opnd(_base);
    fold.replace_reg_opnd(_index);

    if (!_index->is_single_def_temp_reg())
        return;

    Temp_Reg *index_reg = (Temp_Reg*)_index;
    Inst *index_def = index_reg->inst();

    if (!index_def->is_imm_assignment()) {
        index_def->unmark_dead();
    } else {
        Operand *folded = fold.lookup_replace_opnd(index_reg);
        if (folded != NULL) {
            assert(folded->kind == Operand::Immediate);
            _index = (Reg_Operand*)folded;
            if (!index_reg->global_reg_alloc_cand())
                index_def->mark_dead();
        }
    }

    if (type == JIT_TYPE_DOUBLE && !is_hi())
        hi_opnd()->fold_operand(fold);
}

// An add can be emitted as lea when it is a non-long plain `add`, its
// destination has a physical register, and each source is either an
// immediate or has a physical register.
bool Add_Inst::can_use_lea()
{
    if (_dst->type == JIT_TYPE_LONG || kind != add)
        return false;
    if (_dst->assigned_preg() == n_reg)
        return false;

    const bool lhs_ok = _srcs[0]->kind == Operand::Immediate ||
                        _srcs[0]->assigned_preg() != n_reg;
    if (!lhs_ok)
        return false;

    return _srcs[1]->kind == Operand::Immediate ||
           _srcs[1]->assigned_preg() != n_reg;
}

// A sub can be emitted as lea when it is a non-long plain `sub`, its
// destination and first source both have a physical register, and the
// second source is an immediate.
bool Sub_Inst::can_use_lea()
{
    if (_dst->type == JIT_TYPE_LONG || kind != sub)
        return false;
    if (_dst->assigned_preg() == n_reg)
        return false;

    return _srcs[0]->assigned_preg() != n_reg &&
           _srcs[1]->kind == Operand::Immediate;
}

// Reports whether this add affects the status flags: false for fadd, true
// for any other add that cannot be emitted as lea, false when lea is
// possible.  (An older, disabled rule returned true when the destination
// and first source shared a register, because emit() would not pick lea
// then; _must_use_lea does not change the answer.)
bool Add_Inst::affects_flags()
{
    return kind != fadd && !can_use_lea();
}

// Reports whether this sub affects the status flags: false for fsub, true
// for any other sub that cannot be emitted as lea, false when lea is
// possible.  (An older, disabled rule returned true when the destination
// and first source shared a register, because emit() would not pick lea
// then; _must_use_lea does not change the answer.)
bool Sub_Inst::affects_flags()
{
    return kind != fsub && !can_use_lea();
}

// Every multiply kind except fmul is reported as affecting the flags.
bool Mul_Inst::affects_flags()
{
    return kind != fmul;
}

// Every divide/remainder kind except fdiv and frem is reported as affecting
// the flags.
bool Div_Inst::affects_flags()
{
    return !(kind == fdiv || kind == frem);
}

// Every negate kind except fneg is reported as affecting the flags.
bool Neg_Inst::affects_flags()
{
    return kind != fneg;
}

// Tries to replace this instruction by a plain assignment.  When
// is_copy_prop_assignment() reports that the sources fold to a single
// operand, a new Assign_Inst with the same destination and expression is
// created (passing `this` as its position argument), this instruction is
// unlinked, and the new one is returned.  Otherwise `this` is returned
// unchanged.
Inst *Inst::simplify(Mem_Manager &mem, Expressions &exprs)
{
    Operand *folded_src;
    const bool foldable =
        is_copy_prop_assignment(exprs, _srcs[0], _srcs[1], _srcs[2], folded_src);

    if (foldable) {
        Inst *replacement = new(mem) Assign_Inst(_dst, folded_src, exp, this);
#ifdef PRINTABLE_O3
        replacement->bc_index = bc_index;
#endif // PRINTABLE_O3
        unlink();
        return replacement;
    }
    return this;
}

// Constant-folds an int `add` of two immediates.  On success new_rhs is set
// to the immediate operand for the 32-bit wrapped sum and true is returned;
// otherwise new_rhs is untouched and false is returned.  src2 is unused.
bool Add_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                       Operand *src2, Operand *&new_rhs)
{
    const bool both_immediate = src0->kind == Operand::Immediate &&
                                src1->kind == Operand::Immediate;
    if (!both_immediate || kind != add || type() != JIT_TYPE_INT)
        return false;

    const unsigned lhs = ((Imm_Operand *)src0)->imm();
    const unsigned rhs = ((Imm_Operand *)src1)->imm();
    const unsigned sum = lhs + rhs;
    new_rhs = exprs.lookup_imm_exp(sum, JIT_TYPE_INT)->opnd;
    return true;
}

// Constant-folds an int `sub` of two immediates.  On success new_rhs is set
// to the immediate operand for the 32-bit wrapped difference and true is
// returned; otherwise new_rhs is untouched and false is returned.  src2 is
// unused.
bool Sub_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                       Operand *src2, Operand *&new_rhs)
{
    const bool both_immediate = src0->kind == Operand::Immediate &&
                                src1->kind == Operand::Immediate;
    if (!both_immediate || kind != sub || type() != JIT_TYPE_INT)
        return false;

    const unsigned lhs = ((Imm_Operand *)src0)->imm();
    const unsigned rhs = ((Imm_Operand *)src1)->imm();
    const unsigned difference = lhs - rhs;
    new_rhs = exprs.lookup_imm_exp(difference, JIT_TYPE_INT)->opnd;
    return true;
}

// Constant-folds an int `mul` of two immediates.  On success new_rhs is set
// to the immediate operand for the 32-bit wrapped product and true is
// returned; otherwise new_rhs is untouched and false is returned.  src2 is
// unused.
bool Mul_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                       Operand *src2, Operand *&new_rhs)
{
    const bool both_immediate = src0->kind == Operand::Immediate &&
                                src1->kind == Operand::Immediate;
    if (!both_immediate || kind != mul || type() != JIT_TYPE_INT)
        return false;

    const unsigned lhs = ((Imm_Operand *)src0)->imm();
    const unsigned rhs = ((Imm_Operand *)src1)->imm();
    const unsigned product = lhs * rhs;
    new_rhs = exprs.lookup_imm_exp(product, JIT_TYPE_INT)->opnd;
    return true;
}

// Constant-folds an int `neg` of an immediate.  On success new_rhs is set to
// the immediate operand for the negated value and true is returned;
// otherwise new_rhs is untouched and false is returned.  src1 and src2 are
// unused.
bool Neg_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                       Operand *src2, Operand *&new_rhs)
{
    if (src0->kind != Operand::Immediate)
        return false;
    if (kind != neg)
        return false;
    if (type() != JIT_TYPE_INT)
        return false;
    // Negate in unsigned arithmetic: two's-complement wrap-around gives the
    // expected result for 0x80000000 (negation yields the same value), whereas
    // negating INT_MIN as a signed int is undefined behaviour in C++.
    unsigned value = ((Imm_Operand *)src0)->imm();
    unsigned negated = 0u - value;
    int combine = (int) negated;
    new_rhs = exprs.lookup_imm_exp(combine, JIT_TYPE_INT)->opnd;
    return true;
}

// Constant-folds an int bitwise/shift operation on two immediates.  On
// success new_rhs is set to the immediate operand for the result and true is
// returned; otherwise new_rhs is untouched and false is returned.  src2 is
// unused.
bool Bitwise_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                           Operand *src2, Operand *&new_rhs)
{
    if (src0->kind != Operand::Immediate)
        return false;
    if (src1->kind != Operand::Immediate)
        return false;
    if (type() != JIT_TYPE_INT)
        return false;
    unsigned left =  ((Imm_Operand *)src0)->imm();
    unsigned right = ((Imm_Operand *)src1)->imm();
    // Only the low five bits of a 32-bit shift count are significant, both in
    // Java int shifts and in the x86 shl/shr/sar instructions.  Masking here
    // keeps the folded value identical to what the emitted instruction would
    // compute and avoids the C++ undefined behaviour of shifting by 32 or more.
    unsigned count = right & 31;
    unsigned combine;
    switch (kind)
    {
    case k_and:
        combine = left & right;
        break;
    case k_or:
        combine = left | right;
        break;
    case k_xor:
        combine = left ^ right;
        break;
    case shl:
        combine = left << count;
        break;
    case shr:
        combine = left >> count;
        break;
    case sar:
        combine = ((int)left) >> count;
        break;
    default:
        assert(0);
        return false;
        break;
    }
    new_rhs = exprs.lookup_imm_exp(combine, JIT_TYPE_INT)->opnd;
    return true;
}

// Constant-folds a narrowing conversion of an immediate to byte, char or
// short.  The value is truncated to the target width and then widened back
// to 32 bits: sign-extended for byte and short, zero-extended for char.
// On success new_rhs is set to the int immediate operand for the result and
// true is returned; for any other target type, or a non-immediate source,
// false is returned and new_rhs is untouched.  src1 and src2 are unused.
bool Convt_Inst::is_copy_prop_assignment(Expressions &exprs, Operand *src0, Operand *src1,
                                         Operand *src2, Operand *&new_rhs)
{
    if (src0->kind != Operand::Immediate)
        return false;
    unsigned val = ((Imm_Operand *)src0)->imm();
    unsigned result;
    switch (type())
    {
    case JIT_TYPE_BYTE:
        // Plain `char` may be unsigned depending on compiler and flags;
        // a byte must always sign-extend.
        result = (signed char) val;
        break;
    case JIT_TYPE_CHAR:
        result = (unsigned short) val;
        break;
    case JIT_TYPE_SHORT:
        result = (short) val;
        break;
    default:
        return false;
        break;
    }
    new_rhs = exprs.lookup_imm_exp(result, JIT_TYPE_INT)->opnd;
    return true;
}

// Pre-computes the outcome of a compare whose sources are all immediates.
// For an int `cmp` or `test`, the destination status-flags operand is marked
// lt, eq or gt according to the signed comparison of source 0 with source 1
// (or with 0 when there is only one source).  The instruction itself is
// always returned; other kinds and non-int operands are left untouched.
Inst *Compare_Inst::simplify(Mem_Manager &mem, Expressions &exprs)
{
    assert(_srcs[0]->kind == Operand::Immediate);
    assert(n_srcs < 2 || _srcs[1]->kind == Operand::Immediate);

    const bool is_int     = (_srcs[0]->type == JIT_TYPE_INT);
    const bool foldable   = is_int && (kind == cmp || kind == test);
    if (!foldable)
        return this;

    int lhs = ((Imm_Operand *)_srcs[0])->imm();
    int rhs = 0;
    if (n_srcs >= 2)
        rhs = ((Imm_Operand *)_srcs[1])->imm();

    assert(_dst->is_status_flags());
    Status_Flags *flags = (Status_Flags *) _dst;
    if (lhs == rhs)
        flags->set_eq();
    else if (lhs < rhs)
        flags->set_lt();
    else
        flags->set_gt();
    return this;
}

