
Add support for more opcodes on x86-64.

cache-refactoring
Klaus Treichel 17 years ago
parent
commit b9365a9ab8
  1. ChangeLog (14)
  2. jit/jit-gen-x86-64.h (1009)
  3. jit/jit-rules-x86-64.c (516)
  4. jit/jit-rules-x86-64.ins (548)

ChangeLog (14)

@@ -10,6 +10,20 @@
* include/jit/jit-walk.h: use _JIT_ARCH_GET_RETURN_ADDRESS and
_JIT_ARCH_GET_CURRENT_RETURN if available.
* jit/jit-gen-x86-64.h: Add additional macros for saving and
restoring the fpu control word and the mxcsr register. Add
additional SSE conversion macros. Add SSE compare macros.
Add macros for the SSE bit operations on packed values.
Add macros for SSE sqrt and rounding. Add macros for fpu rounding.
* jit/jit-rules-x86-64.c: Add the dreg register class and functions
to handle rounding and SSE bit opcodes on packed values.
* jit/jit-rules-x86-64.ins: Add INT_TO_NFLOAT, LONG_TO_NFLOAT,
FLOAT32_TO_NFLOAT, FLOAT64_TO_NFLOAT.
Rewrite NFLOAT_TO_INT and NFLOAT_TO_LONG to use the new functions
in jit-rules-x86-64.c. Add handling of ABS, NEG and float compares.
2008-03-31 Klaus Treichel <ktreichel@web.de>
* jit/jit-rules-x86.ins: Fix the sign opcode for integers and the

jit/jit-gen-x86-64.h (1009)

File diff suppressed because it is too large

jit/jit-rules-x86-64.c (516)

@@ -103,6 +103,22 @@
*/
#define HAVE_RED_ZONE 1
/*
* Some declarations that should be replaced by querying the cpuinfo
* when generating code for the current cpu.
*/
/*
#define HAVE_X86_SSE_4_1 0
#define HAVE_X86_SSE_4 0
#define HAVE_X86_SSE_3 0
#define HAVE_X86_FISTTP 0
*/
#define TODO() \
do { \
fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
} while(0)
/*
* Setup or teardown the x86 code output process.
*/
@@ -165,6 +181,9 @@ static int _jit_sse_return_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1};
static _jit_regclass_t *x86_64_reg; /* X86_64 general purpose registers */
static _jit_regclass_t *x86_64_creg; /* X86_64 call clobbered general */
/* purpose registers */
static _jit_regclass_t *x86_64_dreg; /* general purpose registers that */
/* can be used as divisor */
/* (all but %rax and %rdx) */
static _jit_regclass_t *x86_64_rreg; /* general purpose registers not used*/
/* for returning values */
static _jit_regclass_t *x86_64_sreg; /* general purpose registers that can*/
@@ -196,6 +215,16 @@ _jit_init_backend(void)
X86_64_REG_R9, X86_64_REG_R10,
X86_64_REG_R11);
/* register class for divisors */
x86_64_dreg = _jit_regclass_create(
"dreg", JIT_REG_WORD | JIT_REG_LONG, 12,
X86_64_REG_RCX, X86_64_REG_RBX,
X86_64_REG_RSI, X86_64_REG_RDI,
X86_64_REG_R8, X86_64_REG_R9,
X86_64_REG_R10, X86_64_REG_R11,
X86_64_REG_R12, X86_64_REG_R13,
X86_64_REG_R14, X86_64_REG_R15);
/* register class with all registers not used for returning values */
x86_64_rreg = _jit_regclass_create(
"rreg", JIT_REG_WORD | JIT_REG_LONG, 12,
@@ -340,6 +369,452 @@ _jit_xmm1_reg_imm_size_float64(jit_gencode_t gen, unsigned char **inst_ptr,
return 1;
}
/*
* Do a logical xmm operation with packed float32 values
*/
static int
_jit_plops_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
void *ptr;
jit_nint offset;
unsigned char *inst;
inst = *inst_ptr;
ptr = _jit_cache_alloc(&(gen->posn), 16);
if(!ptr)
{
return 0;
}
jit_memcpy(ptr, packed_value, 16);
/* calculate the offset for membase addressing */
offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 8 : 7));
if((offset >= jit_min_int) && (offset <= jit_max_int))
{
/* We can use RIP relative addressing here */
x86_64_plops_reg_membase(inst, opc, reg, X86_64_RIP, offset);
*inst_ptr = inst;
return 1;
}
/* Check if mem addressing can be used */
if(((jit_nint)ptr >= jit_min_int) &&
((jit_nint)ptr <= jit_max_int))
{
/* We can use absolute addressing */
x86_64_plops_reg_mem(inst, opc, reg, (jit_nint)ptr);
*inst_ptr = inst;
return 1;
}
/* We have to use an extra general register */
TODO();
return 0;
}
/*
* Do a logical xmm operation with packed float64 values
*/
static int
_jit_plopd_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
void *ptr;
jit_nint offset;
unsigned char *inst;
inst = *inst_ptr;
ptr = _jit_cache_alloc(&(gen->posn), 16);
if(!ptr)
{
return 0;
}
jit_memcpy(ptr, packed_value, 16);
/* calculate the offset for membase addressing */
offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
if((offset >= jit_min_int) && (offset <= jit_max_int))
{
/* We can use RIP relative addressing here */
x86_64_plopd_reg_membase(inst, opc, reg, X86_64_RIP, offset);
*inst_ptr = inst;
return 1;
}
/* Check if mem addressing can be used */
if(((jit_nint)ptr >= jit_min_int) &&
((jit_nint)ptr <= jit_max_int))
{
/* We can use absolute addressing */
x86_64_plopd_reg_mem(inst, opc, reg, (jit_nint)ptr);
*inst_ptr = inst;
return 1;
}
/* We have to use an extra general register */
TODO();
return 0;
}
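/*
 * Illustrative note, not part of the patch: RIP-relative displacements are
 * measured from the address of the *next* instruction, so the two helpers
 * above add the expected encoded length of the packed instruction (one byte
 * more when the xmm register needs a REX prefix, i.e. reg > 7):
 *
 *	offset = (jit_nint)ptr - ((jit_nint)inst + encoded_length);
 */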
/*
* Helpers for saving and setting roundmode in the fpu control word
* and restoring it afterwards.
* The rounding mode bits are bit 10 and 11 in the fpu control word.
* sp_offset is the start offset of a temporary eight byte block.
*/
static unsigned char *
_x86_64_set_fpu_roundmode(unsigned char *inst, int scratch_reg,
int sp_offset, X86_64_ROUNDMODE mode)
{
int fpcw_save_offset = sp_offset + 4;
int fpcw_new_offset = sp_offset;
int round_mode = ((int)mode) << 10;
int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 10);
/* store FPU control word */
x86_64_fnstcw_membase(inst, X86_64_RSP, fpcw_save_offset);
/* load the value into the scratch register */
x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, fpcw_save_offset, 2);
/* Set the rounding mode */
if(mode != X86_ROUND_ZERO)
{
/* Not all bits are set in the new mode so we have to clear the field first */
x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 2);
}
x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 2);
/* Store the new round mode */
x86_64_mov_membase_reg_size(inst, X86_64_RSP, fpcw_new_offset, scratch_reg, 2);
/* Now load the new control word */
x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_new_offset);
return inst;
}
static unsigned char *
_x86_64_restore_fpcw(unsigned char *inst, int sp_offset)
{
int fpcw_save_offset = sp_offset + 4;
/* Now load the saved control word */
x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_save_offset);
return inst;
}
/*
* Helpers for saving and setting roundmode in the mxcsr register and
* restoring it afterwards.
* The rounding mode bits are bit 13 and 14 in the mxcsr register.
* sp_offset is the start offset of a temporary eight byte block.
*/
static unsigned char *
_x86_64_set_xmm_roundmode(unsigned char *inst, int scratch_reg,
int sp_offset, X86_64_ROUNDMODE mode)
{
int mxcsr_save_offset = sp_offset + 4;
int mxcsr_new_offset = sp_offset;
int round_mode = ((int)mode) << 13;
int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 13);
/* save the mxcsr register */
x86_64_stmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
/* Load the contents of the mxcsr register into the scratch register */
x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, mxcsr_save_offset, 4);
/* Set the rounding mode */
if(mode != X86_ROUND_ZERO)
{
/* Not all bits are set in the new mode so we have to clear the field first */
x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 4);
}
x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 4);
/* Store the new round mode */
x86_64_mov_membase_reg_size(inst, X86_64_RSP, mxcsr_new_offset, scratch_reg, 4);
/* and load it to the mxcsr register */
x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_new_offset);
return inst;
}
static unsigned char *
_x86_64_restore_mxcsr(unsigned char *inst, int sp_offset)
{
int mxcsr_save_offset = sp_offset + 4;
/* restore the mxcsr register */
x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
return inst;
}
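/*
 * Illustrative sketch, not part of the patch: the rounding-control field
 * rewritten by the helpers above occupies bits 10-11 of the x87 control word
 * and bits 13-14 of the MXCSR register. A hypothetical host-side equivalent
 * of the clear/insert arithmetic:
 */
static unsigned int
_illustrative_set_roundmode(unsigned int ctrl, unsigned int mode, int shift)
{
	ctrl &= ~(0x3 << shift);	/* clear the two RC bits (shift is 10 for the fpcw, 13 for the mxcsr) */
	ctrl |= (mode & 0x3) << shift;	/* insert the new rounding mode */
	return ctrl;
}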
/*
* Perform rounding of scalar single precision values.
* We have to use the fpu where SSE4.1 is not supported.
*/
static unsigned char *
x86_64_rounds_reg_reg(unsigned char *inst, int dreg, int sreg,
int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
/* Copy the xmm register to the stack */
x86_64_movss_membase_reg(inst, X86_64_RSP, -16, sreg);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RSP, -16, 4);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
/* and move st(0) to the destination register */
x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
/* allocate space on the stack for two ints and one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
/* Copy the xmm register to the stack */
x86_64_movss_regp_reg(inst, X86_64_RSP, sreg);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
/* Load the value to the fpu */
x86_64_fld_regp_size(inst, X86_64_RSP, 4);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 8);
/* and move st(0) to the destination register */
x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
return inst;
}
static unsigned char *
x86_64_rounds_reg_membase(unsigned char *inst, int dreg, int offset,
int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
/* and move st(0) to the destination register */
x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
/* allocate space on the stack for two ints and one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 8);
/* and move st(0) to the destination register */
x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
return inst;
}
/*
* Perform rounding of scalar double precision values.
* We have to use the fpu where SSE4.1 is not supported.
*/
static unsigned char *
x86_64_roundd_reg_reg(unsigned char *inst, int dreg, int sreg,
int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
/* Copy the xmm register to the stack */
x86_64_movsd_membase_reg(inst, X86_64_RSP, -16, sreg);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RSP, -16, 8);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
/* and move st(0) to the destination register */
x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
/* allocate space on the stack for two ints and one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
/* Copy the xmm register to the stack */
x86_64_movsd_regp_reg(inst, X86_64_RSP, sreg);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
/* Load the value to the fpu */
x86_64_fld_regp_size(inst, X86_64_RSP, 8);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 8);
/* and move st(0) to the destination register */
x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
return inst;
}
static unsigned char *
x86_64_roundd_reg_membase(unsigned char *inst, int dreg, int offset,
int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
/* and move st(0) to the destination register */
x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
/* allocate space on the stack for two ints and one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
/* Load the value to the fpu */
x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 8);
/* and move st(0) to the destination register */
x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
return inst;
}
/*
* Round the value in st(0) to integer according to the rounding
* mode specified.
*/
static unsigned char *
x86_64_roundnf(unsigned char *inst, int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
#else
/* allocate a temporary eight byte block on the stack */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 0, mode);
/* And round it to integer */
x86_64_frndint(inst);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 0);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
return inst;
}
/*
* Round the value in the fpu register st(0) to integer and
* store the value in dreg. st(0) is popped from the fpu stack.
*/
static unsigned char *
x86_64_nfloat_to_int(unsigned char *inst, int dreg, int scratch_reg, int size)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_FISTTP
/* convert float to int */
x86_64_fisttp_membase_size(inst, X86_64_RSP, -8, 4);
/* move result to the destination */
x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -8, 4);
#else
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, X86_ROUND_ZERO);
/* And round the value in st(0) to integer and store it on the stack */
x86_64_fistp_membase_size(inst, X86_64_RSP, -16, size);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, -8);
/* and load the integer to the destination register */
x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -16, size);
#endif
#else
#ifdef HAVE_X86_FISTTP
/* allocate space on the stack for one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
/* convert float to int */
x86_64_fisttp_regp_size(inst, X86_64_RSP, 4);
/* move result to the destination */
x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, 4);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#else
/* allocate space on the stack for 2 ints and one long value */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
/* Set the fpu round mode */
inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, X86_ROUND_ZERO);
/* And round the value in st(0) to integer and store it on the stack */
x86_64_fistp_regp_size(inst, X86_64_RSP, size);
/* restore the fpu control word */
inst = _x86_64_restore_fpcw(inst, 8);
/* and load the integer to the destination register */
x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, size);
/* restore the stack pointer */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
return inst;
}
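/*
 * Illustrative note, not part of the patch: fisttp (introduced with SSE3)
 * always truncates toward zero regardless of the rounding control, which is
 * why the HAVE_X86_FISTTP paths above can omit the fnstcw/fldcw sequence
 * that the fistp fallback needs.
 */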
/*
* Call a function
*/
@@ -1049,8 +1524,15 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
{
int xmm_reg = _jit_reg_info[reg].cpu_reg;
_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MOV,
xmm_reg, &float32_value);
if(float32_value == (jit_float32) 0.0)
{
x86_64_clear_xreg(inst, xmm_reg);
}
else
{
_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MOV,
xmm_reg, &float32_value);
}
}
else
{
@@ -1069,7 +1551,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float32));
jit_memcpy(ptr, &float32_value, sizeof(float32_value));
offset = (jit_nint)ptr - ((jit_nint)inst + 7);
offset = (jit_nint)ptr - ((jit_nint)inst + 6);
if((offset >= jit_min_int) && (offset <= jit_max_int))
{
/* We can use RIP relative addressing here */
@@ -1084,7 +1566,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
else
{
/* We have to use an extra general register */
/* TODO */
TODO();
}
}
}
@@ -1111,8 +1593,15 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
{
int xmm_reg = _jit_reg_info[reg].cpu_reg;
_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MOV,
xmm_reg, &float64_value);
if(float64_value == (jit_float64) 0.0)
{
x86_64_clear_xreg(inst, xmm_reg);
}
else
{
_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MOV,
xmm_reg, &float64_value);
}
}
else
{
@@ -1131,7 +1620,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float64));
jit_memcpy(ptr, &float64_value, sizeof(float64_value));
offset = (jit_nint)ptr - ((jit_nint)inst + 7);
offset = (jit_nint)ptr - ((jit_nint)inst + 6);
if((offset >= jit_min_int) && (offset <= jit_max_int))
{
/* We can use RIP relative addressing here */
@@ -1146,7 +1635,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
else
{
/* We have to use an extra general register */
/* TODO */
TODO();
}
}
}
@@ -1192,7 +1681,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
else
{
/* We have to use an extra general register */
/* TODO */
TODO();
}
}
else
@@ -1212,7 +1701,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_nfloat));
jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value));
offset = (jit_nint)ptr - ((jit_nint)inst + 7);
offset = (jit_nint)ptr - ((jit_nint)inst + 6);
if((offset >= jit_min_int) && (offset <= jit_max_int))
{
/* We can use RIP relative addressing here */
@@ -1241,7 +1730,7 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value
else
{
/* We have to use an extra general register */
/* TODO */
TODO();
}
}
}
@@ -2315,11 +2804,6 @@ flush_return_struct(unsigned char *inst, jit_value_t value)
return inst;
}
#define TODO() \
do { \
fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
} while (0)
void
_jit_gen_insn(jit_gencode_t gen, jit_function_t func,
jit_block_t block, jit_insn_t insn)

jit/jit-rules-x86-64.ins (548)

@@ -22,6 +22,7 @@
%regclass reg x86_64_reg
%regclass creg x86_64_creg
%regclass dreg x86_64_dreg
%regclass rreg x86_64_rreg
%regclass sreg x86_64_sreg
%regclass freg x86_64_freg
@@ -91,62 +92,106 @@ JIT_OP_EXPAND_UINT:
x86_64_mov_reg_reg_size(inst, $1, $2, 4);
}
JIT_OP_INT_TO_NFLOAT:
[=freg, local] -> {
x86_64_fild_membase_size(inst, X86_64_RBP, $2, 4);
}
[=freg, reg] -> {
#ifdef HAVE_RED_ZONE
x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 4);
x86_64_fild_membase_size(inst, X86_64_RSP, -8, 4);
#else
x86_64_push_reg_size(inst, $2, 8);
x86_64_fild_membase_size(inst, X86_64_RSP, 0, 4);
x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
#endif
}
JIT_OP_LONG_TO_NFLOAT:
[=freg, local] -> {
x86_64_fild_membase_size(inst, X86_64_RBP, $2, 8);
}
[=freg, reg] -> {
#ifdef HAVE_RED_ZONE
x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 8);
x86_64_fild_membase_size(inst, X86_64_RSP, -8, 8);
#else
x86_64_push_reg_size(inst, $2, 8);
x86_64_fild_membase_size(inst, X86_64_RSP, 0, 8);
x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
#endif
}
JIT_OP_NFLOAT_TO_INT: stack
[=reg, freg] -> {
/* allocate space on the stack for 2 shorts and 1 int */
[=reg, freg, scratch reg] -> {
inst = x86_64_nfloat_to_int(inst, $1, $3, 4);
}
JIT_OP_NFLOAT_TO_LONG: stack
[=reg, freg, scratch reg] -> {
inst = x86_64_nfloat_to_int(inst, $1, $3, 8);
}
JIT_OP_FLOAT32_TO_NFLOAT:
[=freg, local] -> {
x86_64_fld_membase_size(inst, X86_64_RBP, $2, 4);
}
[=freg, xreg] -> {
#ifdef HAVE_RED_ZONE
x86_64_movss_membase_reg(inst, X86_64_RSP, -8, $2);
x86_64_fld_membase_size(inst, X86_64_RSP, -8, 4);
#else
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
/* store FPU control word */
x86_64_fnstcw_membase(inst, X86_64_RSP, 0);
/* set "round toward zero" mode */
x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2);
x86_64_or_reg_imm_size(inst, $1, 0xc00, 2);
x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2);
x86_64_fldcw_membase(inst, X86_64_RSP, 2);
/* convert float to int */
x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 4);
/* restore FPU control word */
x86_64_fldcw_membase(inst, X86_64_RSP, 0);
/* move result to the destination */
x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 4);
/* restore the stack */
x86_64_movss_regp_reg(inst, X86_64_RSP, $2);
x86_64_fld_regp_size(inst, X86_64_RSP, 4);
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
}
JIT_OP_NFLOAT_TO_LONG: stack
[=reg, freg] -> {
/* allocate space on the stack for 2 shorts and 1 long */
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 12, 8);
/* store FPU control word */
x86_64_fnstcw_membase(inst, X86_64_RSP, 0);
/* set "round toward zero" mode */
x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2);
x86_64_or_reg_imm_size(inst, $1, 0xc00, 2);
x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2);
x86_64_fldcw_membase(inst, X86_64_RSP, 2);
/* convert float to long */
x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 8);
/* restore FPU control word */
x86_64_fldcw_membase(inst, X86_64_RSP, 0);
/* move result to the destination */
x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 8);
/* restore the stack */
x86_64_add_reg_imm_size(inst, X86_64_RSP, 12, 8);
JIT_OP_FLOAT64_TO_NFLOAT:
[=freg, local] -> {
x86_64_fld_membase_size(inst, X86_64_RBP, $2, 8);
}
[=freg, xreg] -> {
#ifdef HAVE_RED_ZONE
x86_64_movsd_membase_reg(inst, X86_64_RSP, -8, $2);
x86_64_fld_membase_size(inst, X86_64_RSP, -8, 8);
#else
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
x86_64_movsd_regp_reg(inst, X86_64_RSP, $2);
x86_64_fld_regp_size(inst, X86_64_RSP, 8);
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
}
JIT_OP_NFLOAT_TO_FLOAT32: stack
[=xreg, freg] -> {
#ifdef HAVE_RED_ZONE
/* Avoid modifying the stack pointer by simply using negative */
/* offsets here. */
x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 4);
x86_64_movss_reg_membase(inst, $1, X86_64_RSP, -8);
#else
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
x86_64_movss_reg_regp(inst, $1, X86_64_RSP);
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
}
JIT_OP_NFLOAT_TO_FLOAT64: stack
[=xreg, freg] -> {
#ifdef HAVE_RED_ZONE
/* Avoid modifying the stack pointer by simply using negative */
/* offsets here. */
x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 8);
x86_64_movsd_reg_membase(inst, $1, X86_64_RSP, -8);
#else
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
x86_64_movsd_reg_regp(inst, $1, X86_64_RSP);
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
}
/*
@@ -894,12 +939,12 @@ JIT_OP_IDIV: more_space
x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 4);
x86_64_sar_reg_imm_size(inst, $1, shift, 4);
}
[reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 4);
x86_64_cdq(inst);
x86_64_idiv_reg_size(inst, $3, 4);
}
[reg("rax"), reg, scratch reg("rdx")] -> {
[reg("rax"), dreg, scratch reg("rdx")] -> {
jit_int min_int = jit_min_int;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -937,12 +982,12 @@ JIT_OP_IDIV_UN: more_space
}
x86_64_shr_reg_imm_size(inst, $1, shift, 4);
}
[reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 4);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $3, 4);
}
[reg("rax"), reg, scratch reg("rdx")] -> {
[reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $2, $2, 4);
@@ -974,12 +1019,12 @@ JIT_OP_IREM: more_space
x86_patch(patch, inst);
x86_64_clear_reg(inst, $1);
}
[=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 4);
x86_64_cdq(inst);
x86_64_idiv_reg_size(inst, $4, 4);
}
[=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
jit_int min_int = jit_min_int;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1009,16 +1054,16 @@ JIT_OP_IREM_UN: more_space
[reg, imm, if("$2 == 1")] -> {
x86_64_clear_reg(inst, $1);
}
[reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
[reg, imm, if("($2 & ($2 - 1)) == 0")] -> {
/* x & (x - 1) is equal to zero if x is a power of 2 */
x86_64_and_reg_imm_size(inst, $1, $2 - 1, 4);
}
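/*
 * Illustrative note, not part of the patch: for an unsigned value x and a
 * power-of-two divisor d, x % d equals x & (d - 1), e.g. 13 % 8 == 13 & 7 == 5,
 * which is why the rule above needs only a single and instruction.
 */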
[=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 4);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $4, 4);
}
[=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $3, $3, 4);
@@ -1170,12 +1215,12 @@ JIT_OP_LDIV: more_space
x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 8);
x86_64_sar_reg_imm_size(inst, $1, shift, 8);
}
[reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 8);
x86_64_cqo(inst);
x86_64_idiv_reg_size(inst, $3, 8);
}
[reg("rax"), reg, scratch reg("rdx")] -> {
[reg("rax"), dreg, scratch reg("rdx")] -> {
jit_long min_long = jit_min_long;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1214,12 +1259,12 @@ JIT_OP_LDIV_UN: more_space
}
x86_64_shr_reg_imm_size(inst, $1, shift, 8);
}
[reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 8);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $3, 8);
}
[reg("rax"), reg, scratch reg("rdx")] -> {
[reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $2, $2, 8);
@@ -1251,12 +1296,12 @@ JIT_OP_LREM: more_space
x86_patch(patch, inst);
x86_64_clear_reg(inst, $1);
}
[=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 8);
x86_64_cqo(inst);
x86_64_idiv_reg_size(inst, $4, 8);
}
[=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
jit_long min_long = jit_min_long;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1301,12 +1346,12 @@ JIT_OP_LREM_UN: more_space
x86_64_and_reg_reg_size(inst, $1, $3, 8);
}
}
[=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 8);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $4, 8);
}
[=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $3, $3, 8);
@@ -1367,6 +1412,22 @@ JIT_OP_FDIV:
x86_64_divss_reg_membase(inst, $1, X86_64_RBP, $2);
}
JIT_OP_FABS:
[xreg] -> {
/* Simply clear the sign */
jit_uint values[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
_jit_plops_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
}
JIT_OP_FNEG:
[xreg] -> {
/* Simply toggle the sign */
jit_uint values[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
_jit_plops_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
}
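/*
 * Illustrative note, not part of the patch: the 0x7fffffff and 0x80000000
 * constants above clear or flip the IEEE-754 sign bit in each 32-bit lane,
 * so the packed and/xor behaves per lane like a hypothetical:
 *
 *	union { jit_float32 f; jit_uint u; } v = { x };
 *	v.u &= 0x7fffffff;	(fabs: clear the sign bit)
 *	v.u ^= 0x80000000;	(fneg: toggle the sign bit)
 */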
/*
* double precision float versions
*/
@@ -1415,6 +1476,35 @@ JIT_OP_DDIV:
x86_64_divsd_reg_reg(inst, $1, $2);
}
JIT_OP_DABS:
[xreg] -> {
/* Simply clear the sign */
jit_ulong values[2] = {0x7fffffffffffffff, 0x7fffffffffffffff};
_jit_plopd_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
}
JIT_OP_DNEG:
[xreg] -> {
/* Simply toggle the sign */
jit_ulong values[2] = {0x8000000000000000, 0x8000000000000000};
_jit_plopd_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
}
/*
* native float versions
*/
JIT_OP_NFABS: stack
[freg] -> {
x86_64_fabs(inst);
}
JIT_OP_NFNEG: stack
[freg] -> {
x86_64_fchs(inst);
}
/*
* Bitwise opcodes.
*/
@@ -1872,13 +1962,133 @@ JIT_OP_BR_LGE_UN: branch
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
JIT_OP_BR_FEQ:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x74 /* eq */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x74 /* eq */, insn);
}
JIT_OP_BR_FNE:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x75 /* ne */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x75 /* ne */, insn);
}
JIT_OP_BR_FLT:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
}
JIT_OP_BR_FLE:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x76 /* le_un */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x76 /* le_un */, insn);
}
JIT_OP_BR_FGT:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
}
JIT_OP_BR_FGE:
[xreg, local] -> {
x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
[xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
JIT_OP_BR_DEQ:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x74 /* eq */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x74 /* eq */, insn);
}
JIT_OP_BR_DNE:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x75 /* ne */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x75 /* ne */, insn);
}
JIT_OP_BR_DLT:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
}
JIT_OP_BR_DLE:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x76 /* le_un */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x76 /* le_un */, insn);
}
JIT_OP_BR_DGT:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
}
JIT_OP_BR_DGE:
[xreg, local] -> {
x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
[xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $1, $2);
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
/*
* Comparison opcodes.
*/
JIT_OP_IEQ:
[=reg, reg, immzero] -> {
x86_64_or_reg_reg_size(inst, $2, $2, 4);
x86_64_test_reg_reg_size(inst, $2, $2, 4);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
[=reg, reg, imm] -> {
@@ -1896,7 +2106,7 @@ JIT_OP_IEQ:
JIT_OP_INE:
[=reg, reg, immzero] -> {
x86_64_or_reg_reg_size(inst, $2, $2, 4);
x86_64_test_reg_reg_size(inst, $2, $2, 4);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
[=reg, reg, imm] -> {
@@ -2026,7 +2236,7 @@ JIT_OP_IGE_UN:
JIT_OP_LEQ:
[=reg, reg, immzero] -> {
x86_64_or_reg_reg_size(inst, $2, $2, 8);
x86_64_test_reg_reg_size(inst, $2, $2, 8);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
[=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> {
@@ -2044,7 +2254,7 @@ JIT_OP_LEQ:
JIT_OP_LNE:
[=reg, reg, immzero] -> {
x86_64_or_reg_reg_size(inst, $2, $2, 8);
x86_64_test_reg_reg_size(inst, $2, $2, 8);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
[=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> {
@@ -2172,6 +2382,232 @@ JIT_OP_LGE_UN:
inst = setcc_reg(inst, $1, X86_CC_GE, 0);
}
JIT_OP_FEQ:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
JIT_OP_FNE:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
JIT_OP_FLT:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_B, 0);
}
JIT_OP_FLE:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_BE, 0);
}
JIT_OP_FGT:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_A, 0);
}
JIT_OP_FGE:
[=reg, xreg, xreg] -> {
x86_64_comiss_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_AE, 0);
}
JIT_OP_DEQ:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
JIT_OP_DNE:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
JIT_OP_DLT:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_B, 0);
}
JIT_OP_DLE:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_BE, 0);
}
JIT_OP_DGT:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_A, 0);
}
JIT_OP_DGE:
[=reg, xreg, xreg] -> {
x86_64_comisd_reg_reg(inst, $2, $3);
inst = setcc_reg(inst, $1, X86_CC_AE, 0);
}
JIT_OP_FSQRT:
[=xreg, local] -> {
x86_64_sqrtss_reg_membase(inst, $1, X86_64_RBP, $2);
}
[=xreg, xreg] -> {
x86_64_sqrtss_reg_reg(inst, $1, $2);
}
JIT_OP_DSQRT:
[=xreg, local] -> {
x86_64_sqrtsd_reg_membase(inst, $1, X86_64_RBP, $2);
}
[=xreg, xreg] -> {
x86_64_sqrtsd_reg_reg(inst, $1, $2);
}
/*
* Absolute, minimum, maximum, and sign.
*/
JIT_OP_IMAX:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 4);
}
JIT_OP_IMAX_UN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 4);
}
JIT_OP_IMIN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 4);
}
JIT_OP_IMIN_UN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 4);
}
JIT_OP_LMAX:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 8);
}
JIT_OP_LMAX_UN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 8);
}
JIT_OP_LMIN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 8);
}
JIT_OP_LMIN_UN:
[reg, reg] -> {
x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 8);
}
JIT_OP_FMAX:
[xreg, local] -> {
x86_64_maxss_reg_membase(inst, $1, X86_64_RBP, $2);
}
[xreg, xreg] -> {
x86_64_maxss_reg_reg(inst, $1, $2);
}
JIT_OP_FMIN:
[xreg, local] -> {
x86_64_minss_reg_membase(inst, $1, X86_64_RBP, $2);
}
[xreg, xreg] -> {
x86_64_minss_reg_reg(inst, $1, $2);
}
JIT_OP_DMAX:
[xreg, local] -> {
x86_64_maxsd_reg_membase(inst, $1, X86_64_RBP, $2);
}
[xreg, xreg] -> {
x86_64_maxsd_reg_reg(inst, $1, $2);
}
JIT_OP_DMIN:
[xreg, local] -> {
x86_64_minsd_reg_membase(inst, $1, X86_64_RBP, $2);
}
[xreg, xreg] -> {
x86_64_minsd_reg_reg(inst, $1, $2);
}
/*
* Rounding
*/
JIT_OP_FFLOOR: more_space
[=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
}
[=xreg, xreg, scratch reg] -> {
inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
}
JIT_OP_DFLOOR: more_space
[=xreg, local, scratch reg] -> {
inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
}
[=xreg, xreg, scratch reg] -> {
inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
}
JIT_OP_NFFLOOR: more_space
[freg, scratch reg] -> {
inst = x86_64_roundnf(inst, $2, X86_ROUND_DOWN);
}
JIT_OP_FCEIL: more_space
[=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
}
[=xreg, xreg, scratch reg] -> {
inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
}
JIT_OP_DCEIL: more_space
[=xreg, local, scratch reg] -> {
inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
}
[=xreg, xreg, scratch reg] -> {
inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
}
JIT_OP_NFCEIL: more_space
[freg, scratch reg] -> {
inst = x86_64_roundnf(inst, $2, X86_ROUND_UP);
}
/*
JIT_OP_FRINT: more_space
[=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_ZERO);
}
[=xreg, xreg, scratch reg] -> {
inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_ZERO);
}
*/
/*
* Pointer check opcodes.
*/
