 PHP: Test and Code Coverage Analysis
 

LCOV - code coverage report
Current view: top level - ext/pcre/pcre2lib/sljit - sljitNativeX86_common.c (source / functions)
Test: PHP Code Coverage
Date: 2022-01-26
                      Hit     Total    Coverage
Lines:                474      1112      42.6 %
Functions:             28        49      57.1 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*
       2             :  *    Stack-less Just-In-Time compiler
       3             :  *
       4             :  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
       5             :  *
       6             :  * Redistribution and use in source and binary forms, with or without modification, are
       7             :  * permitted provided that the following conditions are met:
       8             :  *
       9             :  *   1. Redistributions of source code must retain the above copyright notice, this list of
      10             :  *      conditions and the following disclaimer.
      11             :  *
      12             :  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
      13             :  *      of conditions and the following disclaimer in the documentation and/or other materials
      14             :  *      provided with the distribution.
      15             :  *
      16             :  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
      17             :  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
      18             :  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
      19             :  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
      20             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
      21             :  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
      22             :  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
      23             :  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
      24             :  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      25             :  */
      26             : 
      27         298 : SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
      28             : {
      29             : #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
      30             :         return "x86" SLJIT_CPUINFO " ABI:fastcall";
      31             : #else
      32         298 :         return "x86" SLJIT_CPUINFO;
      33             : #endif
      34             : }
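
/* Illustrative caller sketch (not part of the instrumented file):
   sljit_get_platform_name() is part of SLJIT's public API, declared in
   sljitLir.h, and returns the string built above.  A minimal consumer:

       #include <stdio.h>
       #include "sljitLir.h"

       int main(void)
       {
               puts(sljit_get_platform_name());
               return 0;
       }

   On this build it prints "x86" followed by the SLJIT_CPUINFO suffix. */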
      35             : 
      36             : /*
      37             :    32b register indexes:
      38             :      0 - EAX
      39             :      1 - ECX
      40             :      2 - EDX
      41             :      3 - EBX
      42             :      4 - ESP
      43             :      5 - EBP
      44             :      6 - ESI
      45             :      7 - EDI
      46             : */
      47             : 
      48             : /*
      49             :    64b register indexes:
      50             :      0 - RAX
      51             :      1 - RCX
      52             :      2 - RDX
      53             :      3 - RBX
      54             :      4 - RSP
      55             :      5 - RBP
      56             :      6 - RSI
      57             :      7 - RDI
      58             :      8 - R8   - From now on REX prefix is required
      59             :      9 - R9
      60             :     10 - R10
      61             :     11 - R11
      62             :     12 - R12
      63             :     13 - R13
      64             :     14 - R14
      65             :     15 - R15
      66             : */
      67             : 
      68             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
      69             : 
      70             : /* Last register + 1. */
      71             : #define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
      72             : 
      73             : static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
      74             :         0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
      75             : };
      76             : 
      77             : #define CHECK_EXTRA_REGS(p, w, do) \
      78             :         if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
      79             :                 if (p <= compiler->scratches) \
      80             :                         w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
      81             :                 else \
      82             :                         w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
      83             :                 p = SLJIT_MEM1(SLJIT_SP); \
      84             :                 do; \
      85             :         }
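
/* Illustrative usage sketch (hypothetical call site, not from the original
   source): on x86-32 the virtual registers SLJIT_R3 .. SLJIT_S3 have no
   hardware register of their own, so the macro above redirects them to stack
   slots addressed through SLJIT_SP:

       sljit_s32 p = dst;               // any register operand
       sljit_sw  w = dstw;
       CHECK_EXTRA_REGS(p, w, (void)0);
       // p is now either the original register or SLJIT_MEM1(SLJIT_SP),
       // and w holds the corresponding stack offset.
*/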
      86             : 
      87             : #else /* SLJIT_CONFIG_X86_32 */
      88             : 
      89             : /* Last register + 1. */
      90             : #define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
      91             : #define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
      92             : 
      93             : /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present";
      94             :    note: avoid using r12 and r13 for memory addressing,
      95             :    therefore r12 is better used as a higher saved register. */
      96             : #ifndef _WIN64
      97             : /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
      98             : static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
      99             :         0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
     100             : };
     101             : /* low-map. reg_map & 0x7. */
     102             : static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
     103             :         0, 0, 6, 7, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
     104             : };
     105             : #else
     106             : /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
     107             : static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
     108             :         0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
     109             : };
     110             : /* low-map. reg_map & 0x7. */
     111             : static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
     112             :         0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
     113             : };
     114             : #endif
     115             : 
     116             : /* Args: xmm0-xmm3 */
     117             : static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
     118             :         4, 0, 1, 2, 3, 5, 6
     119             : };
     120             : /* low-map. freg_map & 0x7. */
     121             : static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
     122             :         4, 0, 1, 2, 3, 5, 6
     123             : };
     124             : 
     125             : #define REX_W           0x48
     126             : #define REX_R           0x44
     127             : #define REX_X           0x42
     128             : #define REX_B           0x41
     129             : #define REX             0x40
     130             : 
     131             : #ifndef _WIN64
     132             : #define HALFWORD_MAX 0x7fffffffl
     133             : #define HALFWORD_MIN -0x80000000l
     134             : #else
     135             : #define HALFWORD_MAX 0x7fffffffll
     136             : #define HALFWORD_MIN -0x80000000ll
     137             : #endif
     138             : 
     139             : #define IS_HALFWORD(x)          ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
     140             : #define NOT_HALFWORD(x)         ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
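
/* Illustrative values (not part of the original source): IS_HALFWORD() tells
   whether a 64-bit immediate can be encoded as a sign-extended imm32:

       IS_HALFWORD(0x7fffffff)    -> 1
       IS_HALFWORD(-0x80000000ll) -> 1
       IS_HALFWORD(0x80000000ll)  -> 0   (needs a full 64-bit load instead)
*/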
     141             : 
     142             : #define CHECK_EXTRA_REGS(p, w, do)
     143             : 
     144             : #endif /* SLJIT_CONFIG_X86_32 */
     145             : 
     146             : #define TMP_FREG        (0)
     147             : 
     148             : /* Size flags for emit_x86_instruction: */
     149             : #define EX86_BIN_INS            0x0010
     150             : #define EX86_SHIFT_INS          0x0020
     151             : #define EX86_REX                0x0040
     152             : #define EX86_NO_REXW            0x0080
     153             : #define EX86_BYTE_ARG           0x0100
     154             : #define EX86_HALF_ARG           0x0200
     155             : #define EX86_PREF_66            0x0400
     156             : #define EX86_PREF_F2            0x0800
     157             : #define EX86_PREF_F3            0x1000
     158             : #define EX86_SSE2_OP1           0x2000
     159             : #define EX86_SSE2_OP2           0x4000
     160             : #define EX86_SSE2               (EX86_SSE2_OP1 | EX86_SSE2_OP2)
     161             : 
     162             : /* --------------------------------------------------------------------- */
      163             : /*  Instruction forms                                                    */
     164             : /* --------------------------------------------------------------------- */
     165             : 
     166             : #define ADD             (/* BINARY */ 0 << 3)
     167             : #define ADD_EAX_i32     0x05
     168             : #define ADD_r_rm        0x03
     169             : #define ADD_rm_r        0x01
     170             : #define ADDSD_x_xm      0x58
     171             : #define ADC             (/* BINARY */ 2 << 3)
     172             : #define ADC_EAX_i32     0x15
     173             : #define ADC_r_rm        0x13
     174             : #define ADC_rm_r        0x11
     175             : #define AND             (/* BINARY */ 4 << 3)
     176             : #define AND_EAX_i32     0x25
     177             : #define AND_r_rm        0x23
     178             : #define AND_rm_r        0x21
     179             : #define ANDPD_x_xm      0x54
     180             : #define BSR_r_rm        (/* GROUP_0F */ 0xbd)
     181             : #define CALL_i32        0xe8
     182             : #define CALL_rm         (/* GROUP_FF */ 2 << 3)
     183             : #define CDQ             0x99
     184             : #define CMOVE_r_rm      (/* GROUP_0F */ 0x44)
     185             : #define CMP             (/* BINARY */ 7 << 3)
     186             : #define CMP_EAX_i32     0x3d
     187             : #define CMP_r_rm        0x3b
     188             : #define CMP_rm_r        0x39
     189             : #define CVTPD2PS_x_xm   0x5a
     190             : #define CVTSI2SD_x_rm   0x2a
     191             : #define CVTTSD2SI_r_xm  0x2c
     192             : #define DIV             (/* GROUP_F7 */ 6 << 3)
     193             : #define DIVSD_x_xm      0x5e
     194             : #define FSTPS           0xd9
     195             : #define FSTPD           0xdd
     196             : #define INT3            0xcc
     197             : #define IDIV            (/* GROUP_F7 */ 7 << 3)
     198             : #define IMUL            (/* GROUP_F7 */ 5 << 3)
     199             : #define IMUL_r_rm       (/* GROUP_0F */ 0xaf)
     200             : #define IMUL_r_rm_i8    0x6b
     201             : #define IMUL_r_rm_i32   0x69
     202             : #define JE_i8           0x74
     203             : #define JNE_i8          0x75
     204             : #define JMP_i8          0xeb
     205             : #define JMP_i32         0xe9
     206             : #define JMP_rm          (/* GROUP_FF */ 4 << 3)
     207             : #define LEA_r_m         0x8d
     208             : #define MOV_r_rm        0x8b
     209             : #define MOV_r_i32       0xb8
     210             : #define MOV_rm_r        0x89
     211             : #define MOV_rm_i32      0xc7
     212             : #define MOV_rm8_i8      0xc6
     213             : #define MOV_rm8_r8      0x88
     214             : #define MOVSD_x_xm      0x10
     215             : #define MOVSD_xm_x      0x11
     216             : #define MOVSXD_r_rm     0x63
     217             : #define MOVSX_r_rm8     (/* GROUP_0F */ 0xbe)
     218             : #define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
     219             : #define MOVZX_r_rm8     (/* GROUP_0F */ 0xb6)
     220             : #define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
     221             : #define MUL             (/* GROUP_F7 */ 4 << 3)
     222             : #define MULSD_x_xm      0x59
     223             : #define NEG_rm          (/* GROUP_F7 */ 3 << 3)
     224             : #define NOP             0x90
     225             : #define NOT_rm          (/* GROUP_F7 */ 2 << 3)
     226             : #define OR              (/* BINARY */ 1 << 3)
     227             : #define OR_r_rm         0x0b
     228             : #define OR_EAX_i32      0x0d
     229             : #define OR_rm_r         0x09
     230             : #define OR_rm8_r8       0x08
     231             : #define POP_r           0x58
     232             : #define POP_rm          0x8f
     233             : #define POPF            0x9d
     234             : #define PREFETCH        0x18
     235             : #define PUSH_i32        0x68
     236             : #define PUSH_r          0x50
     237             : #define PUSH_rm         (/* GROUP_FF */ 6 << 3)
     238             : #define PUSHF           0x9c
     239             : #define RET_near        0xc3
     240             : #define RET_i16         0xc2
     241             : #define SBB             (/* BINARY */ 3 << 3)
     242             : #define SBB_EAX_i32     0x1d
     243             : #define SBB_r_rm        0x1b
     244             : #define SBB_rm_r        0x19
     245             : #define SAR             (/* SHIFT */ 7 << 3)
     246             : #define SHL             (/* SHIFT */ 4 << 3)
     247             : #define SHR             (/* SHIFT */ 5 << 3)
     248             : #define SUB             (/* BINARY */ 5 << 3)
     249             : #define SUB_EAX_i32     0x2d
     250             : #define SUB_r_rm        0x2b
     251             : #define SUB_rm_r        0x29
     252             : #define SUBSD_x_xm      0x5c
     253             : #define TEST_EAX_i32    0xa9
     254             : #define TEST_rm_r       0x85
     255             : #define UCOMISD_x_xm    0x2e
     256             : #define UNPCKLPD_x_xm   0x14
     257             : #define XCHG_EAX_r      0x90
     258             : #define XCHG_r_rm       0x87
     259             : #define XOR             (/* BINARY */ 6 << 3)
     260             : #define XOR_EAX_i32     0x35
     261             : #define XOR_r_rm        0x33
     262             : #define XOR_rm_r        0x31
     263             : #define XORPD_x_xm      0x57
     264             : 
     265             : #define GROUP_0F        0x0f
     266             : #define GROUP_F7        0xf7
     267             : #define GROUP_FF        0xff
     268             : #define GROUP_BINARY_81 0x81
     269             : #define GROUP_BINARY_83 0x83
     270             : #define GROUP_SHIFT_1   0xd1
     271             : #define GROUP_SHIFT_N   0xc1
     272             : #define GROUP_SHIFT_CL  0xd3
     273             : 
     274             : #define MOD_REG         0xc0
     275             : #define MOD_DISP8       0x40
     276             : 
     277             : #define INC_SIZE(s)                     (*inst++ = (s), compiler->size += (s))
     278             : 
     279             : #define PUSH_REG(r)                     (*inst++ = (PUSH_r + (r)))
     280             : #define POP_REG(r)                      (*inst++ = (POP_r + (r)))
     281             : #define RET()                           (*inst++ = (RET_near))
     282             : #define RET_I16(n)                      (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
     283             : /* r32, r/m32 */
     284             : #define MOV_RM(mod, reg, rm)            (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
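
/* Illustrative note (not part of the original source): MOV_RM() packs a ModRM
   byte as mod<<6 | reg<<3 | rm.  With mod == 0x3 both operands are registers,
   so for "mov eax, ecx" (reg = 0, rm = 1):

       MOV_RM(0x3, 0, 1);

   emits the two bytes 0x8b 0xc1. */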
     285             : 
      286             : /* Multithreading does not affect these static variables, since they only store
      287             :    built-in CPU features. They can therefore be harmlessly overwritten by different
      288             :    threads that detect the CPU features at the same time. */
     289             : #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     290             : static sljit_s32 cpu_has_sse2 = -1;
     291             : #endif
     292             : static sljit_s32 cpu_has_cmov = -1;
     293             : 
     294             : #ifdef _WIN32_WCE
     295             : #include <cmnintrin.h>
     296             : #elif defined(_MSC_VER) && _MSC_VER >= 1400
     297             : #include <intrin.h>
     298             : #endif
     299             : 
     300             : /******************************************************/
     301             : /*    Unaligned-store functions                       */
     302             : /******************************************************/
     303             : 
     304           0 : static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
     305             : {
     306           0 :         SLJIT_MEMCPY(addr, &value, sizeof(value));
     307           0 : }
     308             : 
     309    22044845 : static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
     310             : {
     311    22044845 :         SLJIT_MEMCPY(addr, &value, sizeof(value));
     312    22044845 : }
     313             : 
     314           0 : static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
     315             : {
     316           0 :         SLJIT_MEMCPY(addr, &value, sizeof(value));
     317           0 : }
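
/* Illustrative note (not part of the original source): storing through a
   misaligned sljit_s32* directly would be undefined behaviour, so the helpers
   above go through SLJIT_MEMCPY; compilers reduce the fixed-size copy to a
   single unaligned mov.  A hypothetical caller patching a 32-bit displacement
   inside already-emitted code:

       sljit_unaligned_store_s32(code_ptr, (sljit_s32)(target - (addr + 4)));
*/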
     318             : 
     319             : /******************************************************/
     320             : /*    Utility functions                               */
     321             : /******************************************************/
     322             : 
     323         139 : static void get_cpu_features(void)
     324             : {
     325             :         sljit_u32 features;
     326             : 
     327             : #if defined(_MSC_VER) && _MSC_VER >= 1400
     328             : 
     329             :         int CPUInfo[4];
     330             :         __cpuid(CPUInfo, 1);
     331             :         features = (sljit_u32)CPUInfo[3];
     332             : 
     333             : #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
     334             : 
     335             :         /* AT&T syntax. */
     336         139 :         __asm__ (
     337             :                 "movl $0x1, %%eax\n"
     338             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     339             :                 /* On x86-32, there is no red zone, so this
     340             :                    should work (no need for a local variable). */
     341             :                 "push %%ebx\n"
     342             : #endif
     343             :                 "cpuid\n"
     344             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     345             :                 "pop %%ebx\n"
     346             : #endif
     347             :                 "movl %%edx, %0\n"
     348             :                 : "=g" (features)
     349             :                 :
     350             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     351             :                 : "%eax", "%ecx", "%edx"
     352             : #else
     353             :                 : "%rax", "%rbx", "%rcx", "%rdx"
     354             : #endif
     355             :         );
     356             : 
     357             : #else /* _MSC_VER && _MSC_VER >= 1400 */
     358             : 
     359             :         /* Intel syntax. */
     360             :         __asm {
     361             :                 mov eax, 1
     362             :                 cpuid
     363             :                 mov features, edx
     364             :         }
     365             : 
     366             : #endif /* _MSC_VER && _MSC_VER >= 1400 */
     367             : 
     368             : #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     369             :         cpu_has_sse2 = (features >> 26) & 0x1;
     370             : #endif
     371         139 :         cpu_has_cmov = (features >> 15) & 0x1;
     372         139 : }
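
/* Illustrative, standalone alternative (not part of the instrumented file):
   the same CPUID leaf 1 EDX bits can be read with the GCC/Clang <cpuid.h>
   helper instead of inline assembly; bit 15 is CMOV and bit 26 is SSE2,
   matching the shifts used above. */

#include <cpuid.h>

static void example_get_cpu_features(int *has_cmov, int *has_sse2)
{
        unsigned int eax, ebx, ecx, edx;

        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
                *has_cmov = (edx >> 15) & 0x1;
                *has_sse2 = (edx >> 26) & 0x1;
        } else {
                *has_cmov = *has_sse2 = 0;
        }
}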
     373             : 
     374    19892029 : static sljit_u8 get_jump_code(sljit_s32 type)
     375             : {
     376    19892029 :         switch (type) {
     377     8901506 :         case SLJIT_EQUAL:
     378             :         case SLJIT_EQUAL_F64:
     379     8901506 :                 return 0x84 /* je */;
     380             : 
     381     1833172 :         case SLJIT_NOT_EQUAL:
     382             :         case SLJIT_NOT_EQUAL_F64:
     383     1833172 :                 return 0x85 /* jne */;
     384             : 
     385      179789 :         case SLJIT_LESS:
     386             :         case SLJIT_LESS_F64:
     387      179789 :                 return 0x82 /* jc */;
     388             : 
     389     8659525 :         case SLJIT_GREATER_EQUAL:
     390             :         case SLJIT_GREATER_EQUAL_F64:
     391     8659525 :                 return 0x83 /* jae */;
     392             : 
     393      278505 :         case SLJIT_GREATER:
     394             :         case SLJIT_GREATER_F64:
     395      278505 :                 return 0x87 /* jnbe */;
     396             : 
     397       39528 :         case SLJIT_LESS_EQUAL:
     398             :         case SLJIT_LESS_EQUAL_F64:
     399       39528 :                 return 0x86 /* jbe */;
     400             : 
     401           0 :         case SLJIT_SIG_LESS:
     402           0 :                 return 0x8c /* jl */;
     403             : 
     404           0 :         case SLJIT_SIG_GREATER_EQUAL:
     405           0 :                 return 0x8d /* jnl */;
     406             : 
     407           0 :         case SLJIT_SIG_GREATER:
     408           0 :                 return 0x8f /* jnle */;
     409             : 
     410           4 :         case SLJIT_SIG_LESS_EQUAL:
     411           4 :                 return 0x8e /* jle */;
     412             : 
     413           0 :         case SLJIT_OVERFLOW:
     414             :         case SLJIT_MUL_OVERFLOW:
     415           0 :                 return 0x80 /* jo */;
     416             : 
     417           0 :         case SLJIT_NOT_OVERFLOW:
     418             :         case SLJIT_MUL_NOT_OVERFLOW:
     419           0 :                 return 0x81 /* jno */;
     420             : 
     421           0 :         case SLJIT_UNORDERED_F64:
     422           0 :                 return 0x8a /* jp */;
     423             : 
     424           0 :         case SLJIT_ORDERED_F64:
     425           0 :                 return 0x8b /* jpo */;
     426             :         }
     427           0 :         return 0;
     428             : }
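
/* Illustrative note (not part of the original source): the values returned
   above are the second byte of the two-byte near form 0x0f 0x8x.  The 8-bit
   short form uses the same condition nibble at 0x7x, which is why
   generate_near_jump_code() below can derive it as get_jump_code(type) - 0x10:

       get_jump_code(SLJIT_EQUAL) == 0x84    (0f 84 rel32, "je" near)
       0x84 - 0x10                == 0x74    (74 rel8, "je" short)
*/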
     429             : 
     430             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     431             : static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
     432             : #else
     433             : static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
     434             : #endif
     435             : 
     436    20439855 : static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
     437             : {
     438             :         sljit_s32 short_jump;
     439             :         sljit_uw label_addr;
     440             : 
     441    20439855 :         if (jump->flags & JUMP_LABEL)
     442    20388200 :                 label_addr = (sljit_uw)(code + jump->u.label->size);
     443             :         else
     444       51655 :                 label_addr = jump->u.target - executable_offset;
     445             : 
     446    20439855 :         short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
     447             : 
     448             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     449    20439855 :         if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
     450        8269 :                 return generate_far_jump_code(jump, code_ptr, type);
     451             : #endif
     452             : 
     453    20431586 :         if (type == SLJIT_JUMP) {
     454      451294 :                 if (short_jump)
     455      335538 :                         *code_ptr++ = JMP_i8;
     456             :                 else
     457      115756 :                         *code_ptr++ = JMP_i32;
     458      451294 :                 jump->addr++;
     459             :         }
     460    19980292 :         else if (type >= SLJIT_FAST_CALL) {
     461       88535 :                 short_jump = 0;
     462       88535 :                 *code_ptr++ = CALL_i32;
     463       88535 :                 jump->addr++;
     464             :         }
     465    19891757 :         else if (short_jump) {
     466      301106 :                 *code_ptr++ = get_jump_code(type) - 0x10;
     467      301106 :                 jump->addr++;
     468             :         }
     469             :         else {
     470    19590651 :                 *code_ptr++ = GROUP_0F;
     471    19590651 :                 *code_ptr++ = get_jump_code(type);
     472    19590651 :                 jump->addr += 2;
     473             :         }
     474             : 
     475    20431586 :         if (short_jump) {
     476      636644 :                 jump->flags |= PATCH_MB;
     477      636644 :                 code_ptr += sizeof(sljit_s8);
     478             :         } else {
     479    19794942 :                 jump->flags |= PATCH_MW;
     480    19794942 :                 code_ptr += sizeof(sljit_s32);
     481             :         }
     482             : 
     483    20431586 :         return code_ptr;
     484             : }
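
/* Illustrative numbers (hypothetical, not from the original source): the
   short_jump test above checks whether the displacement measured from the end
   of a two-byte instruction fits in a signed byte.  With jump->addr == 0x1000
   and label_addr == 0x1040 the displacement is 0x1040 - 0x1002 = 0x3e, so the
   rel8 form is emitted; a target a few kilobytes away falls back to rel32. */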
     485             : 
     486       51654 : SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
     487             : {
     488             :         struct sljit_memory_fragment *buf;
     489             :         sljit_u8 *code;
     490             :         sljit_u8 *code_ptr;
     491             :         sljit_u8 *buf_ptr;
     492             :         sljit_u8 *buf_end;
     493             :         sljit_u8 len;
     494             :         sljit_sw executable_offset;
     495             :         sljit_sw jump_addr;
     496             : 
     497             :         struct sljit_label *label;
     498             :         struct sljit_jump *jump;
     499             :         struct sljit_const *const_;
     500             : 
     501       51654 :         CHECK_ERROR_PTR();
     502             :         CHECK_PTR(check_sljit_generate_code(compiler));
     503       51654 :         reverse_buf(compiler);
     504             : 
     505             :         /* Second code generation pass. */
     506       51654 :         code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
     507       51654 :         PTR_FAIL_WITH_EXEC_IF(code);
     508       51654 :         buf = compiler->buf;
     509             : 
     510       51654 :         code_ptr = code;
     511       51654 :         label = compiler->labels;
     512       51654 :         jump = compiler->jumps;
     513       51654 :         const_ = compiler->consts;
     514       51654 :         executable_offset = SLJIT_EXEC_OFFSET(code);
     515             : 
     516             :         do {
     517      103203 :                 buf_ptr = buf->memory;
     518      103203 :                 buf_end = buf_ptr + buf->used_size;
     519             :                 do {
     520    65433079 :                         len = *buf_ptr++;
     521    65433079 :                         if (len > 0) {
     522             :                                 /* The code is already generated. */
     523    43736665 :                                 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
     524    43736665 :                                 code_ptr += len;
     525    43736665 :                                 buf_ptr += len;
     526             :                         }
     527             :                         else {
     528    21696414 :                                 if (*buf_ptr >= 2) {
     529    20439855 :                                         jump->addr = (sljit_uw)code_ptr;
     530    20439855 :                                         if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
     531    20439855 :                                                 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
     532             :                                         else {
     533             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     534             :                                                 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
     535             : #else
     536           0 :                                                 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
     537             : #endif
     538             :                                         }
     539    20439855 :                                         jump = jump->next;
     540             :                                 }
     541     1256559 :                                 else if (*buf_ptr == 0) {
     542     1256559 :                                         label->addr = ((sljit_uw)code_ptr) + executable_offset;
     543     1256559 :                                         label->size = code_ptr - code;
     544     1256559 :                                         label = label->next;
     545             :                                 }
     546             :                                 else { /* *buf_ptr is 1 */
     547           0 :                                         const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
     548           0 :                                         const_ = const_->next;
     549             :                                 }
     550    21696414 :                                 buf_ptr++;
     551             :                         }
     552    65433079 :                 } while (buf_ptr < buf_end);
     553             :                 SLJIT_ASSERT(buf_ptr == buf_end);
     554      103203 :                 buf = buf->next;
     555      103203 :         } while (buf);
     556             : 
     557             :         SLJIT_ASSERT(!label);
     558             :         SLJIT_ASSERT(!jump);
     559             :         SLJIT_ASSERT(!const_);
     560             : 
     561       51654 :         jump = compiler->jumps;
     562    20543163 :         while (jump) {
     563    20439855 :                 jump_addr = jump->addr + executable_offset;
     564             : 
     565    20439855 :                 if (jump->flags & PATCH_MB) {
     566             :                         SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
     567      636644 :                         *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
     568    19803211 :                 } else if (jump->flags & PATCH_MW) {
     569    19794942 :                         if (jump->flags & JUMP_LABEL) {
     570             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     571             :                                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
     572             : #else
     573             :                                 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
     574    19751556 :                                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
     575             : #endif
     576             :                         }
     577             :                         else {
     578             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     579             :                                 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
     580             : #else
     581             :                                 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
     582       43386 :                                 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
     583             : #endif
     584             :                         }
     585             :                 }
     586             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     587        8269 :                 else if (jump->flags & PATCH_MD)
     588           0 :                         sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
     589             : #endif
     590             : 
     591    20439855 :                 jump = jump->next;
     592             :         }
     593             : 
     594             :         /* Some space may be wasted because of short jumps. */
     595             :         SLJIT_ASSERT(code_ptr <= code + compiler->size);
     596       51654 :         compiler->error = SLJIT_ERR_COMPILED;
     597       51654 :         compiler->executable_offset = executable_offset;
     598       51654 :         compiler->executable_size = code_ptr - code;
     599       51654 :         return (void*)(code + executable_offset);
     600             : }
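
/* Summary of the record format consumed above (derived from the loop itself,
   not stated in the original source): the first pass stores the program as
   length-prefixed records.  A leading byte len > 0 is followed by len literal
   machine-code bytes; a leading 0 is followed by one tag byte, where tag 0
   marks a label, tag 1 marks a constant, and tag >= 2 marks a jump whose
   type is tag - 2. */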
     601             : 
     602        2474 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
     603             : {
     604        2474 :         switch (feature_type) {
     605           0 :         case SLJIT_HAS_FPU:
     606             : #ifdef SLJIT_IS_FPU_AVAILABLE
     607             :                 return SLJIT_IS_FPU_AVAILABLE;
     608             : #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     609             :                 if (cpu_has_sse2 == -1)
     610             :                         get_cpu_features();
     611             :                 return cpu_has_sse2;
     612             : #else /* SLJIT_DETECT_SSE2 */
     613           0 :                 return 1;
     614             : #endif /* SLJIT_DETECT_SSE2 */
     615             : 
     616             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     617             :         case SLJIT_HAS_VIRTUAL_REGISTERS:
     618             :                 return 1;
     619             : #endif
     620             : 
     621        2474 :         case SLJIT_HAS_CLZ:
     622             :         case SLJIT_HAS_CMOV:
     623        2474 :                 if (cpu_has_cmov == -1)
     624         139 :                         get_cpu_features();
     625        2474 :                 return cpu_has_cmov;
     626             : 
     627           0 :         case SLJIT_HAS_SSE2:
     628             : #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     629             :                 if (cpu_has_sse2 == -1)
     630             :                         get_cpu_features();
     631             :                 return cpu_has_sse2;
     632             : #else
     633           0 :                 return 1;
     634             : #endif
     635             : 
     636           0 :         default:
     637           0 :                 return 0;
     638             :         }
     639             : }
     640             : 
     641             : /* --------------------------------------------------------------------- */
     642             : /*  Operators                                                            */
     643             : /* --------------------------------------------------------------------- */
     644             : 
     645             : #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
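
/* Illustrative value (not part of the original source): BINARY_OPCODE() packs
   the four encodings of one ALU operation into a single 32-bit constant, e.g.

       BINARY_OPCODE(ADD) == 0x05030100
       (ADD_EAX_i32 << 24 | ADD_r_rm << 16 | ADD_rm_r << 8 | ADD)
*/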
     646             : 
     647             : static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
     648             :         sljit_u32 op_types,
     649             :         sljit_s32 dst, sljit_sw dstw,
     650             :         sljit_s32 src1, sljit_sw src1w,
     651             :         sljit_s32 src2, sljit_sw src2w);
     652             : 
     653             : static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
     654             :         sljit_u32 op_types,
     655             :         sljit_s32 dst, sljit_sw dstw,
     656             :         sljit_s32 src1, sljit_sw src1w,
     657             :         sljit_s32 src2, sljit_sw src2w);
     658             : 
     659             : static sljit_s32 emit_mov(struct sljit_compiler *compiler,
     660             :         sljit_s32 dst, sljit_sw dstw,
     661             :         sljit_s32 src, sljit_sw srcw);
     662             : 
     663             : #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
     664             :         FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
     665             : 
     666             : static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
     667             :         sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
     668             : 
     669             : static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
     670             :         sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
     671             : 
     672             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     673             : #include "sljitNativeX86_32.c"
     674             : #else
     675             : #include "sljitNativeX86_64.c"
     676             : #endif
     677             : 
     678     2623442 : static sljit_s32 emit_mov(struct sljit_compiler *compiler,
     679             :         sljit_s32 dst, sljit_sw dstw,
     680             :         sljit_s32 src, sljit_sw srcw)
     681             : {
     682             :         sljit_u8* inst;
     683             : 
     684             :         SLJIT_ASSERT(dst != SLJIT_UNUSED);
     685             : 
     686     2623442 :         if (FAST_IS_REG(src)) {
     687     1144519 :                 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
     688     1144519 :                 FAIL_IF(!inst);
     689     1144519 :                 *inst = MOV_rm_r;
     690     1144519 :                 return SLJIT_SUCCESS;
     691             :         }
     692     1478923 :         if (src & SLJIT_IMM) {
     693      340254 :                 if (FAST_IS_REG(dst)) {
     694             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     695             :                         return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
     696             : #else
     697      335357 :                         if (!compiler->mode32) {
     698      335357 :                                 if (NOT_HALFWORD(srcw))
     699           0 :                                         return emit_load_imm64(compiler, dst, srcw);
     700             :                         }
     701             :                         else
     702           0 :                                 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
     703             : #endif
     704             :                 }
     705             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     706      340254 :                 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
      707             :                         /* Immediate to memory move. Only the SLJIT_MOV operation copies
      708             :                            an immediate directly into memory, so TMP_REG1 can be used. */
     709           0 :                         FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
     710           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
     711           0 :                         FAIL_IF(!inst);
     712           0 :                         *inst = MOV_rm_r;
     713           0 :                         return SLJIT_SUCCESS;
     714             :                 }
     715             : #endif
     716      340254 :                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
     717      340254 :                 FAIL_IF(!inst);
     718      340254 :                 *inst = MOV_rm_i32;
     719      340254 :                 return SLJIT_SUCCESS;
     720             :         }
     721     1138669 :         if (FAST_IS_REG(dst)) {
     722     1138640 :                 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
     723     1138640 :                 FAIL_IF(!inst);
     724     1138640 :                 *inst = MOV_r_rm;
     725     1138640 :                 return SLJIT_SUCCESS;
     726             :         }
     727             : 
      728             :         /* Memory to memory move. Only the SLJIT_MOV operation copies
      729             :            data from memory to memory, so TMP_REG1 can be used. */
     730          29 :         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
     731          29 :         FAIL_IF(!inst);
     732          29 :         *inst = MOV_r_rm;
     733          29 :         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
     734          29 :         FAIL_IF(!inst);
     735          29 :         *inst = MOV_rm_r;
     736          29 :         return SLJIT_SUCCESS;
     737             : }
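
/* Summary (not part of the original source): emit_mov() picks the cheapest
   form that fits: MOV_rm_r (0x89) for register sources, MOV_rm_i32 or a full
   64-bit immediate load for SLJIT_IMM sources, MOV_r_rm (0x8b) for memory
   sources with a register destination, and a two-instruction copy through
   TMP_REG1 for the memory-to-memory case. */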
     738             : 
     739           0 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
     740             : {
     741             :         sljit_u8 *inst;
     742             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     743             :         sljit_s32 size;
     744             : #endif
     745             : 
     746           0 :         CHECK_ERROR();
     747             :         CHECK(check_sljit_emit_op0(compiler, op));
     748             : 
     749           0 :         switch (GET_OPCODE(op)) {
     750           0 :         case SLJIT_BREAKPOINT:
     751           0 :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
     752           0 :                 FAIL_IF(!inst);
     753           0 :                 INC_SIZE(1);
     754           0 :                 *inst = INT3;
     755           0 :                 break;
     756           0 :         case SLJIT_NOP:
     757           0 :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
     758           0 :                 FAIL_IF(!inst);
     759           0 :                 INC_SIZE(1);
     760           0 :                 *inst = NOP;
     761           0 :                 break;
     762           0 :         case SLJIT_LMUL_UW:
     763             :         case SLJIT_LMUL_SW:
     764             :         case SLJIT_DIVMOD_UW:
     765             :         case SLJIT_DIVMOD_SW:
     766             :         case SLJIT_DIV_UW:
     767             :         case SLJIT_DIV_SW:
     768             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     769             : #ifdef _WIN64
     770             :                 SLJIT_ASSERT(
     771             :                         reg_map[SLJIT_R0] == 0
     772             :                         && reg_map[SLJIT_R1] == 2
     773             :                         && reg_map[TMP_REG1] > 7);
     774             : #else
     775             :                 SLJIT_ASSERT(
     776             :                         reg_map[SLJIT_R0] == 0
     777             :                         && reg_map[SLJIT_R1] < 7
     778             :                         && reg_map[TMP_REG1] == 2);
     779             : #endif
     780           0 :                 compiler->mode32 = op & SLJIT_I32_OP;
     781             : #endif
     782           0 :                 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
     783             : 
     784           0 :                 op = GET_OPCODE(op);
     785           0 :                 if ((op | 0x2) == SLJIT_DIV_UW) {
     786             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
     787             :                         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
     788             :                         inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
     789             : #else
     790           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
     791             : #endif
     792           0 :                         FAIL_IF(!inst);
     793           0 :                         *inst = XOR_r_rm;
     794             :                 }
     795             : 
     796           0 :                 if ((op | 0x2) == SLJIT_DIV_SW) {
     797             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
     798             :                         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
     799             : #endif
     800             : 
     801             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     802             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
     803             :                         FAIL_IF(!inst);
     804             :                         INC_SIZE(1);
     805             :                         *inst = CDQ;
     806             : #else
     807           0 :                         if (compiler->mode32) {
     808           0 :                                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
     809           0 :                                 FAIL_IF(!inst);
     810           0 :                                 INC_SIZE(1);
     811           0 :                                 *inst = CDQ;
     812             :                         } else {
     813           0 :                                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
     814           0 :                                 FAIL_IF(!inst);
     815           0 :                                 INC_SIZE(2);
     816           0 :                                 *inst++ = REX_W;
     817           0 :                                 *inst = CDQ;
     818             :                         }
     819             : #endif
     820             :                 }
     821             : 
     822             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     823             :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
     824             :                 FAIL_IF(!inst);
     825             :                 INC_SIZE(2);
     826             :                 *inst++ = GROUP_F7;
     827             :                 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
     828             : #else
     829             : #ifdef _WIN64
     830             :                 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
     831             : #else
     832           0 :                 size = (!compiler->mode32) ? 3 : 2;
     833             : #endif
     834           0 :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
     835           0 :                 FAIL_IF(!inst);
     836           0 :                 INC_SIZE(size);
     837             : #ifdef _WIN64
     838             :                 if (!compiler->mode32)
     839             :                         *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
     840             :                 else if (op >= SLJIT_DIVMOD_UW)
     841             :                         *inst++ = REX_B;
     842             :                 *inst++ = GROUP_F7;
     843             :                 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
     844             : #else
     845           0 :                 if (!compiler->mode32)
     846           0 :                         *inst++ = REX_W;
     847           0 :                 *inst++ = GROUP_F7;
     848           0 :                 *inst = MOD_REG | reg_map[SLJIT_R1];
     849             : #endif
     850             : #endif
     851           0 :                 switch (op) {
     852           0 :                 case SLJIT_LMUL_UW:
     853           0 :                         *inst |= MUL;
     854           0 :                         break;
     855           0 :                 case SLJIT_LMUL_SW:
     856           0 :                         *inst |= IMUL;
     857           0 :                         break;
     858           0 :                 case SLJIT_DIVMOD_UW:
     859             :                 case SLJIT_DIV_UW:
     860           0 :                         *inst |= DIV;
     861           0 :                         break;
     862           0 :                 case SLJIT_DIVMOD_SW:
     863             :                 case SLJIT_DIV_SW:
     864           0 :                         *inst |= IDIV;
     865           0 :                         break;
     866             :                 }
     867             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
     868           0 :                 if (op <= SLJIT_DIVMOD_SW)
     869           0 :                         EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
     870             : #else
     871             :                 if (op >= SLJIT_DIV_UW)
     872             :                         EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
     873             : #endif
     874           0 :                 break;
     875             :         }
     876             : 
     877           0 :         return SLJIT_SUCCESS;
     878             : }
     879             : 
     880             : #define ENCODE_PREFIX(prefix) \
     881             :         do { \
     882             :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
     883             :                 FAIL_IF(!inst); \
     884             :                 INC_SIZE(1); \
     885             :                 *inst = (prefix); \
     886             :         } while (0)
     887             : 
     888     9013232 : static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
     889             :         sljit_s32 dst, sljit_sw dstw,
     890             :         sljit_s32 src, sljit_sw srcw)
     891             : {
     892             :         sljit_u8* inst;
     893             :         sljit_s32 dst_r;
     894             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     895             :         sljit_s32 work_r;
     896             : #endif
     897             : 
     898             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     899     9013232 :         compiler->mode32 = 0;
     900             : #endif
     901             : 
     902     9013232 :         if (src & SLJIT_IMM) {
     903           0 :                 if (FAST_IS_REG(dst)) {
     904             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     905             :                         return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
     906             : #else
     907           0 :                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
     908           0 :                         FAIL_IF(!inst);
     909           0 :                         *inst = MOV_rm_i32;
     910           0 :                         return SLJIT_SUCCESS;
     911             : #endif
     912             :                 }
     913           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
     914           0 :                 FAIL_IF(!inst);
     915           0 :                 *inst = MOV_rm8_i8;
     916           0 :                 return SLJIT_SUCCESS;
     917             :         }
     918             : 
     919     9013232 :         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
     920             : 
     921     9013232 :         if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
     922             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     923             :                 if (reg_map[src] >= 4) {
     924             :                         SLJIT_ASSERT(dst_r == TMP_REG1);
     925             :                         EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
     926             :                 } else
     927             :                         dst_r = src;
     928             : #else
     929           0 :                 dst_r = src;
     930             : #endif
     931             :         }
     932             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     933             :         else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
     934             :                 /* src, dst are registers. */
     935             :                 SLJIT_ASSERT(SLOW_IS_REG(dst));
     936             :                 if (reg_map[dst] < 4) {
     937             :                         if (dst != src)
     938             :                                 EMIT_MOV(compiler, dst, 0, src, 0);
     939             :                         inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
     940             :                         FAIL_IF(!inst);
     941             :                         *inst++ = GROUP_0F;
     942             :                         *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
     943             :                 }
     944             :                 else {
     945             :                         if (dst != src)
     946             :                                 EMIT_MOV(compiler, dst, 0, src, 0);
     947             :                         if (sign) {
     948             :                                 /* shl reg, 24 */
     949             :                                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
     950             :                                 FAIL_IF(!inst);
     951             :                                 *inst |= SHL;
     952             :                                 /* sar reg, 24 */
     953             :                                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
     954             :                                 FAIL_IF(!inst);
     955             :                                 *inst |= SAR;
     956             :                         }
     957             :                         else {
     958             :                                 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
     959             :                                 FAIL_IF(!inst);
     960             :                                 *(inst + 1) |= AND;
     961             :                         }
     962             :                 }
     963             :                 return SLJIT_SUCCESS;
     964             :         }
     965             : #endif
     966             :         else {
      967             :                 /* src is either a memory operand or, on x86_32, a register with reg_map[src] < 4. */
     968     9013232 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
     969     9013232 :                 FAIL_IF(!inst);
     970     9013232 :                 *inst++ = GROUP_0F;
     971     9013232 :                 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
     972             :         }
     973             : 
     974     9013232 :         if (dst & SLJIT_MEM) {
     975             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     976             :                 if (dst_r == TMP_REG1) {
      977             :                         /* Find an unused register whose reg_map value is < 4. */
     978             :                         if ((dst & REG_MASK) == SLJIT_R0) {
     979             :                                 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
     980             :                                         work_r = SLJIT_R2;
     981             :                                 else
     982             :                                         work_r = SLJIT_R1;
     983             :                         }
     984             :                         else {
     985             :                                 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
     986             :                                         work_r = SLJIT_R0;
     987             :                                 else if ((dst & REG_MASK) == SLJIT_R1)
     988             :                                         work_r = SLJIT_R2;
     989             :                                 else
     990             :                                         work_r = SLJIT_R1;
     991             :                         }
     992             : 
     993             :                         if (work_r == SLJIT_R0) {
     994             :                                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
     995             :                         }
     996             :                         else {
     997             :                                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
     998             :                                 FAIL_IF(!inst);
     999             :                                 *inst = XCHG_r_rm;
    1000             :                         }
    1001             : 
    1002             :                         inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
    1003             :                         FAIL_IF(!inst);
    1004             :                         *inst = MOV_rm8_r8;
    1005             : 
    1006             :                         if (work_r == SLJIT_R0) {
    1007             :                                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
    1008             :                         }
    1009             :                         else {
    1010             :                                 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    1011             :                                 FAIL_IF(!inst);
    1012             :                                 *inst = XCHG_r_rm;
    1013             :                         }
    1014             :                 }
    1015             :                 else {
    1016             :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
    1017             :                         FAIL_IF(!inst);
    1018             :                         *inst = MOV_rm8_r8;
    1019             :                 }
    1020             : #else
    1021           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
    1022           0 :                 FAIL_IF(!inst);
    1023           0 :                 *inst = MOV_rm8_r8;
    1024             : #endif
    1025             :         }
    1026             : 
    1027     9013232 :         return SLJIT_SUCCESS;
    1028             : }
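/* Note on the x86-32 paths above: only EAX, ECX, EDX and EBX have an
   encodable low-byte form (AL, CL, DL, BL), which is what the
   reg_map[...] >= 4 checks detect.  A value held in ESP/EBP/ESI/EDI must
   therefore be moved, or temporarily exchanged, into one of the low four
   registers before an 8-bit store; the XCHG-with-EAX shortcut used above
   is the one-byte 0x90+reg encoding.  On x86-64 the REX prefix makes every
   register byte-addressable, so the #else branches store the byte directly. */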
    1029             : 
    1030           0 : static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
    1031             :         sljit_s32 src, sljit_sw srcw)
    1032             : {
    1033             :         sljit_u8* inst;
    1034             : 
    1035             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1036           0 :         compiler->mode32 = 1;
    1037             : #endif
    1038             : 
    1039           0 :         inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
    1040           0 :         FAIL_IF(!inst);
    1041           0 :         *inst++ = GROUP_0F;
    1042           0 :         *inst++ = PREFETCH;
    1043             : 
    1044           0 :         if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
    1045           0 :                 *inst |= (3 << 3);
    1046           0 :         else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
    1047           0 :                 *inst |= (2 << 3);
    1048             :         else
    1049           0 :                 *inst |= (1 << 3);
    1050             : 
    1051           0 :         return SLJIT_SUCCESS;
    1052             : }
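/* emit_prefetch above emits the x86 prefetch-hint group (0F 18) and then
   patches the reg field (bits 5..3) of the ModRM byte produced by
   emit_x86_instruction: /0 = PREFETCHNTA, /1 = PREFETCHT0, /2 = PREFETCHT1,
   /3 = PREFETCHT2.  Byte-sized mov operations therefore request T2,
   half-word operations T1, and every other size T0. */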
    1053             : 
    1054       56725 : static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
    1055             :         sljit_s32 dst, sljit_sw dstw,
    1056             :         sljit_s32 src, sljit_sw srcw)
    1057             : {
    1058             :         sljit_u8* inst;
    1059             :         sljit_s32 dst_r;
    1060             : 
    1061             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1062       56725 :         compiler->mode32 = 0;
    1063             : #endif
    1064             : 
    1065       56725 :         if (src & SLJIT_IMM) {
    1066           0 :                 if (FAST_IS_REG(dst)) {
    1067             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1068             :                         return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
    1069             : #else
    1070           0 :                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
    1071           0 :                         FAIL_IF(!inst);
    1072           0 :                         *inst = MOV_rm_i32;
    1073           0 :                         return SLJIT_SUCCESS;
    1074             : #endif
    1075             :                 }
    1076           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
    1077           0 :                 FAIL_IF(!inst);
    1078           0 :                 *inst = MOV_rm_i32;
    1079           0 :                 return SLJIT_SUCCESS;
    1080             :         }
    1081             : 
    1082       56725 :         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    1083             : 
    1084       56725 :         if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
    1085           0 :                 dst_r = src;
    1086             :         else {
    1087       56725 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    1088       56725 :                 FAIL_IF(!inst);
    1089       56725 :                 *inst++ = GROUP_0F;
    1090       56725 :                 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
    1091             :         }
    1092             : 
    1093       56725 :         if (dst & SLJIT_MEM) {
    1094           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
    1095           0 :                 FAIL_IF(!inst);
    1096           0 :                 *inst = MOV_rm_r;
    1097             :         }
    1098             : 
    1099       56725 :         return SLJIT_SUCCESS;
    1100             : }
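/* emit_mov_half mirrors emit_mov_byte but needs no register shuffling:
   loads go through MOVZX/MOVSX r, r/m16 (0F B7 / 0F BF), while the store
   and immediate forms rely on the 0x66 operand-size prefix (EX86_PREF_66)
   to turn the ordinary MOV encodings into 16-bit operations. */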
    1101             : 
    1102           4 : static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
    1103             :         sljit_s32 dst, sljit_sw dstw,
    1104             :         sljit_s32 src, sljit_sw srcw)
    1105             : {
    1106             :         sljit_u8* inst;
    1107             : 
    1108           4 :         if (dst == src && dstw == srcw) {
    1109             :                 /* Same input and output */
    1110           4 :                 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    1111           4 :                 FAIL_IF(!inst);
    1112           4 :                 *inst++ = GROUP_F7;
    1113           4 :                 *inst |= opcode;
    1114           4 :                 return SLJIT_SUCCESS;
    1115             :         }
    1116             : 
    1117           0 :         if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
    1118           0 :                 dst = TMP_REG1;
    1119             : 
    1120           0 :         if (FAST_IS_REG(dst)) {
    1121           0 :                 EMIT_MOV(compiler, dst, 0, src, srcw);
    1122           0 :                 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
    1123           0 :                 FAIL_IF(!inst);
    1124           0 :                 *inst++ = GROUP_F7;
    1125           0 :                 *inst |= opcode;
    1126           0 :                 return SLJIT_SUCCESS;
    1127             :         }
    1128             : 
    1129           0 :         EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    1130           0 :         inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    1131           0 :         FAIL_IF(!inst);
    1132           0 :         *inst++ = GROUP_F7;
    1133           0 :         *inst |= opcode;
    1134           0 :         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1135           0 :         return SLJIT_SUCCESS;
    1136             : }
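/* 0xF7 is an opcode group: the ModRM reg field selects the operation
   (/2 = NOT, /3 = NEG, /4 = MUL, /5 = IMUL, /6 = DIV, /7 = IDIV).  NOT_rm
   and NEG_rm carry that digit already shifted into bits 5..3, which is why
   the "*inst |= opcode" lines above complete the instruction. */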
    1137             : 
    1138           0 : static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
    1139             :         sljit_s32 dst, sljit_sw dstw,
    1140             :         sljit_s32 src, sljit_sw srcw)
    1141             : {
    1142             :         sljit_u8* inst;
    1143             : 
    1144           0 :         if (dst == SLJIT_UNUSED)
    1145           0 :                 dst = TMP_REG1;
    1146             : 
    1147           0 :         if (FAST_IS_REG(dst)) {
    1148           0 :                 EMIT_MOV(compiler, dst, 0, src, srcw);
    1149           0 :                 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
    1150           0 :                 FAIL_IF(!inst);
    1151           0 :                 *inst++ = GROUP_F7;
    1152           0 :                 *inst |= NOT_rm;
    1153           0 :                 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
    1154           0 :                 FAIL_IF(!inst);
    1155           0 :                 *inst = OR_r_rm;
    1156           0 :                 return SLJIT_SUCCESS;
    1157             :         }
    1158             : 
    1159           0 :         EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    1160           0 :         inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    1161           0 :         FAIL_IF(!inst);
    1162           0 :         *inst++ = GROUP_F7;
    1163           0 :         *inst |= NOT_rm;
    1164           0 :         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
    1165           0 :         FAIL_IF(!inst);
    1166           0 :         *inst = OR_r_rm;
    1167           0 :         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1168           0 :         return SLJIT_SUCCESS;
    1169             : }
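/* x86 NOT does not affect any flags, so when the caller asked for the zero
   flag (SLJIT_SET_Z) the helper above follows the NOT with an OR of the
   register with itself, which sets ZF/SF from the result without changing
   its value. */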
    1170             : 
    1171             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1172             : static const sljit_sw emit_clz_arg = 32 + 31;
    1173             : #endif
    1174             : 
    1175           0 : static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
    1176             :         sljit_s32 dst, sljit_sw dstw,
    1177             :         sljit_s32 src, sljit_sw srcw)
    1178             : {
    1179             :         sljit_u8* inst;
    1180             :         sljit_s32 dst_r;
    1181             : 
    1182             :         SLJIT_UNUSED_ARG(op_flags);
    1183             : 
    1184           0 :         if (cpu_has_cmov == -1)
    1185           0 :                 get_cpu_features();
    1186             : 
    1187           0 :         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    1188             : 
    1189           0 :         inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    1190           0 :         FAIL_IF(!inst);
    1191           0 :         *inst++ = GROUP_0F;
    1192           0 :         *inst = BSR_r_rm;
    1193             : 
    1194             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1195             :         if (cpu_has_cmov) {
    1196             :                 if (dst_r != TMP_REG1) {
    1197             :                         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
    1198             :                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
    1199             :                 }
    1200             :                 else
    1201             :                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
    1202             : 
    1203             :                 FAIL_IF(!inst);
    1204             :                 *inst++ = GROUP_0F;
    1205             :                 *inst = CMOVE_r_rm;
    1206             :         }
    1207             :         else
    1208             :                 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
    1209             : 
    1210             :         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
    1211             : #else
    1212           0 :         if (cpu_has_cmov) {
    1213           0 :                 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
    1214             : 
    1215           0 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
    1216           0 :                 FAIL_IF(!inst);
    1217           0 :                 *inst++ = GROUP_0F;
    1218           0 :                 *inst = CMOVE_r_rm;
    1219             :         }
    1220             :         else
    1221           0 :                 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
    1222             : 
    1223           0 :         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
    1224             : #endif
    1225             : 
    1226           0 :         FAIL_IF(!inst);
    1227           0 :         *(inst + 1) |= XOR;
    1228             : 
    1229           0 :         if (dst & SLJIT_MEM)
    1230           0 :                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1231           0 :         return SLJIT_SUCCESS;
    1232             : }
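/* BSR returns the index of the highest set bit but leaves the destination
   undefined (and sets ZF) when the source is zero, so a CMOVE, or the
   generic cmov emulation, first loads 32+31 (64+63 in 64-bit mode).  The
   final XOR with 31 (or 63) turns a bit index i into 31-i, i.e. the number
   of leading zeros, and maps the zero-input sentinel to 32 (or 64). */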
    1233             : 
    1234    13000272 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
    1235             :         sljit_s32 dst, sljit_sw dstw,
    1236             :         sljit_s32 src, sljit_sw srcw)
    1237             : {
    1238    13000272 :         sljit_s32 op_flags = GET_ALL_FLAGS(op);
    1239             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1240             :         sljit_s32 dst_is_ereg = 0;
    1241             : #endif
    1242             : 
    1243    13000272 :         CHECK_ERROR();
    1244             :         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
    1245    13000272 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    1246    13000272 :         ADJUST_LOCAL_OFFSET(src, srcw);
    1247             : 
    1248             :         CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
    1249             :         CHECK_EXTRA_REGS(src, srcw, (void)0);
    1250             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1251    13000272 :         compiler->mode32 = op_flags & SLJIT_I32_OP;
    1252             : #endif
    1253             : 
    1254    13000272 :         if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
    1255           0 :                 if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
    1256           0 :                         return emit_prefetch(compiler, op, src, srcw);
    1257           0 :                 return SLJIT_SUCCESS;
    1258             :         }
    1259             : 
    1260    13000272 :         op = GET_OPCODE(op);
    1261             : 
    1262    13000272 :         if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
    1263             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1264    13000268 :                 compiler->mode32 = 0;
    1265             : #endif
    1266             : 
    1267    13000268 :                 if (FAST_IS_REG(src) && src == dst) {
    1268           0 :                         if (!TYPE_CAST_NEEDED(op))
    1269           0 :                                 return SLJIT_SUCCESS;
    1270             :                 }
    1271             : 
    1272    13000268 :                 if (op_flags & SLJIT_I32_OP) {
    1273             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1274           0 :                         if (src & SLJIT_MEM) {
    1275           0 :                                 if (op == SLJIT_MOV_S32)
    1276           0 :                                         op = SLJIT_MOV_U32;
    1277             :                         }
    1278           0 :                         else if (src & SLJIT_IMM) {
    1279           0 :                                 if (op == SLJIT_MOV_U32)
    1280           0 :                                         op = SLJIT_MOV_S32;
    1281             :                         }
    1282             : #endif
    1283             :                 }
    1284             : 
    1285    13000268 :                 if (src & SLJIT_IMM) {
    1286      337948 :                         switch (op) {
    1287           0 :                         case SLJIT_MOV_U8:
    1288           0 :                                 srcw = (sljit_u8)srcw;
    1289           0 :                                 break;
    1290           0 :                         case SLJIT_MOV_S8:
    1291           0 :                                 srcw = (sljit_s8)srcw;
    1292           0 :                                 break;
    1293           0 :                         case SLJIT_MOV_U16:
    1294           0 :                                 srcw = (sljit_u16)srcw;
    1295           0 :                                 break;
    1296           0 :                         case SLJIT_MOV_S16:
    1297           0 :                                 srcw = (sljit_s16)srcw;
    1298           0 :                                 break;
    1299             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1300           0 :                         case SLJIT_MOV_U32:
    1301           0 :                                 srcw = (sljit_u32)srcw;
    1302           0 :                                 break;
    1303           0 :                         case SLJIT_MOV_S32:
    1304           0 :                                 srcw = (sljit_s32)srcw;
    1305           0 :                                 break;
    1306             : #endif
    1307             :                         }
    1308             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1309             :                         if (SLJIT_UNLIKELY(dst_is_ereg))
    1310             :                                 return emit_mov(compiler, dst, dstw, src, srcw);
    1311             : #endif
    1312             :                 }
    1313             : 
    1314             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1315             :                 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
    1316             :                         SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
    1317             :                         dst = TMP_REG1;
    1318             :                 }
    1319             : #endif
    1320             : 
    1321    13000268 :                 switch (op) {
    1322     2357877 :                 case SLJIT_MOV:
    1323             :                 case SLJIT_MOV_P:
    1324             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1325             :                 case SLJIT_MOV_U32:
    1326             :                 case SLJIT_MOV_S32:
    1327             : #endif
    1328     2357877 :                         FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
    1329     2357877 :                         break;
    1330     9013232 :                 case SLJIT_MOV_U8:
    1331     9013232 :                         FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
    1332     9013232 :                         break;
    1333           0 :                 case SLJIT_MOV_S8:
    1334           0 :                         FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
    1335           0 :                         break;
    1336       56725 :                 case SLJIT_MOV_U16:
    1337       56725 :                         FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
    1338       56725 :                         break;
    1339           0 :                 case SLJIT_MOV_S16:
    1340           0 :                         FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
    1341           0 :                         break;
    1342             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1343      103395 :                 case SLJIT_MOV_U32:
    1344      103395 :                         FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
    1345      103395 :                         break;
    1346     1469039 :                 case SLJIT_MOV_S32:
    1347     1469039 :                         FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
    1348     1469039 :                         break;
    1349             : #endif
    1350             :                 }
    1351             : 
    1352             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1353             :                 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
    1354             :                         return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
    1355             : #endif
    1356    13000268 :                 return SLJIT_SUCCESS;
    1357             :         }
    1358             : 
    1359           4 :         switch (op) {
    1360           0 :         case SLJIT_NOT:
    1361           0 :                 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
    1362           0 :                         return emit_not_with_flags(compiler, dst, dstw, src, srcw);
    1363           0 :                 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
    1364             : 
    1365           4 :         case SLJIT_NEG:
    1366           4 :                 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
    1367             : 
    1368           0 :         case SLJIT_CLZ:
    1369           0 :                 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
    1370             :         }
    1371             : 
    1372           0 :         return SLJIT_SUCCESS;
    1373             : }
    1374             : 
    1375             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1376             : 
    1377             : #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    1378             :         if (IS_HALFWORD(immw) || compiler->mode32) { \
    1379             :                 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
    1380             :                 FAIL_IF(!inst); \
    1381             :                 *(inst + 1) |= (op_imm); \
    1382             :         } \
    1383             :         else { \
    1384             :                 FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
    1385             :                 inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
    1386             :                 FAIL_IF(!inst); \
    1387             :                 *inst = (op_mr); \
    1388             :         }
    1389             : 
    1390             : #define BINARY_EAX_IMM(op_eax_imm, immw) \
    1391             :         FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
    1392             : 
    1393             : #else
    1394             : 
    1395             : #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    1396             :         inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
    1397             :         FAIL_IF(!inst); \
    1398             :         *(inst + 1) |= (op_imm);
    1399             : 
    1400             : #define BINARY_EAX_IMM(op_eax_imm, immw) \
    1401             :         FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
    1402             : 
    1403             : #endif
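/* The binary emitters below take their opcodes packed into one sljit_u32:
   bits 31..24 hold the short "op EAX, imm32" opcode, bits 23..16 the
   "op reg, r/m" opcode, bits 15..8 the "op r/m, reg" opcode, and bits 7..0
   the /digit used by BINARY_IMM for the Group-1 immediate form.  On x86-64,
   BINARY_IMM additionally loads an immediate that does not fit in 32 bits
   into a temporary register and falls back to the register form. */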
    1404             : 
    1405     9229139 : static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
    1406             :         sljit_u32 op_types,
    1407             :         sljit_s32 dst, sljit_sw dstw,
    1408             :         sljit_s32 src1, sljit_sw src1w,
    1409             :         sljit_s32 src2, sljit_sw src2w)
    1410             : {
    1411             :         sljit_u8* inst;
    1412     9229139 :         sljit_u8 op_eax_imm = (op_types >> 24);
    1413     9229139 :         sljit_u8 op_rm = (op_types >> 16) & 0xff;
    1414     9229139 :         sljit_u8 op_mr = (op_types >> 8) & 0xff;
    1415     9229139 :         sljit_u8 op_imm = op_types & 0xff;
    1416             : 
    1417     9229139 :         if (dst == SLJIT_UNUSED) {
    1418           9 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1419           9 :                 if (src2 & SLJIT_IMM) {
    1420           0 :                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
    1421             :                 }
    1422             :                 else {
    1423           9 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1424           9 :                         FAIL_IF(!inst);
    1425           9 :                         *inst = op_rm;
    1426             :                 }
    1427           9 :                 return SLJIT_SUCCESS;
    1428             :         }
    1429             : 
    1430     9229130 :         if (dst == src1 && dstw == src1w) {
    1431     9174975 :                 if (src2 & SLJIT_IMM) {
    1432             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1433     9139031 :                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    1434             : #else
    1435             :                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
    1436             : #endif
    1437        2102 :                                 BINARY_EAX_IMM(op_eax_imm, src2w);
    1438             :                         }
    1439             :                         else {
    1440     9136929 :                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
    1441             :                         }
    1442             :                 }
    1443       35944 :                 else if (FAST_IS_REG(dst)) {
    1444       35944 :                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
    1445       35944 :                         FAIL_IF(!inst);
    1446       35944 :                         *inst = op_rm;
    1447             :                 }
    1448           0 :                 else if (FAST_IS_REG(src2)) {
    1449             :                         /* Special exception for sljit_emit_op_flags. */
    1450           0 :                         inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
    1451           0 :                         FAIL_IF(!inst);
    1452           0 :                         *inst = op_mr;
    1453             :                 }
    1454             :                 else {
    1455           0 :                         EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
    1456           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    1457           0 :                         FAIL_IF(!inst);
    1458           0 :                         *inst = op_mr;
    1459             :                 }
    1460     9174975 :                 return SLJIT_SUCCESS;
    1461             :         }
    1462             : 
    1463             :         /* Only for cumulative operations. */
    1464       54155 :         if (dst == src2 && dstw == src2w) {
    1465           0 :                 if (src1 & SLJIT_IMM) {
    1466             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1467           0 :                         if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
    1468             : #else
    1469             :                         if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
    1470             : #endif
    1471           0 :                                 BINARY_EAX_IMM(op_eax_imm, src1w);
    1472             :                         }
    1473             :                         else {
    1474           0 :                                 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
    1475             :                         }
    1476             :                 }
    1477           0 :                 else if (FAST_IS_REG(dst)) {
    1478           0 :                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
    1479           0 :                         FAIL_IF(!inst);
    1480           0 :                         *inst = op_rm;
    1481             :                 }
    1482           0 :                 else if (FAST_IS_REG(src1)) {
    1483           0 :                         inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
    1484           0 :                         FAIL_IF(!inst);
    1485           0 :                         *inst = op_mr;
    1486             :                 }
    1487             :                 else {
    1488           0 :                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1489           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    1490           0 :                         FAIL_IF(!inst);
    1491           0 :                         *inst = op_mr;
    1492             :                 }
    1493           0 :                 return SLJIT_SUCCESS;
    1494             :         }
    1495             : 
    1496             :         /* General version. */
    1497       54155 :         if (FAST_IS_REG(dst)) {
    1498       54155 :                 EMIT_MOV(compiler, dst, 0, src1, src1w);
    1499       54155 :                 if (src2 & SLJIT_IMM) {
    1500       54155 :                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
    1501             :                 }
    1502             :                 else {
    1503           0 :                         inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
    1504           0 :                         FAIL_IF(!inst);
    1505           0 :                         *inst = op_rm;
    1506             :                 }
    1507             :         }
    1508             :         else {
     1509             :                 /* This version requires fewer memory writes. */
    1510           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1511           0 :                 if (src2 & SLJIT_IMM) {
    1512           0 :                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
    1513             :                 }
    1514             :                 else {
    1515           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1516           0 :                         FAIL_IF(!inst);
    1517           0 :                         *inst = op_rm;
    1518             :                 }
    1519           0 :                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1520             :         }
    1521             : 
    1522       54155 :         return SLJIT_SUCCESS;
    1523             : }
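/* "Cumulative" here means commutative: when dst aliases src2 the operands
   can simply be swapped, which is what the dst == src2 branch above relies
   on; emit_non_cum_binary below has no such branch.  As a rough,
   hypothetical example of how this code is reached through the public API:

       sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1000);

   would take the dst == src1 fast path and, since dst is R0 and the
   immediate does not fit in a signed byte, use the one-byte accumulator
   form via BINARY_EAX_IMM. */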
    1524             : 
    1525      558392 : static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
    1526             :         sljit_u32 op_types,
    1527             :         sljit_s32 dst, sljit_sw dstw,
    1528             :         sljit_s32 src1, sljit_sw src1w,
    1529             :         sljit_s32 src2, sljit_sw src2w)
    1530             : {
    1531             :         sljit_u8* inst;
    1532      558392 :         sljit_u8 op_eax_imm = (op_types >> 24);
    1533      558392 :         sljit_u8 op_rm = (op_types >> 16) & 0xff;
    1534      558392 :         sljit_u8 op_mr = (op_types >> 8) & 0xff;
    1535      558392 :         sljit_u8 op_imm = op_types & 0xff;
    1536             : 
    1537      558392 :         if (dst == SLJIT_UNUSED) {
    1538           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1539           0 :                 if (src2 & SLJIT_IMM) {
    1540           0 :                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
    1541             :                 }
    1542             :                 else {
    1543           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1544           0 :                         FAIL_IF(!inst);
    1545           0 :                         *inst = op_rm;
    1546             :                 }
    1547           0 :                 return SLJIT_SUCCESS;
    1548             :         }
    1549             : 
    1550      558392 :         if (dst == src1 && dstw == src1w) {
    1551      506629 :                 if (src2 & SLJIT_IMM) {
    1552             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1553      454970 :                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    1554             : #else
    1555             :                         if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
    1556             : #endif
    1557           2 :                                 BINARY_EAX_IMM(op_eax_imm, src2w);
    1558             :                         }
    1559             :                         else {
    1560      454968 :                                 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
    1561             :                         }
    1562             :                 }
    1563       51659 :                 else if (FAST_IS_REG(dst)) {
    1564       51659 :                         inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
    1565       51659 :                         FAIL_IF(!inst);
    1566       51659 :                         *inst = op_rm;
    1567             :                 }
    1568           0 :                 else if (FAST_IS_REG(src2)) {
    1569           0 :                         inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
    1570           0 :                         FAIL_IF(!inst);
    1571           0 :                         *inst = op_mr;
    1572             :                 }
    1573             :                 else {
    1574           0 :                         EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
    1575           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    1576           0 :                         FAIL_IF(!inst);
    1577           0 :                         *inst = op_mr;
    1578             :                 }
    1579      506629 :                 return SLJIT_SUCCESS;
    1580             :         }
    1581             : 
    1582             :         /* General version. */
    1583       51763 :         if (FAST_IS_REG(dst) && dst != src2) {
    1584       51763 :                 EMIT_MOV(compiler, dst, 0, src1, src1w);
    1585      103526 :                 if (src2 & SLJIT_IMM) {
    1586       51658 :                         BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
    1587             :                 }
    1588             :                 else {
    1589         105 :                         inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
    1590         105 :                         FAIL_IF(!inst);
    1591         105 :                         *inst = op_rm;
    1592             :                 }
    1593             :         }
    1594             :         else {
     1595             :                 /* This version requires fewer memory writes. */
    1596           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1597           0 :                 if (src2 & SLJIT_IMM) {
    1598           0 :                         BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
    1599             :                 }
    1600             :                 else {
    1601           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1602           0 :                         FAIL_IF(!inst);
    1603           0 :                         *inst = op_rm;
    1604             :                 }
    1605           0 :                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1606             :         }
    1607             : 
    1608       51763 :         return SLJIT_SUCCESS;
    1609             : }
    1610             : 
    1611           0 : static sljit_s32 emit_mul(struct sljit_compiler *compiler,
    1612             :         sljit_s32 dst, sljit_sw dstw,
    1613             :         sljit_s32 src1, sljit_sw src1w,
    1614             :         sljit_s32 src2, sljit_sw src2w)
    1615             : {
    1616             :         sljit_u8* inst;
    1617             :         sljit_s32 dst_r;
    1618             : 
    1619           0 :         dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
    1620             : 
    1621             :         /* Register destination. */
    1622           0 :         if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
    1623           0 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
    1624           0 :                 FAIL_IF(!inst);
    1625           0 :                 *inst++ = GROUP_0F;
    1626           0 :                 *inst = IMUL_r_rm;
    1627             :         }
    1628           0 :         else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
    1629           0 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
    1630           0 :                 FAIL_IF(!inst);
    1631           0 :                 *inst++ = GROUP_0F;
    1632           0 :                 *inst = IMUL_r_rm;
    1633             :         }
    1634           0 :         else if (src1 & SLJIT_IMM) {
    1635           0 :                 if (src2 & SLJIT_IMM) {
    1636           0 :                         EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
    1637           0 :                         src2 = dst_r;
    1638           0 :                         src2w = 0;
    1639             :                 }
    1640             : 
    1641           0 :                 if (src1w <= 127 && src1w >= -128) {
    1642           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
    1643           0 :                         FAIL_IF(!inst);
    1644           0 :                         *inst = IMUL_r_rm_i8;
    1645           0 :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    1646           0 :                         FAIL_IF(!inst);
    1647           0 :                         INC_SIZE(1);
    1648           0 :                         *inst = (sljit_s8)src1w;
    1649             :                 }
    1650             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1651             :                 else {
    1652             :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
    1653             :                         FAIL_IF(!inst);
    1654             :                         *inst = IMUL_r_rm_i32;
    1655             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
    1656             :                         FAIL_IF(!inst);
    1657             :                         INC_SIZE(4);
    1658             :                         sljit_unaligned_store_sw(inst, src1w);
    1659             :                 }
    1660             : #else
    1661           0 :                 else if (IS_HALFWORD(src1w)) {
    1662           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
    1663           0 :                         FAIL_IF(!inst);
    1664           0 :                         *inst = IMUL_r_rm_i32;
    1665           0 :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
    1666           0 :                         FAIL_IF(!inst);
    1667           0 :                         INC_SIZE(4);
    1668           0 :                         sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
    1669             :                 }
    1670             :                 else {
    1671           0 :                         if (dst_r != src2)
    1672           0 :                                 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
    1673           0 :                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
    1674           0 :                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
    1675           0 :                         FAIL_IF(!inst);
    1676           0 :                         *inst++ = GROUP_0F;
    1677           0 :                         *inst = IMUL_r_rm;
    1678             :                 }
    1679             : #endif
    1680             :         }
    1681           0 :         else if (src2 & SLJIT_IMM) {
    1682             :                 /* Note: src1 is NOT immediate. */
    1683             : 
    1684           0 :                 if (src2w <= 127 && src2w >= -128) {
    1685           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
    1686           0 :                         FAIL_IF(!inst);
    1687           0 :                         *inst = IMUL_r_rm_i8;
    1688           0 :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    1689           0 :                         FAIL_IF(!inst);
    1690           0 :                         INC_SIZE(1);
    1691           0 :                         *inst = (sljit_s8)src2w;
    1692             :                 }
    1693             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    1694             :                 else {
    1695             :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
    1696             :                         FAIL_IF(!inst);
    1697             :                         *inst = IMUL_r_rm_i32;
    1698             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
    1699             :                         FAIL_IF(!inst);
    1700             :                         INC_SIZE(4);
    1701             :                         sljit_unaligned_store_sw(inst, src2w);
    1702             :                 }
    1703             : #else
    1704           0 :                 else if (IS_HALFWORD(src2w)) {
    1705           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
    1706           0 :                         FAIL_IF(!inst);
    1707           0 :                         *inst = IMUL_r_rm_i32;
    1708           0 :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
    1709           0 :                         FAIL_IF(!inst);
    1710           0 :                         INC_SIZE(4);
    1711           0 :                         sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
    1712             :                 }
    1713             :                 else {
    1714           0 :                         if (dst_r != src1)
    1715           0 :                                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
    1716           0 :                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
    1717           0 :                         inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
    1718           0 :                         FAIL_IF(!inst);
    1719           0 :                         *inst++ = GROUP_0F;
    1720           0 :                         *inst = IMUL_r_rm;
    1721             :                 }
    1722             : #endif
    1723             :         }
    1724             :         else {
    1725             :                 /* Neither argument is immediate. */
    1726           0 :                 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
    1727           0 :                         dst_r = TMP_REG1;
    1728           0 :                 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
    1729           0 :                 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
    1730           0 :                 FAIL_IF(!inst);
    1731           0 :                 *inst++ = GROUP_0F;
    1732           0 :                 *inst = IMUL_r_rm;
    1733             :         }
    1734             : 
    1735           0 :         if (dst & SLJIT_MEM)
    1736           0 :                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    1737             : 
    1738           0 :         return SLJIT_SUCCESS;
    1739             : }
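/* Three IMUL encodings are used above: 0F AF (IMUL r, r/m) when neither
   operand is an immediate, 6B (IMUL r, r/m, imm8) for immediates in the
   signed-byte range, and 69 (IMUL r, r/m, imm32) otherwise.  There is no
   IMUL form with a 64-bit immediate, so such a value is first loaded into
   TMP_REG2 and the register form is used. */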
    1740             : 
    1741     9789719 : static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
    1742             :         sljit_s32 dst, sljit_sw dstw,
    1743             :         sljit_s32 src1, sljit_sw src1w,
    1744             :         sljit_s32 src2, sljit_sw src2w)
    1745             : {
    1746             :         sljit_u8* inst;
    1747     9789719 :         sljit_s32 dst_r, done = 0;
    1748             : 
     1749             :         /* These cases are better left to be handled the normal way. */
    1750     9789719 :         if (dst == src1 && dstw == src1w)
    1751     9429216 :                 return SLJIT_ERR_UNSUPPORTED;
    1752      360503 :         if (dst == src2 && dstw == src2w)
    1753           0 :                 return SLJIT_ERR_UNSUPPORTED;
    1754             : 
    1755      360503 :         dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    1756             : 
    1757      360503 :         if (FAST_IS_REG(src1)) {
    1758      257000 :                 if (FAST_IS_REG(src2)) {
    1759           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
    1760           0 :                         FAIL_IF(!inst);
    1761           0 :                         *inst = LEA_r_m;
    1762           0 :                         done = 1;
    1763             :                 }
    1764             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1765      257000 :                 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    1766      257000 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
    1767             : #else
    1768             :                 if (src2 & SLJIT_IMM) {
    1769             :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
    1770             : #endif
    1771      257000 :                         FAIL_IF(!inst);
    1772      257000 :                         *inst = LEA_r_m;
    1773      257000 :                         done = 1;
    1774             :                 }
    1775             :         }
    1776      103503 :         else if (FAST_IS_REG(src2)) {
    1777             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1778           0 :                 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
    1779           0 :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
    1780             : #else
    1781             :                 if (src1 & SLJIT_IMM) {
    1782             :                         inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
    1783             : #endif
    1784           0 :                         FAIL_IF(!inst);
    1785           0 :                         *inst = LEA_r_m;
    1786           0 :                         done = 1;
    1787             :                 }
    1788             :         }
    1789             : 
    1790      360503 :         if (done) {
    1791      257000 :                 if (dst_r == TMP_REG1)
    1792          55 :                         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
    1793      256945 :                 return SLJIT_SUCCESS;
    1794             :         }
    1795      103503 :         return SLJIT_ERR_UNSUPPORTED;
    1796             : }
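/* emit_lea_binary is an opportunistic optimization: when the destination
   differs from both sources, a register+register or register+immediate
   addition can be expressed as a single LEA, which also leaves the flags
   untouched.  When no suitable form exists it returns SLJIT_ERR_UNSUPPORTED
   so that the caller can fall back to the ordinary binary-op path. */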
    1797             : 
    1798    19643623 : static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
    1799             :         sljit_s32 src1, sljit_sw src1w,
    1800             :         sljit_s32 src2, sljit_sw src2w)
    1801             : {
    1802             :         sljit_u8* inst;
    1803             : 
    1804             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1805    19643623 :         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    1806             : #else
    1807             :         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
    1808             : #endif
    1809      792168 :                 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
    1810      792168 :                 return SLJIT_SUCCESS;
    1811             :         }
    1812             : 
    1813    18851455 :         if (FAST_IS_REG(src1)) {
    1814    18851393 :                 if (src2 & SLJIT_IMM) {
    1815     9693694 :                         BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
    1816             :                 }
    1817             :                 else {
    1818     9157699 :                         inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
    1819     9157699 :                         FAIL_IF(!inst);
    1820     9157699 :                         *inst = CMP_r_rm;
    1821             :                 }
    1822    18851393 :                 return SLJIT_SUCCESS;
    1823             :         }
    1824             : 
    1825          62 :         if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
    1826          60 :                 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
    1827          60 :                 FAIL_IF(!inst);
    1828          60 :                 *inst = CMP_rm_r;
    1829          60 :                 return SLJIT_SUCCESS;
    1830             :         }
    1831             : 
    1832           2 :         if (src2 & SLJIT_IMM) {
    1833           0 :                 if (src1 & SLJIT_IMM) {
    1834           0 :                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1835           0 :                         src1 = TMP_REG1;
    1836           0 :                         src1w = 0;
    1837             :                 }
    1838           0 :                 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
    1839             :         }
    1840             :         else {
    1841           2 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1842           2 :                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1843           2 :                 FAIL_IF(!inst);
    1844           2 :                 *inst = CMP_r_rm;
    1845             :         }
    1846           2 :         return SLJIT_SUCCESS;
    1847             : }
    1848             : 
    1849      105954 : static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
    1850             :         sljit_s32 src1, sljit_sw src1w,
    1851             :         sljit_s32 src2, sljit_sw src2w)
    1852             : {
    1853             :         sljit_u8* inst;
    1854             : 
    1855             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1856      105954 :         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    1857             : #else
    1858             :         if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
    1859             : #endif
    1860          32 :                 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
    1861          32 :                 return SLJIT_SUCCESS;
    1862             :         }
    1863             : 
    1864             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1865      105922 :         if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
    1866             : #else
    1867             :         if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
    1868             : #endif
    1869           0 :                 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
    1870           0 :                 return SLJIT_SUCCESS;
    1871             :         }
    1872             : 
    1873      105922 :         if (!(src1 & SLJIT_IMM)) {
    1874      105922 :                 if (src2 & SLJIT_IMM) {
    1875             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1876      103616 :                         if (IS_HALFWORD(src2w) || compiler->mode32) {
    1877      103616 :                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
    1878      103616 :                                 FAIL_IF(!inst);
    1879      103616 :                                 *inst = GROUP_F7;
    1880             :                         }
    1881             :                         else {
    1882           0 :                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
    1883           0 :                                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
    1884           0 :                                 FAIL_IF(!inst);
    1885           0 :                                 *inst = TEST_rm_r;
    1886             :                         }
    1887             : #else
    1888             :                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
    1889             :                         FAIL_IF(!inst);
    1890             :                         *inst = GROUP_F7;
    1891             : #endif
    1892      103616 :                         return SLJIT_SUCCESS;
    1893             :                 }
    1894        2306 :                 else if (FAST_IS_REG(src1)) {
    1895        2306 :                         inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
    1896        2306 :                         FAIL_IF(!inst);
    1897        2306 :                         *inst = TEST_rm_r;
    1898        2306 :                         return SLJIT_SUCCESS;
    1899             :                 }
    1900             :         }
    1901             : 
    1902           0 :         if (!(src2 & SLJIT_IMM)) {
    1903           0 :                 if (src1 & SLJIT_IMM) {
    1904             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1905           0 :                         if (IS_HALFWORD(src1w) || compiler->mode32) {
    1906           0 :                                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
    1907           0 :                                 FAIL_IF(!inst);
    1908           0 :                                 *inst = GROUP_F7;
    1909             :                         }
    1910             :                         else {
    1911           0 :                                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
    1912           0 :                                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1913           0 :                                 FAIL_IF(!inst);
    1914           0 :                                 *inst = TEST_rm_r;
    1915             :                         }
    1916             : #else
    1917             :                         inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
    1918             :                         FAIL_IF(!inst);
    1919             :                         *inst = GROUP_F7;
    1920             : #endif
    1921           0 :                         return SLJIT_SUCCESS;
    1922             :                 }
    1923           0 :                 else if (FAST_IS_REG(src2)) {
    1924           0 :                         inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
    1925           0 :                         FAIL_IF(!inst);
    1926           0 :                         *inst = TEST_rm_r;
    1927           0 :                         return SLJIT_SUCCESS;
    1928             :                 }
    1929             :         }
    1930             : 
    1931           0 :         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1932           0 :         if (src2 & SLJIT_IMM) {
    1933             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1934           0 :                 if (IS_HALFWORD(src2w) || compiler->mode32) {
    1935           0 :                         inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
    1936           0 :                         FAIL_IF(!inst);
    1937           0 :                         *inst = GROUP_F7;
    1938             :                 }
    1939             :                 else {
    1940           0 :                         FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
    1941           0 :                         inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
    1942           0 :                         FAIL_IF(!inst);
    1943           0 :                         *inst = TEST_rm_r;
    1944             :                 }
    1945             : #else
    1946             :                 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
    1947             :                 FAIL_IF(!inst);
    1948             :                 *inst = GROUP_F7;
    1949             : #endif
    1950             :         }
    1951             :         else {
    1952           0 :                 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    1953           0 :                 FAIL_IF(!inst);
    1954           0 :                 *inst = TEST_rm_r;
    1955             :         }
    1956           0 :         return SLJIT_SUCCESS;
    1957             : }
    1958             : 
    1959        4815 : static sljit_s32 emit_shift(struct sljit_compiler *compiler,
    1960             :         sljit_u8 mode,
    1961             :         sljit_s32 dst, sljit_sw dstw,
    1962             :         sljit_s32 src1, sljit_sw src1w,
    1963             :         sljit_s32 src2, sljit_sw src2w)
    1964             : {
    1965             :         sljit_u8* inst;
    1966             : 
    1967        4815 :         if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
    1968        4815 :                 if (dst == src1 && dstw == src1w) {
    1969        2471 :                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
    1970        2471 :                         FAIL_IF(!inst);
    1971        2471 :                         *inst |= mode;
    1972        2471 :                         return SLJIT_SUCCESS;
    1973             :                 }
    1974        2344 :                 if (dst == SLJIT_UNUSED) {
    1975           0 :                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1976           0 :                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
    1977           0 :                         FAIL_IF(!inst);
    1978           0 :                         *inst |= mode;
    1979           0 :                         return SLJIT_SUCCESS;
    1980             :                 }
    1981        2344 :                 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
    1982        2179 :                         EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1983        2179 :                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    1984        2179 :                         FAIL_IF(!inst);
    1985        2179 :                         *inst |= mode;
    1986        2179 :                         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    1987        2179 :                         return SLJIT_SUCCESS;
    1988             :                 }
    1989         165 :                 if (FAST_IS_REG(dst)) {
    1990         165 :                         EMIT_MOV(compiler, dst, 0, src1, src1w);
    1991         165 :                         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
    1992         165 :                         FAIL_IF(!inst);
    1993         165 :                         *inst |= mode;
    1994         165 :                         return SLJIT_SUCCESS;
    1995             :                 }
    1996             : 
    1997           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    1998           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
    1999           0 :                 FAIL_IF(!inst);
    2000           0 :                 *inst |= mode;
    2001           0 :                 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    2002           0 :                 return SLJIT_SUCCESS;
    2003             :         }
    2004             : 
    2005           0 :         if (dst == SLJIT_PREF_SHIFT_REG) {
    2006           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    2007           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
    2008           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    2009           0 :                 FAIL_IF(!inst);
    2010           0 :                 *inst |= mode;
    2011           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    2012             :         }
    2013           0 :         else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
    2014           0 :                 if (src1 != dst)
    2015           0 :                         EMIT_MOV(compiler, dst, 0, src1, src1w);
    2016           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
    2017           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
    2018           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
    2019           0 :                 FAIL_IF(!inst);
    2020           0 :                 *inst |= mode;
    2021           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    2022             :         }
    2023             :         else {
    2024             :                 /* This case is complex since ecx itself may be used for
    2025             :                    addressing, and this case must be supported as well. */
    2026           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    2027             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2028             :                 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
    2029             :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
    2030             :                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    2031             :                 FAIL_IF(!inst);
    2032             :                 *inst |= mode;
    2033             :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
    2034             : #else
    2035           0 :                 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
    2036           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
    2037           0 :                 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    2038           0 :                 FAIL_IF(!inst);
    2039           0 :                 *inst |= mode;
    2040           0 :                 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
    2041             : #endif
    2042           0 :                 if (dst != SLJIT_UNUSED)
    2043           0 :                         return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
    2044             :         }
    2045             : 
    2046           0 :         return SLJIT_SUCCESS;
    2047             : }
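                     : 
                     : /* Note on the register shuffling above: x86 variable-count shifts take the
                     :    count in CL, i.e. SLJIT_PREF_SHIFT_REG (ecx).  When the count is neither
                     :    an immediate nor already in ecx, the branches above move it there first,
                     :    parking the previous ecx value in a temporary register (or in a stack
                     :    slot on x86-32) and restoring it afterwards, unless ecx is itself the
                     :    destination. */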
    2048             : 
    2049        4815 : static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
    2050             :         sljit_u8 mode, sljit_s32 set_flags,
    2051             :         sljit_s32 dst, sljit_sw dstw,
    2052             :         sljit_s32 src1, sljit_sw src1w,
    2053             :         sljit_s32 src2, sljit_sw src2w)
    2054             : {
    2055             :         /* The CPU does not set flags if the shift count is 0. */
    2056        4815 :         if (src2 & SLJIT_IMM) {
    2057             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2058        2509 :                 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
    2059        2509 :                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
    2060             : #else
    2061             :                 if ((src2w & 0x1f) != 0)
    2062             :                         return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
    2063             : #endif
    2064           0 :                 if (!set_flags)
    2065           0 :                         return emit_mov(compiler, dst, dstw, src1, src1w);
    2066             :                 /* OR dst, src, 0 */
    2067           0 :                 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
    2068             :                         dst, dstw, src1, src1w, SLJIT_IMM, 0);
    2069             :         }
    2070             : 
    2071        2306 :         if (!set_flags)
    2072        2306 :                 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
    2073             : 
    2074           0 :         if (!FAST_IS_REG(dst))
    2075           0 :                 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
    2076             : 
    2077           0 :         FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
    2078             : 
    2079           0 :         if (FAST_IS_REG(dst))
    2080           0 :                 return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
    2081           0 :         return SLJIT_SUCCESS;
    2082             : }
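                     : 
                     : /* Illustrative only (a sketch assuming a compiler instance created
                     :    elsewhere and the usual SLJIT_SET_Z flag request): a call such as
                     : 
                     :         sljit_emit_op2(compiler, SLJIT_SHL | SLJIT_SET_Z,
                     :                 SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0);
                     : 
                     :    reaches the "OR dst, src, 0" fallback above: a zero shift count leaves
                     :    the CPU flags untouched, yet the caller still expects the Z flag to
                     :    describe the (unchanged) result. */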
    2083             : 
    2084    29690583 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
    2085             :         sljit_s32 dst, sljit_sw dstw,
    2086             :         sljit_s32 src1, sljit_sw src1w,
    2087             :         sljit_s32 src2, sljit_sw src2w)
    2088             : {
    2089    29690583 :         CHECK_ERROR();
    2090             :         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
    2091    29690583 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    2092    29690583 :         ADJUST_LOCAL_OFFSET(src1, src1w);
    2093    29690583 :         ADJUST_LOCAL_OFFSET(src2, src2w);
    2094             : 
    2095             :         CHECK_EXTRA_REGS(dst, dstw, (void)0);
    2096             :         CHECK_EXTRA_REGS(src1, src1w, (void)0);
    2097             :         CHECK_EXTRA_REGS(src2, src2w, (void)0);
    2098             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2099    29690583 :         compiler->mode32 = op & SLJIT_I32_OP;
    2100             : #endif
    2101             : 
    2102    29690583 :         if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
    2103           0 :                 return SLJIT_SUCCESS;
    2104             : 
    2105    29690583 :         switch (GET_OPCODE(op)) {
    2106     9368613 :         case SLJIT_ADD:
    2107     9368613 :                 if (!HAS_FLAGS(op)) {
    2108     9368613 :                         if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
    2109      148580 :                                 return compiler->error;
    2110             :                 }
    2111     9220033 :                 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
    2112             :                         dst, dstw, src1, src1w, src2, src2w);
    2113           0 :         case SLJIT_ADDC:
    2114           0 :                 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
    2115             :                         dst, dstw, src1, src1w, src2, src2w);
    2116    20202095 :         case SLJIT_SUB:
    2117    20202095 :                 if (!HAS_FLAGS(op)) {
    2118      416079 :                         if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
    2119       51734 :                                 return compiler->error;
    2120             :                 }
    2121             : 
    2122    20150361 :                 if (dst == SLJIT_UNUSED)
    2123    19643623 :                         return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
    2124      506738 :                 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
    2125             :                         dst, dstw, src1, src1w, src2, src2w);
    2126           0 :         case SLJIT_SUBC:
    2127           0 :                 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
    2128             :                         dst, dstw, src1, src1w, src2, src2w);
    2129           0 :         case SLJIT_MUL:
    2130           0 :                 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
    2131      108423 :         case SLJIT_AND:
    2132      108423 :                 if (dst == SLJIT_UNUSED)
    2133      105954 :                         return emit_test_binary(compiler, src1, src1w, src2, src2w);
    2134        2469 :                 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
    2135             :                         dst, dstw, src1, src1w, src2, src2w);
    2136        6597 :         case SLJIT_OR:
    2137        6597 :                 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
    2138             :                         dst, dstw, src1, src1w, src2, src2w);
    2139          40 :         case SLJIT_XOR:
    2140          40 :                 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
    2141             :                         dst, dstw, src1, src1w, src2, src2w);
    2142        2455 :         case SLJIT_SHL:
    2143        2455 :                 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
    2144             :                         dst, dstw, src1, src1w, src2, src2w);
    2145        2360 :         case SLJIT_LSHR:
    2146        2360 :                 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
    2147             :                         dst, dstw, src1, src1w, src2, src2w);
    2148           0 :         case SLJIT_ASHR:
    2149           0 :                 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
    2150             :                         dst, dstw, src1, src1w, src2, src2w);
    2151             :         }
    2152             : 
    2153           0 :         return SLJIT_SUCCESS;
    2154             : }
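                     : 
                     : /* How some common front end calls map to the cases above (a sketch, with
                     :    "compiler" assumed to be set up elsewhere):
                     :      - SLJIT_ADD without a flag request is first tried as LEA
                     :        (emit_lea_binary) and only falls back to a real ADD when the
                     :        operand combination is not supported.
                     :      - SLJIT_SUB whose result is discarded (dst == SLJIT_UNUSED) becomes a
                     :        plain CMP, e.g.
                     :            sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z,
                     :                    SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 42);
                     :      - SLJIT_AND with dst == SLJIT_UNUSED is emitted as TEST
                     :        (emit_test_binary). */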
    2155             : 
    2156         369 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
    2157             : {
    2158             :         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
    2159             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2160             :         if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
    2161             :                 return -1;
    2162             : #endif
    2163         369 :         return reg_map[reg];
    2164             : }
    2165             : 
    2166           0 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
    2167             : {
    2168             :         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
    2169             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2170             :         return reg;
    2171             : #else
    2172           0 :         return freg_map[reg];
    2173             : #endif
    2174             : }
    2175             : 
    2176           0 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
    2177             :         void *instruction, sljit_s32 size)
    2178             : {
    2179             :         sljit_u8 *inst;
    2180             : 
    2181           0 :         CHECK_ERROR();
    2182             :         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
    2183             : 
    2184           0 :         inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
    2185           0 :         FAIL_IF(!inst);
    2186           0 :         INC_SIZE(size);
    2187           0 :         SLJIT_MEMCPY(inst, instruction, size);
    2188           0 :         return SLJIT_SUCCESS;
    2189             : }
    2190             : 
    2191             : /* --------------------------------------------------------------------- */
    2192             : /*  Floating point operators                                             */
    2193             : /* --------------------------------------------------------------------- */
    2194             : 
    2195             : /* 3 sljit_s32 of alignment padding + four constants of 16 bytes each. */
    2196             : static sljit_s32 sse2_data[3 + (4 * 4)];
    2197             : static sljit_s32 *sse2_buffer;
    2198             : 
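                     : /* init_compiler() fills sse2_buffer with the constants used by the NEG/ABS
                     :    paths of sljit_emit_fop1 below.  Each constant occupies a full 16-byte
                     :    slot so it can serve as an aligned xmm memory operand (little-endian
                     :    layout of the 32-bit stores):
                     :      sse2_buffer + 0  : 0x80000000          float sign mask  (XORPD, NEG)
                     :      sse2_buffer + 4  : 0x7fffffff          float abs mask   (ANDPD, ABS)
                     :      sse2_buffer + 8  : 0x8000000000000000  double sign mask (XORPD, NEG)
                     :      sse2_buffer + 12 : 0x7fffffffffffffff  double abs mask  (ANDPD, ABS)  */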
    2199        1531 : static void init_compiler(void)
    2200             : {
    2201             :         /* Align to 16 bytes. */
    2202        1531 :         sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
    2203             : 
    2204             :         /* Single precision constants (each constant is 16 bytes long). */
    2205        1531 :         sse2_buffer[0] = 0x80000000;
    2206        1531 :         sse2_buffer[4] = 0x7fffffff;
    2207             :         /* Double precision constants (each constant is 16 bytes long). */
    2208        1531 :         sse2_buffer[8] = 0;
    2209        1531 :         sse2_buffer[9] = 0x80000000;
    2210        1531 :         sse2_buffer[12] = 0xffffffff;
    2211        1531 :         sse2_buffer[13] = 0x7fffffff;
    2212        1531 : }
    2213             : 
    2214           0 : static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
    2215             :         sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
    2216             : {
    2217             :         sljit_u8 *inst;
    2218             : 
    2219           0 :         inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
    2220           0 :         FAIL_IF(!inst);
    2221           0 :         *inst++ = GROUP_0F;
    2222           0 :         *inst = opcode;
    2223           0 :         return SLJIT_SUCCESS;
    2224             : }
    2225             : 
    2226           0 : static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
    2227             :         sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
    2228             : {
    2229             :         sljit_u8 *inst;
    2230             : 
    2231           0 :         inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
    2232           0 :         FAIL_IF(!inst);
    2233           0 :         *inst++ = GROUP_0F;
    2234           0 :         *inst = opcode;
    2235           0 :         return SLJIT_SUCCESS;
    2236             : }
    2237             : 
    2238           0 : static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
    2239             :         sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
    2240             : {
    2241           0 :         return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
    2242             : }
    2243             : 
    2244           0 : static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
    2245             :         sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
    2246             : {
    2247           0 :         return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
    2248             : }
    2249             : 
    2250           0 : static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
    2251             :         sljit_s32 dst, sljit_sw dstw,
    2252             :         sljit_s32 src, sljit_sw srcw)
    2253             : {
    2254           0 :         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    2255             :         sljit_u8 *inst;
    2256             : 
    2257             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2258           0 :         if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
    2259           0 :                 compiler->mode32 = 0;
    2260             : #endif
    2261             : 
    2262           0 :         inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
    2263           0 :         FAIL_IF(!inst);
    2264           0 :         *inst++ = GROUP_0F;
    2265           0 :         *inst = CVTTSD2SI_r_xm;
    2266             : 
    2267           0 :         if (dst & SLJIT_MEM)
    2268           0 :                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
    2269           0 :         return SLJIT_SUCCESS;
    2270             : }
    2271             : 
    2272           0 : static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
    2273             :         sljit_s32 dst, sljit_sw dstw,
    2274             :         sljit_s32 src, sljit_sw srcw)
    2275             : {
    2276           0 :         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
    2277             :         sljit_u8 *inst;
    2278             : 
    2279             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2280           0 :         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
    2281           0 :                 compiler->mode32 = 0;
    2282             : #endif
    2283             : 
    2284           0 :         if (src & SLJIT_IMM) {
    2285             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2286           0 :                 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
    2287           0 :                         srcw = (sljit_s32)srcw;
    2288             : #endif
    2289           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    2290           0 :                 src = TMP_REG1;
    2291           0 :                 srcw = 0;
    2292             :         }
    2293             : 
    2294           0 :         inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
    2295           0 :         FAIL_IF(!inst);
    2296           0 :         *inst++ = GROUP_0F;
    2297           0 :         *inst = CVTSI2SD_x_rm;
    2298             : 
    2299             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2300           0 :         compiler->mode32 = 1;
    2301             : #endif
    2302           0 :         if (dst_r == TMP_FREG)
    2303           0 :                 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
    2304           0 :         return SLJIT_SUCCESS;
    2305             : }
    2306             : 
    2307           0 : static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
    2308             :         sljit_s32 src1, sljit_sw src1w,
    2309             :         sljit_s32 src2, sljit_sw src2w)
    2310             : {
    2311           0 :         if (!FAST_IS_REG(src1)) {
    2312           0 :                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
    2313           0 :                 src1 = TMP_FREG;
    2314             :         }
    2315             : 
    2316           0 :         return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
    2317             : }
    2318             : 
    2319           0 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
    2320             :         sljit_s32 dst, sljit_sw dstw,
    2321             :         sljit_s32 src, sljit_sw srcw)
    2322             : {
    2323             :         sljit_s32 dst_r;
    2324             : 
    2325             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2326           0 :         compiler->mode32 = 1;
    2327             : #endif
    2328             : 
    2329           0 :         CHECK_ERROR();
    2330           0 :         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
    2331             : 
    2332           0 :         if (GET_OPCODE(op) == SLJIT_MOV_F64) {
    2333           0 :                 if (FAST_IS_REG(dst))
    2334           0 :                         return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
    2335           0 :                 if (FAST_IS_REG(src))
    2336           0 :                         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
    2337           0 :                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
    2338           0 :                 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
    2339             :         }
    2340             : 
    2341           0 :         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
    2342           0 :                 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
    2343           0 :                 if (FAST_IS_REG(src)) {
    2344             :                         /* We overwrite the high bits of the source. From the SLJIT point of view,
    2345             :                            this is not an issue.
    2346             :                            Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
    2347           0 :                         FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
    2348             :                 }
    2349             :                 else {
    2350           0 :                         FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
    2351           0 :                         src = TMP_FREG;
    2352             :                 }
    2353             : 
    2354           0 :                 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
    2355           0 :                 if (dst_r == TMP_FREG)
    2356           0 :                         return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
    2357           0 :                 return SLJIT_SUCCESS;
    2358             :         }
    2359             : 
    2360           0 :         if (FAST_IS_REG(dst)) {
    2361           0 :                 dst_r = dst;
    2362           0 :                 if (dst != src)
    2363           0 :                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
    2364             :         }
    2365             :         else {
    2366           0 :                 dst_r = TMP_FREG;
    2367           0 :                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
    2368             :         }
    2369             : 
    2370           0 :         switch (GET_OPCODE(op)) {
    2371           0 :         case SLJIT_NEG_F64:
    2372           0 :                 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
    2373           0 :                 break;
    2374             : 
    2375           0 :         case SLJIT_ABS_F64:
    2376           0 :                 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
    2377           0 :                 break;
    2378             :         }
    2379             : 
    2380           0 :         if (dst_r == TMP_FREG)
    2381           0 :                 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
    2382           0 :         return SLJIT_SUCCESS;
    2383             : }
    2384             : 
    2385           0 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
    2386             :         sljit_s32 dst, sljit_sw dstw,
    2387             :         sljit_s32 src1, sljit_sw src1w,
    2388             :         sljit_s32 src2, sljit_sw src2w)
    2389             : {
    2390             :         sljit_s32 dst_r;
    2391             : 
    2392           0 :         CHECK_ERROR();
    2393             :         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
    2394           0 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    2395           0 :         ADJUST_LOCAL_OFFSET(src1, src1w);
    2396           0 :         ADJUST_LOCAL_OFFSET(src2, src2w);
    2397             : 
    2398             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2399           0 :         compiler->mode32 = 1;
    2400             : #endif
    2401             : 
    2402           0 :         if (FAST_IS_REG(dst)) {
    2403           0 :                 dst_r = dst;
    2404           0 :                 if (dst == src1)
    2405             :                         ; /* Do nothing here. */
    2406           0 :                 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
    2407             :                         /* Swap arguments. */
    2408           0 :                         src2 = src1;
    2409           0 :                         src2w = src1w;
    2410             :                 }
    2411           0 :                 else if (dst != src2)
    2412           0 :                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
    2413             :                 else {
    2414           0 :                         dst_r = TMP_FREG;
    2415           0 :                         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
    2416             :                 }
    2417             :         }
    2418             :         else {
    2419           0 :                 dst_r = TMP_FREG;
    2420           0 :                 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
    2421             :         }
    2422             : 
    2423           0 :         switch (GET_OPCODE(op)) {
    2424           0 :         case SLJIT_ADD_F64:
    2425           0 :                 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
    2426           0 :                 break;
    2427             : 
    2428           0 :         case SLJIT_SUB_F64:
    2429           0 :                 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
    2430           0 :                 break;
    2431             : 
    2432           0 :         case SLJIT_MUL_F64:
    2433           0 :                 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
    2434           0 :                 break;
    2435             : 
    2436           0 :         case SLJIT_DIV_F64:
    2437           0 :                 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
    2438           0 :                 break;
    2439             :         }
    2440             : 
    2441           0 :         if (dst_r == TMP_FREG)
    2442           0 :                 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
    2443           0 :         return SLJIT_SUCCESS;
    2444             : }
    2445             : 
    2446             : /* --------------------------------------------------------------------- */
    2447             : /*  Conditional instructions                                             */
    2448             : /* --------------------------------------------------------------------- */
    2449             : 
    2450     1533111 : SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
    2451             : {
    2452             :         sljit_u8 *inst;
    2453             :         struct sljit_label *label;
    2454             : 
    2455     1533111 :         CHECK_ERROR_PTR();
    2456             :         CHECK_PTR(check_sljit_emit_label(compiler));
    2457             : 
    2458     1533111 :         if (compiler->last_label && compiler->last_label->size == compiler->size)
    2459      276552 :                 return compiler->last_label;
    2460             : 
    2461     1256559 :         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
    2462     1256559 :         PTR_FAIL_IF(!label);
    2463     1256559 :         set_label(label, compiler);
    2464             : 
    2465     1256559 :         inst = (sljit_u8*)ensure_buf(compiler, 2);
    2466     1256559 :         PTR_FAIL_IF(!inst);
    2467             : 
    2468     1256559 :         *inst++ = 0;
    2469     1256559 :         *inst++ = 0;
    2470             : 
    2471     1256559 :         return label;
    2472             : }
    2473             : 
    2474    20388200 : SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
    2475             : {
    2476             :         sljit_u8 *inst;
    2477             :         struct sljit_jump *jump;
    2478             : 
    2479    20388200 :         CHECK_ERROR_PTR();
    2480             :         CHECK_PTR(check_sljit_emit_jump(compiler, type));
    2481             : 
    2482    20388200 :         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
    2483    20388200 :         PTR_FAIL_IF_NULL(jump);
    2484    20388200 :         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
    2485    20388200 :         type &= 0xff;
    2486             : 
    2487             :         /* Worst case size. */
    2488             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2489             :         compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
    2490             : #else
    2491    20388200 :         compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
    2492             : #endif
    2493             : 
    2494    20388200 :         inst = (sljit_u8*)ensure_buf(compiler, 2);
    2495    20388200 :         PTR_FAIL_IF_NULL(inst);
    2496             : 
    2497    20388200 :         *inst++ = 0;
    2498    20388200 :         *inst++ = type + 2;
    2499    20388200 :         return jump;
    2500             : }
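                     : 
                     : /* "Worst case size" is an upper bound on the final encoding, refined once
                     :    the code is generated and label distances are known: on x86-32 a jump
                     :    is at most JMP rel32 (5 bytes) or Jcc rel32 (6 bytes), while the x86-64
                     :    figures most likely cover MOV reg, imm64 (10 bytes) plus a REX-prefixed
                     :    indirect jump (3 bytes), with 2 more bytes for a short conditional
                     :    branch around that sequence. */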
    2501             : 
    2502       51696 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
    2503             : {
    2504             :         sljit_u8 *inst;
    2505             :         struct sljit_jump *jump;
    2506             : 
    2507       51696 :         CHECK_ERROR();
    2508             :         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
    2509       51696 :         ADJUST_LOCAL_OFFSET(src, srcw);
    2510             : 
    2511             :         CHECK_EXTRA_REGS(src, srcw, (void)0);
    2512             : 
    2513       51696 :         if (src == SLJIT_IMM) {
    2514       51655 :                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
    2515       51655 :                 FAIL_IF_NULL(jump);
    2516       51655 :                 set_jump(jump, compiler, JUMP_ADDR);
    2517       51655 :                 jump->u.target = srcw;
    2518             : 
    2519             :                 /* Worst case size. */
    2520             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2521             :                 compiler->size += 5;
    2522             : #else
    2523       51655 :                 compiler->size += 10 + 3;
    2524             : #endif
    2525             : 
    2526       51655 :                 inst = (sljit_u8*)ensure_buf(compiler, 2);
    2527       51655 :                 FAIL_IF_NULL(inst);
    2528             : 
    2529       51655 :                 *inst++ = 0;
    2530       51655 :                 *inst++ = type + 2;
    2531             :         }
    2532             :         else {
    2533             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2534             :                 /* REX_W is not necessary (src is not immediate). */
    2535          41 :                 compiler->mode32 = 1;
    2536             : #endif
    2537          41 :                 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
    2538          41 :                 FAIL_IF(!inst);
    2539          41 :                 *inst++ = GROUP_FF;
    2540          41 :                 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
    2541             :         }
    2542       51696 :         return SLJIT_SUCCESS;
    2543             : }
    2544             : 
    2545         146 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
    2546             :         sljit_s32 dst, sljit_sw dstw,
    2547             :         sljit_s32 type)
    2548             : {
    2549             :         sljit_u8 *inst;
    2550         146 :         sljit_u8 cond_set = 0;
    2551             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2552             :         sljit_s32 reg;
    2553             : #endif
    2554             :         /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
    2555         146 :         sljit_s32 dst_save = dst;
    2556         146 :         sljit_sw dstw_save = dstw;
    2557             : 
    2558         146 :         CHECK_ERROR();
    2559             :         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
    2560             : 
    2561         146 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    2562             :         CHECK_EXTRA_REGS(dst, dstw, (void)0);
    2563             : 
    2564         146 :         type &= 0xff;
    2565             :         /* setcc = jcc + 0x10. */
    2566         146 :         cond_set = get_jump_code(type) + 0x10;
    2567             : 
    2568             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2569         146 :         if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
    2570          47 :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
    2571          47 :                 FAIL_IF(!inst);
    2572          47 :                 INC_SIZE(4 + 3);
    2573             :                 /* Set low register to conditional flag. */
    2574          47 :                 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
    2575          47 :                 *inst++ = GROUP_0F;
    2576          47 :                 *inst++ = cond_set;
    2577          47 :                 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
    2578          47 :                 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
    2579          47 :                 *inst++ = OR_rm8_r8;
    2580          47 :                 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
    2581          47 :                 return SLJIT_SUCCESS;
    2582             :         }
    2583             : 
    2584          99 :         reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
    2585             : 
    2586          99 :         inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
    2587          99 :         FAIL_IF(!inst);
    2588          99 :         INC_SIZE(4 + 4);
    2589             :         /* Set low register to conditional flag. */
    2590          99 :         *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
    2591          99 :         *inst++ = GROUP_0F;
    2592          99 :         *inst++ = cond_set;
    2593          99 :         *inst++ = MOD_REG | reg_lmap[reg];
    2594          99 :         *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
    2595             :         /* The movzx instruction does not affect flags. */
    2596          99 :         *inst++ = GROUP_0F;
    2597          99 :         *inst++ = MOVZX_r_rm8;
    2598          99 :         *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
    2599             : 
    2600          99 :         if (reg != TMP_REG1)
    2601          51 :                 return SLJIT_SUCCESS;
    2602             : 
    2603          48 :         if (GET_OPCODE(op) < SLJIT_ADD) {
    2604           0 :                 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
    2605           0 :                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
    2606             :         }
    2607             : 
    2608             : #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
    2609             :                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    2610             :         compiler->skip_checks = 1;
    2611             : #endif
    2612          48 :         return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
    2613             : 
    2614             : #else
    2615             :         /* The SLJIT_CONFIG_X86_32 code path starts here. */
    2616             :         if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
    2617             :                 if (reg_map[dst] <= 4) {
    2618             :                         /* Low byte is accessible. */
    2619             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
    2620             :                         FAIL_IF(!inst);
    2621             :                         INC_SIZE(3 + 3);
    2622             :                         /* Set low byte to conditional flag. */
    2623             :                         *inst++ = GROUP_0F;
    2624             :                         *inst++ = cond_set;
    2625             :                         *inst++ = MOD_REG | reg_map[dst];
    2626             : 
    2627             :                         *inst++ = GROUP_0F;
    2628             :                         *inst++ = MOVZX_r_rm8;
    2629             :                         *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
    2630             :                         return SLJIT_SUCCESS;
    2631             :                 }
    2632             : 
    2633             :                 /* Low byte is not accessible. */
    2634             :                 if (cpu_has_cmov == -1)
    2635             :                         get_cpu_features();
    2636             : 
    2637             :                 if (cpu_has_cmov) {
    2638             :                         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
    2639             :                         /* An xor reg, reg operation would overwrite the flags. */
    2640             :                         EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
    2641             : 
    2642             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
    2643             :                         FAIL_IF(!inst);
    2644             :                         INC_SIZE(3);
    2645             : 
    2646             :                         *inst++ = GROUP_0F;
    2647             :                         /* cmovcc = setcc - 0x50. */
    2648             :                         *inst++ = cond_set - 0x50;
    2649             :                         *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
    2650             :                         return SLJIT_SUCCESS;
    2651             :                 }
    2652             : 
    2653             :                 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
    2654             :                 FAIL_IF(!inst);
    2655             :                 INC_SIZE(1 + 3 + 3 + 1);
    2656             :                 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2657             :                 /* Set al to conditional flag. */
    2658             :                 *inst++ = GROUP_0F;
    2659             :                 *inst++ = cond_set;
    2660             :                 *inst++ = MOD_REG | 0 /* eax */;
    2661             : 
    2662             :                 *inst++ = GROUP_0F;
    2663             :                 *inst++ = MOVZX_r_rm8;
    2664             :                 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
    2665             :                 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2666             :                 return SLJIT_SUCCESS;
    2667             :         }
    2668             : 
    2669             :         if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
    2670             :                 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
    2671             : 
    2672             :                 if (dst != SLJIT_R0) {
    2673             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
    2674             :                         FAIL_IF(!inst);
    2675             :                         INC_SIZE(1 + 3 + 2 + 1);
    2676             :                         /* Set low register to conditional flag. */
    2677             :                         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2678             :                         *inst++ = GROUP_0F;
    2679             :                         *inst++ = cond_set;
    2680             :                         *inst++ = MOD_REG | 0 /* eax */;
    2681             :                         *inst++ = OR_rm8_r8;
    2682             :                         *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
    2683             :                         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2684             :                 }
    2685             :                 else {
    2686             :                         inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
    2687             :                         FAIL_IF(!inst);
    2688             :                         INC_SIZE(2 + 3 + 2 + 2);
    2689             :                         /* Set low register to conditional flag. */
    2690             :                         *inst++ = XCHG_r_rm;
    2691             :                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
    2692             :                         *inst++ = GROUP_0F;
    2693             :                         *inst++ = cond_set;
    2694             :                         *inst++ = MOD_REG | 1 /* ecx */;
    2695             :                         *inst++ = OR_rm8_r8;
    2696             :                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
    2697             :                         *inst++ = XCHG_r_rm;
    2698             :                         *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
    2699             :                 }
    2700             :                 return SLJIT_SUCCESS;
    2701             :         }
    2702             : 
    2703             :         /* Set TMP_REG1 to the bit. */
    2704             :         inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
    2705             :         FAIL_IF(!inst);
    2706             :         INC_SIZE(1 + 3 + 3 + 1);
    2707             :         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2708             :         /* Set al to conditional flag. */
    2709             :         *inst++ = GROUP_0F;
    2710             :         *inst++ = cond_set;
    2711             :         *inst++ = MOD_REG | 0 /* eax */;
    2712             : 
    2713             :         *inst++ = GROUP_0F;
    2714             :         *inst++ = MOVZX_r_rm8;
    2715             :         *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
    2716             : 
    2717             :         *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
    2718             : 
    2719             :         if (GET_OPCODE(op) < SLJIT_ADD)
    2720             :                 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
    2721             : 
    2722             : #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
    2723             :                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
    2724             :         compiler->skip_checks = 1;
    2725             : #endif
    2726             :         return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
    2727             : #endif /* SLJIT_CONFIG_X86_64 */
    2728             : }
    2729             : 
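                     : /* The condition-code arithmetic in this file relies on the regular x86
                     :    opcode layout: Jcc rel32 is 0F 80+cc, SETcc is 0F 90+cc and CMOVcc is
                     :    0F 40+cc.  Hence "get_jump_code(type) + 0x10" above selects the SETcc
                     :    opcode, and "get_jump_code(type & 0xff) - 0x40" below selects the
                     :    matching CMOVcc. */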
    2730         126 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
    2731             :         sljit_s32 dst_reg,
    2732             :         sljit_s32 src, sljit_sw srcw)
    2733             : {
    2734             :         sljit_u8* inst;
    2735             : 
    2736         126 :         CHECK_ERROR();
    2737             :         CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
    2738             : 
    2739             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2740             :         dst_reg &= ~SLJIT_I32_OP;
    2741             : 
    2742             :         if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
    2743             :                 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
    2744             : #else
    2745         126 :         if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    2746           0 :                 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
    2747             : #endif
    2748             : 
    2749             :         /* ADJUST_LOCAL_OFFSET is not needed. */
    2750             :         CHECK_EXTRA_REGS(src, srcw, (void)0);
    2751             : 
    2752             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2753         126 :         compiler->mode32 = dst_reg & SLJIT_I32_OP;
    2754         126 :         dst_reg &= ~SLJIT_I32_OP;
    2755             : #endif
    2756             : 
    2757         126 :         if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
    2758           0 :                 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
    2759           0 :                 src = TMP_REG1;
    2760           0 :                 srcw = 0;
    2761             :         }
    2762             : 
    2763         126 :         inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
    2764         126 :         FAIL_IF(!inst);
    2765         126 :         *inst++ = GROUP_0F;
    2766         126 :         *inst = get_jump_code(type & 0xff) - 0x40;
    2767         126 :         return SLJIT_SUCCESS;
    2768             : }
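                     : 
                     : /* Illustrative use (a sketch; the compiler and a preceding flag-setting
                     :    operation are assumed):
                     : 
                     :         sljit_emit_cmov(compiler, SLJIT_LESS, SLJIT_R0, SLJIT_R1, 0);
                     : 
                     :    copies R1 into R0 only when the previous operation signalled "less
                     :    than".  Without CMOV support (or for the x86-32 destination registers
                     :    excluded above) sljit_emit_cmov_generic produces an equivalent
                     :    branch-based sequence instead. */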
    2769             : 
    2770       56694 : SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
    2771             : {
    2772       56694 :         CHECK_ERROR();
    2773             :         CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
    2774       56694 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    2775             : 
    2776             :         CHECK_EXTRA_REGS(dst, dstw, (void)0);
    2777             : 
    2778             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2779       56694 :         compiler->mode32 = 0;
    2780             : #endif
    2781             : 
    2782       56694 :         ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
    2783             : 
    2784             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2785       56694 :         if (NOT_HALFWORD(offset)) {
    2786           0 :                 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
    2787             : #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
    2788             :                 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
    2789             :                 return compiler->error;
    2790             : #else
    2791           0 :                 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
    2792             : #endif
    2793             :         }
    2794             : #endif
    2795             : 
    2796       56694 :         if (offset != 0)
    2797       56686 :                 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
    2798           8 :         return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
    2799             : }
    2800             : 
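sljit_get_local_base loads the machine address of a slot inside the function's local area (SLJIT_SP plus offset) into dst; on x86 this becomes a single LEA, or a plain MOV when the offset is zero. A hedged usage sketch (illustrative only; it assumes a compiler created elsewhere whose emitted entry reserved enough local space to cover the offset):

        /* Sketch: materialise the address of the local slot at offset 8
         * into SLJIT_R0, e.g. to pass a pointer to a local buffer. */
        if (sljit_get_local_base(compiler, SLJIT_R0, 0, 8) != SLJIT_SUCCESS) {
            /* propagate the compiler error to the caller */
        }
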
    2801           0 : SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
    2802             : {
    2803             :         sljit_u8 *inst;
    2804             :         struct sljit_const *const_;
    2805             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2806             :         sljit_s32 reg;
    2807             : #endif
    2808             : 
    2809           0 :         CHECK_ERROR_PTR();
    2810             :         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
    2811           0 :         ADJUST_LOCAL_OFFSET(dst, dstw);
    2812             : 
    2813             :         CHECK_EXTRA_REGS(dst, dstw, (void)0);
    2814             : 
    2815           0 :         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
    2816           0 :         PTR_FAIL_IF(!const_);
    2817           0 :         set_const(const_, compiler);
    2818             : 
    2819             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2820           0 :         compiler->mode32 = 0;
    2821           0 :         reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
    2822             : 
    2823           0 :         if (emit_load_imm64(compiler, reg, init_value))
    2824           0 :                 return NULL;
    2825             : #else
    2826             :         if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
    2827             :                 return NULL;
    2828             : #endif
    2829             : 
    2830           0 :         inst = (sljit_u8*)ensure_buf(compiler, 2);
    2831           0 :         PTR_FAIL_IF(!inst);
    2832             : 
    2833           0 :         *inst++ = 0;
    2834           0 :         *inst++ = 1;
    2835             : 
    2836             : #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    2837           0 :         if (dst & SLJIT_MEM)
    2838           0 :                 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
    2839           0 :                         return NULL;
    2840             : #endif
    2841             : 
    2842           0 :         return const_;
    2843             : }
    2844             : 
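sljit_emit_const emits a move of init_value into dst and records the location of the embedded immediate so it can be rewritten after the code has been generated. A sketch of the recording side (illustrative only, not part of the measured source; it assumes sljit_generate_code() and sljit_get_const_addr() from the public sljitLir.h API are available in this bundled copy):

        /* Sketch: load a placeholder into SLJIT_R0 and remember where the
         * immediate lives inside the generated code. */
        struct sljit_const *const_ = sljit_emit_const(compiler, SLJIT_R0, 0, 0);
        if (const_ == NULL) {
            /* allocation or emission failed; stop code generation */
        }
        /* ... emit the remainder of the function, then: */
        void *code = sljit_generate_code(compiler);          /* entry point */
        sljit_uw const_addr = sljit_get_const_addr(const_);  /* patch target */
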
    2845           0 : SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
    2846             : {
    2847             :         SLJIT_UNUSED_ARG(executable_offset);
    2848             : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    2849             :         sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
    2850             : #else
    2851           0 :         sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
    2852             : #endif
    2853           0 : }
    2854             : 
    2855           0 : SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
    2856             : {
    2857             :         SLJIT_UNUSED_ARG(executable_offset);
    2858           0 :         sljit_unaligned_store_sw((void*)addr, new_constant);
    2859           0 : }
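
sljit_set_jump_addr rewrites a jump or call target in already-generated code (on 32-bit x86 as a displacement relative to the end of the 4-byte field, hence the subtraction of addr + 4 and the executable offset above; on x86-64 as an absolute value), and sljit_set_const overwrites a constant recorded by sljit_emit_const with a new absolute value. A hedged patching sketch (illustrative only; const_addr comes from sljit_get_const_addr() and executable_offset from sljit_get_executable_offset(), both taken before the compiler was freed; depending on how the executable allocator is configured, the code region may first need to be made writable):

        /* Sketch: rewrite the recorded immediate so the generated code now
         * loads 42 instead of the placeholder value. */
        sljit_set_const(const_addr, 42, executable_offset);
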

Generated by: LCOV version 1.10

Generated at Wed, 26 Jan 2022 17:13:57 +0000
