diff --git a/include/fpu.h b/include/fpu.h index 44acd31bc9a894cd0d48b853f0788de4b87b2336..2b7cee5ecf24f0ecbfae6fe9c4a1bc9b750427ab 100644 --- a/include/fpu.h +++ b/include/fpu.h @@ -85,7 +85,7 @@ enum FPU_Round { ROUND_Chop = 3 }; -typedef struct { +typedef struct FPU_rec { FPU_Reg regs[9]; FPU_P_Reg p_regs[9]; FPU_Tag tags[9]; diff --git a/libretro/Makefile.libretro b/libretro/Makefile.libretro index eaf2acc9377fa01337047efb473643d9d628f45c..d92127c187076eceb9e388332e2f396e7e0df5f9 100644 --- a/libretro/Makefile.libretro +++ b/libretro/Makefile.libretro @@ -157,10 +157,10 @@ else ifeq ($(platform), wiiu) CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT) AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) WITH_FAKE_SDL = 1 + WITH_DYNAREC = ppc COMMONFLAGS += -DGEKKO -DWIIU -DHW_RVL -mwup -mcpu=750 -meabi -mhard-float -D__POWERPC__ -D__ppc__ -DMSB_FIRST -DWORDS_BIGENDIAN=1 -I./deps/include/ COMMONFLAGS += -U__INT32_TYPE__ -U __UINT32_TYPE__ -D__INT32_TYPE__=int -DWITH_FAKE_SDL STATIC_LINKING = 1 - WITH_DYNAREC = else ifeq ($(platform), libnx) include $(DEVKITPRO)/libnx/switch_rules TARGET := $(TARGET_NAME)_libretro_$(platform).a diff --git a/patch.diff b/patch.diff new file mode 100644 index 0000000000000000000000000000000000000000..e6acdf76f5b9c94969993451e162f72b373c2e04 --- /dev/null +++ b/patch.diff @@ -0,0 +1,1150 @@ +Index: include/fpu.h +=================================================================== +--- include/fpu.h (revision 4185) ++++ include/fpu.h (working copy) +@@ -80,7 +80,7 @@ + ROUND_Chop = 3 + }; + +-typedef struct { ++typedef struct FPU_rec { + FPU_Reg regs[9]; + FPU_P_Reg p_regs[9]; + FPU_Tag tags[9]; +Index: src/cpu/core_dynrec.cpp +=================================================================== +--- src/cpu/core_dynrec.cpp (revision 4185) ++++ src/cpu/core_dynrec.cpp (working copy) +@@ -138,6 +138,7 @@ + #define MIPSEL 0x03 + #define ARMV4LE 0x04 + #define ARMV7LE 0x05 ++#define POWERPC 0x06 + #define ARMV8LE 0x07 + + #if C_TARGETCPU == X86_64 +@@ -150,8 +151,15 @@ + #include "core_dynrec/risc_armv4le.h" + #elif C_TARGETCPU == ARMV8LE + #include "core_dynrec/risc_armv8le.h" ++#elif C_TARGETCPU == POWERPC ++#include "core_dynrec/risc_ppc.h" + #endif + ++#if !defined(WORDS_BIGENDIAN) ++#define gen_add_LE gen_add ++#define gen_mov_LE_word_to_reg gen_mov_word_to_reg ++#endif ++ + #include "core_dynrec/decoder.h" + + CacheBlockDynRec * LinkBlocks(BlockReturn ret) { +Index: src/cpu/core_dynrec/cache.h +=================================================================== +--- src/cpu/core_dynrec/cache.h (revision 4185) ++++ src/cpu/core_dynrec/cache.h (working copy) +@@ -171,7 +171,7 @@ + invalidation_map=(Bit8u*)malloc(4096); + memset(invalidation_map,0,4096); + } +-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) ++#if !defined(C_UNALIGNED_MEMORY) + host_writew(&invalidation_map[addr], + host_readw(&invalidation_map[addr])+0x101); + #else +@@ -193,7 +193,7 @@ + invalidation_map=(Bit8u*)malloc(4096); + memset(invalidation_map,0,4096); + } +-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) ++#if !defined(C_UNALIGNED_MEMORY) + host_writed(&invalidation_map[addr], + host_readd(&invalidation_map[addr])+0x1010101); + #else +@@ -240,7 +240,7 @@ + invalidation_map=(Bit8u*)malloc(4096); + memset(invalidation_map,0,4096); + } +-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) ++#if !defined(C_UNALIGNED_MEMORY) + host_writew(&invalidation_map[addr], + host_readw(&invalidation_map[addr])+0x101); + #else +@@ -269,7 +269,7 @@ + 
invalidation_map=(Bit8u*)malloc(4096); + memset(invalidation_map,0,4096); + } +-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) ++#if !defined(C_UNALIGNED_MEMORY) + host_writed(&invalidation_map[addr], + host_readd(&invalidation_map[addr])+0x1010101); + #else +@@ -553,6 +553,8 @@ + + static void dyn_return(BlockReturn retcode,bool ret_exception); + static void dyn_run_code(void); ++static void cache_block_before_close(void); ++static void cache_block_closing(Bit8u* block_start,Bitu block_size); + + + /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */ +@@ -614,18 +616,26 @@ + } + // setup the default blocks for block linkage returns + cache.pos=&cache_code_link_blocks[0]; ++ core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; ++ // can use up to PAGESIZE_TEMP-64 bytes ++ dyn_run_code(); ++ cache_block_before_close(); ++ ++ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64]; + link_blocks[0].cache.start=cache.pos; + // link code that returns with a special return code ++ // must be less than 32 bytes + dyn_return(BR_Link1,false); +- cache.pos=&cache_code_link_blocks[32]; ++ cache_block_before_close(); ++ ++ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32]; + link_blocks[1].cache.start=cache.pos; + // link code that returns with a special return code ++ // must be less than 32 bytes + dyn_return(BR_Link2,false); ++ cache_block_before_close(); + +- cache.pos=&cache_code_link_blocks[64]; +- core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; +-// link_blocks[1].cache.start=cache.pos; +- dyn_run_code(); ++ cache_block_closing(cache_code_link_blocks, PAGESIZE_TEMP); + + cache.free_pages=0; + cache.last_page=0; +Index: src/cpu/core_dynrec/decoder_basic.h +=================================================================== +--- src/cpu/core_dynrec/decoder_basic.h (revision 4185) ++++ src/cpu/core_dynrec/decoder_basic.h (working copy) +@@ -986,10 +986,10 @@ + // succeeded, use the pointer to avoid code invalidation + if (!addseg) { + if (!scaled_reg_used) { +- gen_mov_word_to_reg(ea_reg,(void*)val,true); ++ gen_mov_LE_word_to_reg(ea_reg,(void*)val,true); + } else { + DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } + } else { + if (!scaled_reg_used) { +@@ -997,7 +997,7 @@ + } else { + DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0); + } +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } + return; + } +@@ -1038,10 +1038,10 @@ + if (!addseg) { + if (!scaled_reg_used) { + MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } else { + DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } + } else { + if (!scaled_reg_used) { +@@ -1050,7 +1050,7 @@ + DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0); + } + ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } + return; + } +@@ -1115,11 +1115,11 @@ + // succeeded, use the pointer to avoid code invalidation + if (!addseg) { + MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } else { + MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? 
decode.seg_prefix : seg_base)); + ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg); +- gen_add(ea_reg,(void*)val); ++ gen_add_LE(ea_reg,(void*)val); + } + return; + } +Index: src/cpu/core_dynrec/decoder_opcodes.h +=================================================================== +--- src/cpu/core_dynrec/decoder_opcodes.h (revision 4185) ++++ src/cpu/core_dynrec/decoder_opcodes.h (working copy) +@@ -250,12 +250,12 @@ + Bitu val; + if (decode.big_op) { + if (decode_fetchd_imm(val)) { +- gen_mov_word_to_reg(FC_OP2,(void*)val,true); ++ gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true); + return; + } + } else { + if (decode_fetchw_imm(val)) { +- gen_mov_word_to_reg(FC_OP2,(void*)val,false); ++ gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false); + return; + } + } +@@ -287,13 +287,13 @@ + Bitu val; + if (decode.big_op) { + if (decode_fetchd_imm(val)) { +- gen_mov_word_to_reg(FC_OP1,(void*)val,true); ++ gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true); + MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg); + return; + } + } else { + if (decode_fetchw_imm(val)) { +- gen_mov_word_to_reg(FC_OP1,(void*)val,false); ++ gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false); + MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg); + return; + } +@@ -330,7 +330,7 @@ + if (decode.big_addr) { + Bitu val; + if (decode_fetchd_imm(val)) { +- gen_add(FC_ADDR,(void*)val); ++ gen_add_LE(FC_ADDR,(void*)val); + } else { + gen_add_imm(FC_ADDR,(Bit32u)val); + } +@@ -1179,7 +1179,7 @@ + gen_call_function_raw((void*)&dynrec_pop_word); + gen_extend_word(false,FC_RETOP); + } +- gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(®_eip):(void*)(®_ip),true); ++ gen_mov_word_from_reg(FC_RETOP,(void*)(®_eip),true); + + if (bytes) gen_add_direct_word(®_esp,bytes,true); + dyn_return(BR_Normal); +Index: src/cpu/core_dynrec/Makefile.am +=================================================================== +--- src/cpu/core_dynrec/Makefile.am (revision 4185) ++++ src/cpu/core_dynrec/Makefile.am (working copy) +@@ -2,4 +2,5 @@ + dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ + risc_armv4le.h risc_armv4le-common.h \ + risc_armv4le-o3.h risc_armv4le-thumb.h \ +- risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h ++ risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \ ++ risc_ppc.h +Index: src/cpu/core_dynrec/risc_ppc.h +=================================================================== +--- src/cpu/core_dynrec/risc_ppc.h (revision 0) ++++ src/cpu/core_dynrec/risc_ppc.h (working copy) +@@ -0,0 +1,897 @@ ++/* ++ * Copyright (C) 2002-2019 The DOSBox Team ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */
++
++// some configuring defines that specify the capabilities of this architecture
++// or aspects of the recompiling
++
++// protect FC_ADDR over function calls if necessary
++//#define DRC_PROTECT_ADDR_REG
++
++// try to use non-flags generating functions if possible
++#define DRC_FLAGS_INVALIDATION
++// try to replace _simple functions by code
++#define DRC_FLAGS_INVALIDATION_DCODE
++
++// type with the same size as a pointer
++#define DRC_PTR_SIZE_IM Bit32u
++
++// calling convention modifier
++#define DRC_FC /* nothing */
++#define DRC_CALL_CONV /* nothing */
++
++#define DRC_USE_REGS_ADDR
++#define DRC_USE_SEGS_ADDR
++
++// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
++#define USE_SDA_BASE
++
++// register mapping
++enum HostReg {
++	HOST_R0=0,
++	HOST_R1,
++	HOST_R2,
++	HOST_R3,
++	HOST_R4,
++	HOST_R5,
++	HOST_R6,
++	HOST_R7,
++	HOST_R8,
++	HOST_R9,
++	HOST_R10,
++	HOST_R11,
++	HOST_R12,
++	HOST_R13,
++	HOST_R14,
++	HOST_R15,
++	HOST_R16,
++	HOST_R17,
++	HOST_R18,
++	HOST_R19,
++	HOST_R20,
++	HOST_R21,
++	HOST_R22,
++	HOST_R23,
++	HOST_R24,
++	HOST_R25,
++	HOST_R26,
++	HOST_R27,
++	HOST_R28,
++	HOST_R29,
++	HOST_R30,
++	HOST_R31,
++
++	HOST_NONE
++};
++
++static const HostReg RegParams[] = {
++	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
++	HOST_R7, HOST_R8, HOST_R9, HOST_R10
++};
++
++#if C_FPU
++extern struct FPU_rec fpu;
++#endif
++
++#if defined(USE_SDA_BASE)
++extern Bit32u _SDA_BASE_[];
++#endif
++
++// register that holds function return values
++#define FC_RETOP HOST_R3
++
++// register used for address calculations, if the ABI does not
++// state that this register is preserved across function calls
++// then define DRC_PROTECT_ADDR_REG above
++#define FC_ADDR HOST_R29
++
++// register that points to Segs[]
++#define FC_SEGS_ADDR HOST_R30
++// register that points to cpu_regs[]
++#define FC_REGS_ADDR HOST_R31
++
++// register that holds the first parameter
++#define FC_OP1 RegParams[0]
++
++// register that holds the second parameter
++#define FC_OP2 RegParams[1]
++
++// special register that holds the third parameter for _R3 calls (byte accessible)
++#define FC_OP3 RegParams[2]
++
++// register that holds byte-accessible temporary values
++//#define FC_TMP_BA1 HOST_R6
++#define FC_TMP_BA1 FC_OP2
++
++// register that holds byte-accessible temporary values
++//#define FC_TMP_BA2 HOST_R7
++#define FC_TMP_BA2 FC_OP1
++
++// temporary register for LEA
++#define TEMP_REG_DRC HOST_R10
++
++#define IMM(op, regsd, rega, imm) (((op)<<26)|((regsd)<<21)|((rega)<<16)| (((Bit32u)(imm))&0xFFFF))
++#define EXT(regsd, rega, regb, op, rc) ( (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)| ((op)<<1)|(rc))
++#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)| ((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
++
++#define IMM_OP(op, regsd, rega, imm) cache_addd(IMM(op, regsd, rega, imm))
++#define EXT_OP(regsd, rega, regb, op, rc) cache_addd(EXT(regsd, rega, regb, op, rc))
++#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
++
++// move a full register from reg_src to reg_dst
++static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
++{
++	if (reg_dst != reg_src)
++		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
++}
++
++// move a 16bit constant value into dest_reg
++// the upper 16bit of the destination register may be destroyed
++static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
++{
++	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
++}
++
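++// --- Editor's note (illustration only, not part of the original patch) ---
++// The three encoders above cover the PowerPC instruction forms this backend
++// needs: IMM builds D-form words (opcode, RT/RS, RA, 16-bit immediate), EXT
++// builds X/XO-form words (primary opcode 31 plus a 10-bit extended opcode),
++// and RLW builds the rlwinm/rlwimi/rlwnm rotate-and-mask forms. A worked
++// example: gen_mov_regs emits EXT(src,dst,src,444,0), i.e. "or dst,src,src",
++// the canonical expansion of the mr mnemonic. For mr r3,r29 this is
++//   (31<<26)|(29<<21)|(3<<16)|(29<<11)|(444<<1) = 0x7FA3EB78,
++// the same encoding binutils produces for "mr r3,r29". A compile-time
++// sanity check could look like this (kept disabled, purely illustrative):
++#if 0
++typedef int drc_check_mr_encoding[
++	(EXT(HOST_R29, HOST_R3, HOST_R29, 444, 0) == 0x7FA3EB78) ? 1 : -1];
++#endif
++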
++DRC_PTR_SIZE_IM block_ptr; ++ ++// Helper for loading addresses ++static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest) ++{ ++ Bit32s off; ++ ++ if ((Bit16s)addr == addr) ++ return HOST_R0; ++ ++ off = addr - (Bit32s)&Segs; ++ if ((Bit16s)off == off) ++ { ++ addr = off; ++ return FC_SEGS_ADDR; ++ } ++ ++ off = addr - (Bit32s)&cpu_regs; ++ if ((Bit16s)off == off) ++ { ++ addr = off; ++ return FC_REGS_ADDR; ++ } ++ ++ off = addr - (Bit32s)block_ptr; ++ if ((Bit16s)off == off) ++ { ++ addr = off; ++ return HOST_R27; ++ } ++ ++#if C_FPU ++ off = addr - (Bit32s)&fpu; ++ if ((Bit16s)off == off) ++ { ++ addr = off; ++ return HOST_R28; ++ } ++#endif ++ ++#if defined(USE_SDA_BASE) ++ off = addr - (Bit32s)_SDA_BASE_; ++ if ((Bit16s)off == off) ++ { ++ addr = off; ++ return HOST_R13; ++ } ++#endif ++ ++ IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha ++ addr = (Bit16s)addr; ++ return dest; ++} ++ ++// move a 32bit constant value into dest_reg ++static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) ++{ ++ HostReg ld = gen_addr((Bit32s&)imm, dest_reg); ++ if (imm || ld != dest_reg) ++ IMM_OP(14, dest_reg, ld, imm); // addi dest_reg, ldr, imm@l ++} ++ ++// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg ++// 16bit moves may destroy the upper 16bit of the destination register ++static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { ++ Bit32s addr = (Bit32s)data; ++ HostReg ld = gen_addr(addr, dest_reg); ++ IMM_OP(dword ? 32:40, dest_reg, ld, addr); // lwz/lhz dest, addr@l(ld) ++} ++ ++// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg ++static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) { ++ Bit32u addr = (Bit32u)data; ++ gen_mov_dword_to_reg_imm(dest_reg, addr); ++ EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest ++} ++ ++// move an 8bit constant value into dest_reg ++// the upper 24bit of the destination register can be destroyed ++// this function does not use FC_OP1/FC_OP2 as dest_reg as these ++// registers might not be directly byte-accessible on some architectures ++static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { ++ gen_mov_word_to_reg_imm(dest_reg, imm); ++} ++ ++// move an 8bit constant value into dest_reg ++// the upper 24bit of the destination register can be destroyed ++// this function can use FC_OP1/FC_OP2 as dest_reg which are ++// not directly byte-accessible on some architectures ++static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { ++ gen_mov_word_to_reg_imm(dest_reg, imm); ++} ++ ++// move 32bit (dword==true) or 16bit (dword==false) of a register into memory ++static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) ++{ ++ Bit32s addr = (Bit32s)dest; ++ HostReg ld = gen_addr(addr, HOST_R8); ++ IMM_OP(dword ? 
36 : 44, src_reg, ld, addr); // stw/sth src,addr@l(ld)
++}
++
++// move an 8bit value from memory into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function does not use FC_OP1/FC_OP2 as dest_reg as these
++// registers might not be directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
++{
++	Bit32s addr = (Bit32s)data;
++	HostReg ld = gen_addr(addr, dest_reg);
++	IMM_OP(34, dest_reg, ld, addr); // lbz dest,addr@l(ld)
++}
++
++// move an 8bit value from memory into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function can use FC_OP1/FC_OP2 as dest_reg which are
++// not directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
++	gen_mov_byte_to_reg_low(dest_reg, data);
++}
++
++// move the lowest 8bit of a register into memory
++static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
++{
++	Bit32s addr = (Bit32s)dest;
++	HostReg ld = gen_addr(addr, HOST_R8);
++	IMM_OP(38, src_reg, ld, addr); // stb src_reg,addr@l(ld)
++}
++
++// convert an 8bit word to a 32bit dword
++// the register is zero-extended (sign==false) or sign-extended (sign==true)
++static void gen_extend_byte(bool sign,HostReg reg)
++{
++	if (sign)
++	{
++		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
++		return;
++	}
++
++	// check if previous instruction is "lbz reg, *"
++	if ((*(Bit32u*)(cache.pos-4) & 0xFFE00000) != IMM(34, reg, 0, 0))
++		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
++	// else register is already zero-extended
++}
++
++// convert a 16bit word to a 32bit dword
++// the register is zero-extended (sign==false) or sign-extended (sign==true)
++static void gen_extend_word(bool sign,HostReg reg)
++{
++	// check if previous instruction is "lhz reg, *"
++	Bit32u *op = (Bit32u*)(cache.pos-4);
++	if ((*op & 0xFFE00000) == IMM(40, reg, 0, 0))
++	{
++		if (sign) // change lhz -> lha
++			*op |= 0x08000000;
++		// else zero-extension already done
++		return;
++	}
++
++	if (sign)
++		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
++	else
++		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
++}
++
++// add a 32bit value from memory to a full register
++static void gen_add(HostReg reg,void* op)
++{
++	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
++	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
++}
++
++// add a 32bit value from host memory to a full register
++static void gen_add_LE(HostReg reg,void* op)
++{
++	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
++	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
++}
++
++// add a 32bit constant value to a full register
++static void gen_add_imm(HostReg reg,Bit32u imm)
++{
++	if ((Bit16s)imm != (Bit32s)imm)
++		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
++	if ((Bit16s)imm)
++		IMM_OP(14, reg, reg, imm); // addi reg, reg, imm@l
++}
++
++// and a 32bit constant value with a full register
++static void gen_and_imm(HostReg reg,Bit32u imm) {
++	Bits sbit,ebit,tbit,bbit,abit,i;
++
++	// sbit = number of leading 0 bits
++	// ebit = number of trailing 0 bits
++	// tbit = number of total 0 bits
++	// bbit = number of leading 1 bits
++	// abit = number of trailing 1 bits
++
++	if (imm == 0xFFFFFFFF)
++		return;
++
++	if (!imm)
++		return gen_mov_word_to_reg_imm(reg, 0);
++
++	sbit = ebit = tbit = bbit = abit = 0;
++	for (i=0; i < 32; i++)
++	{
++		if (!(imm &
(1<<(31-i))))
++		{
++			abit = 0;
++			tbit++;
++			if (sbit == i)
++				sbit++;
++			ebit++;
++		}
++		else
++		{
++			ebit = 0;
++			if (bbit == i)
++				bbit++;
++			abit++;
++		}
++	}
++
++	if (sbit >= 16)
++	{
++		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
++		return;
++	}
++	if (ebit >= 16)
++	{
++		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
++		return;
++	}
++
++	if (sbit + ebit == tbit)
++	{
++		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
++		return;
++	}
++
++	if (bbit + abit == (32 - tbit))
++	{
++		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
++		return;
++	}
++
++	gen_mov_dword_to_reg_imm(HOST_R8, imm);
++	EXT_OP(reg, reg, HOST_R8, 28, 0); // and reg, reg, r8
++}
++
++// move a 32bit constant value into memory
++static void gen_mov_direct_dword(void* dest,Bit32u imm) {
++	gen_mov_dword_to_reg_imm(HOST_R9, imm);
++	gen_mov_word_from_reg(HOST_R9, dest, 1);
++}
++
++// move an address into memory (assumes address != NULL)
++static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
++{
++	block_ptr = 0;
++	gen_mov_dword_to_reg_imm(HOST_R27, imm);
++	// this will probably be used to look-up the linked blocks
++	block_ptr = imm;
++	gen_mov_word_from_reg(HOST_R27, dest, 1);
++}
++
++// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
++static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
++{
++	HostReg ld;
++	Bit32s addr = (Bit32s)dest;
++
++	if (!dword)
++	{
++		imm &= 0xFFFF;
++		addr += 2;
++	}
++
++	if (!imm)
++		return;
++
++	ld = gen_addr(addr, HOST_R8);
++	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
++	if (dword && (Bit16s)imm != (Bit32s)imm)
++		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
++	if (!dword || (Bit16s)imm)
++		IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l
++	IMM_OP(dword ?
36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld) ++} ++ ++// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value ++static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { ++ gen_add_direct_word(dest, -(Bit32s)imm, dword); ++} ++ ++// effective address calculation, destination is dest_reg ++// scale_reg is scaled by scale (scale_reg*(2^scale)) and ++// added to dest_reg, then the immediate value is added ++static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) ++{ ++ if (scale) ++ { ++ RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // rlwinm r8,scale_reg,scale,0,31-scale ++ scale_reg = HOST_R8; ++ } ++ ++ gen_add_imm(dest_reg, imm); ++ EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled ++} ++ ++// effective address calculation, destination is dest_reg ++// dest_reg is scaled by scale (dest_reg*(2^scale)), ++// then the immediate value is added ++static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) ++{ ++ if (scale) ++ RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // rlwinm dest,dest,scale,0,31-scale ++ ++ gen_add_imm(dest_reg, imm); ++} ++ ++// helper function to choose direct or indirect call ++static void INLINE do_gen_call(void *func, Bit32u *pos) ++{ ++ Bit32s f = (Bit32s)func; ++ Bit32s off = f - (Bit32s)pos; ++ ++ // relative branches are limited to +/- ~32MB ++ if (off < 0x02000000 && off >= -0x02000000) ++ { ++ pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func ++ pos[1] = IMM(24, 0, 0, 0); // nop ++ pos[2] = IMM(24, 0, 0, 0); ++ pos[3] = IMM(24, 0, 0, 0); ++ return; ++ } ++ ++ pos[0] = IMM(15, HOST_R8, 0, f>>16); // lis r8,imm@h ++ pos[1] = IMM(24, HOST_R8, HOST_R8, f); // ori r8,r8,imm@l ++ pos[2] = EXT(HOST_R8, 9, 0, 467, 0); // mtctr r8 ++ pos[3] = IMM(19, 0b10100, 0, (528<<1)|1); // bctrl ++} ++ ++// generate a call to a parameterless function ++static void INLINE gen_call_function_raw(void * func) ++{ ++ do_gen_call(func, (Bit32u*)cache.pos); ++ cache.pos += 16; ++} ++ ++// generate a call to a function with paramcount parameters ++// note: the parameters are loaded in the architecture specific way ++// using the gen_load_param_ functions below ++static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) ++{ ++ Bit32u proc_addr=(Bit32u)cache.pos; ++ gen_call_function_raw(func); ++ return proc_addr; ++} ++ ++// load an immediate value as param'th function parameter ++static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { ++ gen_mov_dword_to_reg_imm(RegParams[param], imm); ++} ++ ++// load an address as param'th function parameter ++static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { ++ gen_load_param_imm(addr, param); ++} ++ ++// load a host-register as param'th function parameter ++static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { ++ gen_mov_regs(RegParams[param], (HostReg)reg); ++} ++ ++// load a value from memory as param'th function parameter ++static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { ++ gen_mov_word_to_reg(RegParams[param], (void*)mem, true); ++} ++ ++// jump to an address pointed at by ptr, offset is in imm ++static void gen_jmp_ptr(void * ptr,Bits imm=0) { ++ gen_mov_word_to_reg(HOST_R8,ptr,true); // r8 = *(Bit32u*)ptr ++ if ((Bit16s)imm != (Bit32s)imm) ++ IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha ++ IMM_OP(32, HOST_R8, HOST_R8, imm); // lwz r8, imm@l(r8) ++ EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 ++ IMM_OP(19, 
0b10100, 0, 528<<1); // bctr ++} ++ ++// short conditional jump (+-127 bytes) if register is zero ++// the destination is set by gen_fill_branch() later ++static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) ++{ ++ if (!dword) ++ IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF ++ else ++ EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg ++ ++ IMM_OP(16, 0b01100, 2, 0); // bc 12,CR0[Z] (beq) ++ return ((Bit32u)cache.pos-4); ++} ++ ++// short conditional jump (+-127 bytes) if register is nonzero ++// the destination is set by gen_fill_branch() later ++static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) ++{ ++ if (!dword) ++ IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF ++ else ++ EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg ++ ++ IMM_OP(16, 0b00100, 2, 0); // bc 4,CR0[Z] (bne) ++ return ((Bit32u)cache.pos-4); ++} ++ ++// calculate relative offset and fill it into the location pointed to by data ++static void gen_fill_branch(DRC_PTR_SIZE_IM data) ++{ ++#if C_DEBUG ++ Bits len=(Bit32u)cache.pos-data; ++ if (len<0) len=-len; ++ if (len >= 0x8000) LOG_MSG("Big jump %d",len); ++#endif ++ ++ ((Bit16u*)data)[1] =((Bit32u)cache.pos-data) & 0xFFFC; ++} ++ ++ ++// conditional jump if register is nonzero ++// for isdword==true the 32bit of the register are tested ++// for isdword==false the lowest 8bit of the register are tested ++static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword) ++{ ++ if (!dword) ++ IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF ++ else ++ EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg ++ ++ IMM_OP(16, 0b00100, 2, 0); // bne ++ return ((Bit32u)cache.pos-4); ++} ++ ++// compare 32bit-register against zero and jump if value less/equal than zero ++static Bit32u gen_create_branch_long_leqzero(HostReg reg) ++{ ++ EXT_OP(reg,reg,reg,444,1); // or. 
reg,reg,reg ++ ++ IMM_OP(16, 0b00100, 1, 0); // ble ++ return ((Bit32u)cache.pos-4); ++} ++ ++// calculate long relative offset and fill it into the location pointed to by data ++static void gen_fill_branch_long(Bit32u data) { ++ return gen_fill_branch((DRC_PTR_SIZE_IM)data); ++} ++ ++static void cache_block_closing(Bit8u* block_start,Bitu block_size) { ++#if defined(__GNUC__) ++ Bit8u* start = (Bit8u*)((Bit32u)block_start & -32); ++ ++ while (start < block_start + block_size) ++ { ++ asm volatile("dcbst %y0; icbi %y0" :: "Z"(*start)); ++ start += 32; ++ } ++ asm volatile("sync; isync"); ++#else ++ #error "Don't know how to flush/invalidate CacheBlock with this compiler" ++#endif ++} ++ ++static void cache_block_before_close(void) {} ++ ++// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it ++static Bit32s epilog_addr; ++static Bit8u *getCF_glue; ++static void gen_run_code(void) { ++ // prolog ++ IMM_OP(37, HOST_R1, HOST_R1, -32); // stwu sp,-32(sp) ++ EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 ++ EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0 ++ ++ IMM_OP(47, HOST_R26, HOST_R1, 8); // stmw r26, 8(sp) ++ ++ IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16); // lis FC_SEGS_ADDR, Segs@h ++ IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs); // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l ++ ++ IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16); // lis FC_REGS_ADDR, cpu_regs@h ++ IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs); // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l ++ ++#if C_FPU ++ IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16); // lis r28, fpu@h ++ IMM_OP(24, HOST_R28, HOST_R28, &fpu); // ori r28, r28, fpu@l ++#endif ++ ++ IMM_OP(36, HOST_R0, HOST_R1, 32+4); // stw r0,32+4(sp) ++ IMM_OP(19, 0b10100, 0, 528<<1); // bctr ++ ++ // epilog ++ epilog_addr = (Bit32s)cache.pos; ++ IMM_OP(32, HOST_R0, HOST_R1, 32+4); // lwz r0,32+4(sp) ++ IMM_OP(46, HOST_R26, HOST_R1, 8); // lmw r26, 8(sp) ++ EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0 ++ IMM_OP(14, HOST_R1, HOST_R1, 32); // addi sp, sp, 32 ++ IMM_OP(19, 0b10100, 0, 16<<1); // blr ++ ++ // trampoline to call get_CF() ++ getCF_glue = cache.pos; ++ gen_mov_dword_to_reg_imm(FC_OP1, (Bit32u)get_CF); // FC_OP1 = &get_CF ++ EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 ++ IMM_OP(19, 0b10100, 0, 528<<1); // bctr ++} ++ ++// return from a function ++static void gen_return_function(void) ++{ ++ Bit32s off = epilog_addr - (Bit32s)cache.pos; ++ ++ // relative branches are limited to +/- 32MB ++ if (off < 0x02000000 && off >= -0x02000000) { ++ cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b epilog ++ return; ++ } ++ ++ gen_mov_dword_to_reg_imm(HOST_R8, epilog_addr); ++ EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 ++ IMM_OP(19, 0b10100, 0, 528<<1); // bctr ++} ++ ++// called when a call to a function can be replaced by a ++// call to a simpler function ++static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) ++{ ++ Bit32u *op = (Bit32u*)pos; ++ Bit32u *end = op+4; ++ ++ switch (flags_type) { ++#if defined(DRC_FLAGS_INVALIDATION_DCODE) ++ // try to avoid function calls but rather directly fill in code ++ case t_ADDb: ++ case t_ADDw: ++ case t_ADDd: ++ *op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2 ++ break; ++ case t_ORb: ++ case t_ORw: ++ case t_ORd: ++ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2 ++ break; ++ case t_ADCb: ++ case t_ADCw: ++ case t_ADCd: ++ op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2 ++ op[1] = 
0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
++			op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = CF!=0)
++			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + CF!=0
++			return;
++		case t_SBBb:
++		case t_SBBw:
++		case t_SBBd:
++			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2
++			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
++			op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = CF==0)
++			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + CF==0
++			return;
++		case t_ANDb:
++		case t_ANDw:
++		case t_ANDd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SUBb:
++		case t_SUBw:
++		case t_SUBd:
++			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
++			break;
++		case t_XORb:
++		case t_XORw:
++		case t_XORd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_CMPb:
++		case t_CMPw:
++		case t_CMPd:
++		case t_TESTb:
++		case t_TESTw:
++		case t_TESTd:
++			break;
++		case t_INCb:
++		case t_INCw:
++		case t_INCd:
++			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
++			break;
++		case t_DECb:
++		case t_DECw:
++		case t_DECd:
++			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
++			break;
++		case t_NEGb:
++		case t_NEGw:
++		case t_NEGd:
++			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
++			break;
++		case t_SHLb:
++		case t_SHLw:
++		case t_SHLd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SHRb:
++		case t_SHRw:
++		case t_SHRd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SARb:
++			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
++		case t_SARw:
++			if (flags_type == t_SARw)
++				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
++		case t_SARd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_ROLb:
++			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
++		case t_ROLw:
++			if (flags_type == t_ROLw)
++				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
++		case t_ROLd:
++			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_RORb:
++			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
++		case t_RORw:
++			if (flags_type == t_RORw)
++				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
++		case t_RORd:
++			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
++			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_DSHLw: // technically not correct for FC_OP3 > 16
++			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
++			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
++			break;
++		case t_DSHLd:
++			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
++			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
++			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
++			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
++			return;
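++
++		// Editor's note (illustration only, not part of the original patch):
++		// the four instructions emitted for t_DSHLd implement the usual
++		// 64-bit double shift built from 32-bit parts,
++		//   result = (op1 << n) | (op2 >> (32 - n)),
++		// with subfic forming 32-n in place. In plain C the same computation is
++		//   Bit32u dshl(Bit32u op1, Bit32u op2, Bitu n) {
++		//       return (op1 << n) | (op2 >> (32 - n)); // valid for n in 1..31
++		//   }
++		// t_DSHRd below mirrors it with srw/slw swapped.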
++		case t_DSHRw: // technically not correct for FC_OP3 > 16
++			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
++			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
++			break;
++		case t_DSHRd:
++			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
++			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
++			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
++			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
++			return;
++#endif
++		default:
++			do_gen_call(fct_ptr, op);
++			return;
++	}
++
++	do
++	{
++		*op++ = IMM(24, 0, 0, 0); // nop
++	} while (op < end);
++}
++
++// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(40, dest_reg, FC_SEGS_ADDR, index); // lhz dest_reg, index(FC_SEGS_ADDR)
++}
++
++// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(32, dest_reg, FC_SEGS_ADDR, index); // lwz dest_reg, index(FC_SEGS_ADDR)
++}
++
++// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
++static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
++	gen_mov_seg32_to_reg(HOST_R8, index);
++	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
++}
++
++// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(40, dest_reg, FC_REGS_ADDR, index); // lhz dest_reg, index(FC_REGS_ADDR)
++}
++
++// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(32, dest_reg, FC_REGS_ADDR, index); // lwz dest_reg, index(FC_REGS_ADDR)
++}
++
++// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
++// the upper 24bit of the destination register can be destroyed
++// this function does not use FC_OP1/FC_OP2 as dest_reg as these
++// registers might not be directly byte-accessible on some architectures
++static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
++	IMM_OP(34, dest_reg, FC_REGS_ADDR, index); // lbz dest_reg, index(FC_REGS_ADDR)
++}
++
++// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
++// the upper 24bit of the destination register can be destroyed
++// this function can use FC_OP1/FC_OP2 as dest_reg which are
++// not directly byte-accessible on some architectures
++static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
++	gen_mov_regbyte_to_reg_low(dest_reg, index);
++}
++
++// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
++static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
++	IMM_OP(44, src_reg, FC_REGS_ADDR, index); // sth src_reg, index(FC_REGS_ADDR)
++}
++
++// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
++	IMM_OP(36, src_reg, FC_REGS_ADDR, index); // stw src_reg, index(FC_REGS_ADDR)
++}
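++
++// Editor's note (illustration only, not part of the original patch):
++// FC_REGS_ADDR (r31) is loaded with &cpu_regs once in gen_run_code and is
++// callee-saved under the PowerPC SysV ABI, so each accessor above costs a
++// single D-form load or store. For example,
++//   gen_mov_regval16_to_reg(HOST_R4, 8);
++// emits "lhz r4,8(r31)" (primary opcode 40); index is a byte offset into
++// cpu_regs and must respect the stated alignment rules.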
++
++// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
++static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
++	IMM_OP(38, src_reg, FC_REGS_ADDR, index); // stb src_reg, index(FC_REGS_ADDR)
++}
++
++// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
++	gen_mov_regval32_to_reg(HOST_R8, index);
++	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
++}
++
++// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
++static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
++	IMM_OP(dword ? 36 : 44, src_reg, FC_REGS_ADDR, index); // stw/sth src_reg, index(FC_REGS_ADDR)
++}
++
++// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
++	IMM_OP(dword ? 32 : 40, dest_reg, FC_REGS_ADDR, index); // lwz/lhz dest_reg, index(FC_REGS_ADDR)
++}
++
diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp
index 7a7746f8048d0b56bbc6d32db1cb7cf88358d7af..08b48e0273209b4f4950f7d475c399fbc6914261 100644
--- a/src/cpu/core_dynrec.cpp
+++ b/src/cpu/core_dynrec.cpp
@@ -153,6 +153,7 @@ static struct {
 #define MIPSEL 0x03
 #define ARMV4LE 0x04
 #define ARMV7LE 0x05
+#define POWERPC 0x06
 #define ARMV8LE 0x07
 
 #if C_TARGETCPU == X86_64
@@ -165,6 +166,13 @@ static struct {
 #include "core_dynrec/risc_armv4le.h"
 #elif C_TARGETCPU == ARMV8LE
 #include "core_dynrec/risc_armv8le.h"
+#elif C_TARGETCPU == POWERPC
+#include "core_dynrec/risc_ppc.h"
+#endif
+
+#if !defined(WORDS_BIGENDIAN)
+#define gen_add_LE gen_add
+#define gen_mov_LE_word_to_reg gen_mov_word_to_reg
 #endif
 
 #include "core_dynrec/decoder.h"
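Editor's note (illustration only; not part of the patch): the gen_mov_LE_word_to_reg/gen_add_LE pair exists because guest x86 data in memory is little-endian. On little-endian hosts the defines above simply alias them to the plain accessors; the PowerPC backend instead implements them with byte-reversed loads (lhbrx/lwbrx). As a plain-C sketch of the required load semantics, using a hypothetical helper name:

    static Bit32u read_guest_le32(const Bit8u *p) {
        return (Bit32u)p[0] | ((Bit32u)p[1] << 8) |
               ((Bit32u)p[2] << 16) | ((Bit32u)p[3] << 24);
    }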
diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am
index f135543e8fc2a7e3ee72f7c1ea1bea680b66baf4..57e7258eef0cce7bc427a58dbf27dae0f4d654a1 100644
--- a/src/cpu/core_dynrec/Makefile.am
+++ b/src/cpu/core_dynrec/Makefile.am
@@ -2,4 +2,5 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \
 	dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \
 	risc_armv4le.h risc_armv4le-common.h \
 	risc_armv4le-o3.h risc_armv4le-thumb.h \
-	risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h
+	risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \
+	risc_ppc.h
diff --git a/src/cpu/core_dynrec/cache.h b/src/cpu/core_dynrec/cache.h
index 3637eee80b24267c58b6dfb0b278057b80d706a0..fd9e9f03afc09e13443d6af6f16566270cbaea4a 100644
--- a/src/cpu/core_dynrec/cache.h
+++ b/src/cpu/core_dynrec/cache.h
@@ -179,7 +179,7 @@ public:
 			invalidation_map=(Bit8u*)malloc(4096);
 			memset(invalidation_map,0,4096);
 		}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 		host_writew(&invalidation_map[addr],
 			host_readw(&invalidation_map[addr])+0x101);
 #else
@@ -201,7 +201,7 @@ public:
 			invalidation_map=(Bit8u*)malloc(4096);
 			memset(invalidation_map,0,4096);
 		}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 		host_writed(&invalidation_map[addr],
 			host_readd(&invalidation_map[addr])+0x1010101);
 #else
@@ -248,7 +248,7 @@ public:
 			invalidation_map=(Bit8u*)malloc(4096);
 			memset(invalidation_map,0,4096);
 		}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 		host_writew(&invalidation_map[addr],
host_readw(&invalidation_map[addr])+0x101); #else @@ -277,7 +277,7 @@ public: invalidation_map=(Bit8u*)malloc(4096); memset(invalidation_map,0,4096); } -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) +#if !defined(C_UNALIGNED_MEMORY) host_writed(&invalidation_map[addr], host_readd(&invalidation_map[addr])+0x1010101); #else @@ -585,6 +585,8 @@ static INLINE void cache_addq(Bit64u val) { static void dyn_return(BlockReturn retcode,bool ret_exception); static void dyn_run_code(void); +static void cache_block_before_close(void); +static void cache_block_closing(Bit8u* block_start,Bitu block_size); /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */ @@ -656,18 +658,26 @@ static void cache_init(bool enable) { } // setup the default blocks for block linkage returns cache.pos=&cache_code_link_blocks[0]; + core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; + // can use up to PAGESIZE_TEMP-64 bytes + dyn_run_code(); + cache_block_before_close(); + + cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64]; link_blocks[0].cache.start=cache.pos; // link code that returns with a special return code + // must be less than 32 bytes dyn_return(BR_Link1,false); - cache.pos=&cache_code_link_blocks[32]; + cache_block_before_close(); + + cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32]; link_blocks[1].cache.start=cache.pos; // link code that returns with a special return code + // must be less than 32 bytes dyn_return(BR_Link2,false); + cache_block_before_close(); - cache.pos=&cache_code_link_blocks[64]; - core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; -// link_blocks[1].cache.start=cache.pos; - dyn_run_code(); + cache_block_closing(cache_code_link_blocks, PAGESIZE_TEMP); cache.free_pages=0; cache.last_page=0; diff --git a/src/cpu/core_dynrec/cache.h.orig b/src/cpu/core_dynrec/cache.h.orig new file mode 100644 index 0000000000000000000000000000000000000000..e0f67519df80dc6f77a4eac1595a41c62806cc51 --- /dev/null +++ b/src/cpu/core_dynrec/cache.h.orig @@ -0,0 +1,707 @@ +/* + * Copyright (C) 2002-2019 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+		cache_block_closing(cache_code_link_blocks, PAGESIZE_TEMP);
 
 		cache.free_pages=0;
 		cache.last_page=0;
diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h
index c8e2a8ef2cecc93db280a6edc74a3621645d215c..95488575bbdfe69b43d41782e5908d2d5fe1b0d3 100644
--- a/src/cpu/core_dynrec/decoder_basic.h
+++ b/src/cpu/core_dynrec/decoder_basic.h
@@ -995,10 +995,10 @@ skip_extend_word:
 			// succeeded, use the pointer to avoid code invalidation
 			if (!addseg) {
 				if (!scaled_reg_used) {
-					gen_mov_word_to_reg(ea_reg,(void*)val,true);
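+					// the _LE helpers treat *val as guest (little-endian)
+					// data; on little-endian hosts core_dynrec.cpp defines
+					// them back to the plain gen_mov/gen_add versions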
+					gen_mov_LE_word_to_reg(ea_reg,(void*)val,true);
 				} else {
 					DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				}
 			} else {
 				if (!scaled_reg_used) {
@@ -1006,7 +1006,7 @@ skip_extend_word:
 				} else {
 					DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 				}
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 			return;
 		}
@@ -1047,10 +1047,10 @@ skip_extend_word:
 		if (!addseg) {
 			if (!scaled_reg_used) {
 				MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			} else {
 				DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 		} else {
 			if (!scaled_reg_used) {
@@ -1059,7 +1059,7 @@ skip_extend_word:
 			} else {
 				DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 			}
 			ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-			gen_add(ea_reg,(void*)val);
+			gen_add_LE(ea_reg,(void*)val);
 		}
 		return;
 	}
@@ -1124,11 +1124,11 @@ skip_extend_word:
 			// succeeded, use the pointer to avoid code invalidation
 			if (!addseg) {
 				MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			} else {
 				MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base));
 				ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 			return;
 		}
diff --git a/src/cpu/core_dynrec/decoder_opcodes.h b/src/cpu/core_dynrec/decoder_opcodes.h
index 09e356313efb699e555662b7d261be34ff18084b..ae13433155e4fd9e3da1937f607dc908d0a268f0 100644
--- a/src/cpu/core_dynrec/decoder_opcodes.h
+++ b/src/cpu/core_dynrec/decoder_opcodes.h
@@ -250,12 +250,12 @@ static void dyn_prep_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false);
 			return;
 		}
 	}
@@ -287,13 +287,13 @@ static void dyn_mov_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true);
 			MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false);
 			MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
@@ -330,7 +330,7 @@ static void dyn_mov_byte_direct_al() {
 	if (decode.big_addr) {
 		Bitu val;
 		if (decode_fetchd_imm(val)) {
-			gen_add(FC_ADDR,(void*)val);
+			gen_add_LE(FC_ADDR,(void*)val);
 		} else {
 			gen_add_imm(FC_ADDR,(Bit32u)val);
 		}
@@ -1179,7 +1179,7 @@ static void dyn_ret_near(Bitu bytes) {
 		gen_call_function_raw((void*)&dynrec_pop_word);
 		gen_extend_word(false,FC_RETOP);
 	}
-	gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true);
+	gen_mov_word_from_reg(FC_RETOP,(void*)(&reg_eip),true);
 	if (bytes) gen_add_direct_word(&reg_esp,bytes,true);
 
 	dyn_return(BR_Normal);
diff --git a/src/cpu/core_dynrec/risc_ppc.h b/src/cpu/core_dynrec/risc_ppc.h
new file mode 100644
index 0000000000000000000000000000000000000000..b43dbfd3179a2c823510616d2ce6ff6ddf8bb79d
--- /dev/null
+++ b/src/cpu/core_dynrec/risc_ppc.h
@@ -0,0 +1,897 @@
+/*
+ *  Copyright (C) 2002-2019  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+// some configuring defines that specify the capabilities of this architecture
+// or aspects of the recompiling
+
+// protect FC_ADDR over function calls if necessary
+//#define DRC_PROTECT_ADDR_REG
+
+// try to use non-flags generating functions if possible
+#define DRC_FLAGS_INVALIDATION
+// try to replace _simple functions by code
+#define DRC_FLAGS_INVALIDATION_DCODE
+
+// type with the same size as a pointer
+#define DRC_PTR_SIZE_IM Bit32u
+
+// calling convention modifier
+#define DRC_FC /* nothing */
+#define DRC_CALL_CONV /* nothing */
+
+#define DRC_USE_REGS_ADDR
+#define DRC_USE_SEGS_ADDR
+
+// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
+#define USE_SDA_BASE
+
+// register mapping
+enum HostReg {
+	HOST_R0=0,
+	HOST_R1,
+	HOST_R2,
+	HOST_R3,
+	HOST_R4,
+	HOST_R5,
+	HOST_R6,
+	HOST_R7,
+	HOST_R8,
+	HOST_R9,
+	HOST_R10,
+	HOST_R11,
+	HOST_R12,
+	HOST_R13,
+	HOST_R14,
+	HOST_R15,
+	HOST_R16,
+	HOST_R17,
+	HOST_R18,
+	HOST_R19,
+	HOST_R20,
+	HOST_R21,
+	HOST_R22,
+	HOST_R23,
+	HOST_R24,
+	HOST_R25,
+	HOST_R26,
+	HOST_R27,
+	HOST_R28,
+	HOST_R29,
+	HOST_R30,
+	HOST_R31,
+
+	HOST_NONE
+};
+
+static const HostReg RegParams[] = {
+	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
+	HOST_R7, HOST_R8, HOST_R9, HOST_R10
+};
+
+#if C_FPU
+extern struct FPU_rec fpu;
+#endif
+
+#if defined(USE_SDA_BASE)
+extern Bit32u _SDA_BASE_[];
+#endif
+
+// register that holds function return values
+#define FC_RETOP HOST_R3
+
+// register used for address calculations, if the ABI does not
+// state that this register is preserved across function calls
+// then define DRC_PROTECT_ADDR_REG above
+#define FC_ADDR HOST_R29
+
+// register that points to Segs[]
+#define FC_SEGS_ADDR HOST_R30
+// register that points to cpu_regs[]
+#define FC_REGS_ADDR HOST_R31
+
+// register that holds the first parameter
+#define FC_OP1 RegParams[0]
+
+// register that holds the second parameter
+#define FC_OP2 RegParams[1]
+
+// special register that holds the third parameter for _R3 calls (byte accessible)
+#define FC_OP3 RegParams[2]
+
+// register that holds byte-accessible temporary values
+//#define FC_TMP_BA1 HOST_R6
+#define FC_TMP_BA1 FC_OP2
+
+// register that holds byte-accessible temporary values
+//#define FC_TMP_BA2 HOST_R7
+#define FC_TMP_BA2 FC_OP1
+
+// temporary register for LEA
+#define TEMP_REG_DRC HOST_R10
+
+#define IMM(op, regsd, rega, imm)           (((op)<<26)|((regsd)<<21)|((rega)<<16)|(((Bit32u)(imm))&0xFFFF))
+#define EXT(regsd, rega, regb, op, rc)      ( (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)|((op)<<1)|(rc))
+#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)|((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
+
+#define IMM_OP(op, regsd, rega, imm)           cache_addd(IMM(op, regsd, rega, imm))
+#define EXT_OP(regsd, rega, regb, op, rc)      cache_addd(EXT(regsd, rega, regb, op, rc))
+#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
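+
+// encoding reference (informational): IMM(14,3,0,0x20) assembles to
+// "addi r3,r0,0x20", i.e. li r3,0x20; EXT(3,4,5,266,0) is the X/XO-form
+// "add r3,r4,r5"; RLW(21,3,3,0,16,31,0) is "rlwinm r3,r3,0,16,31",
+// which keeps only the low halfword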
+
+// move a full register from reg_src to reg_dst
+static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
+{
+	if (reg_dst != reg_src)
+		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
+}
+
+// move a 16bit constant value into dest_reg
+// the upper 16bit of the destination register may be destroyed
+static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
+{
+	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
+}
+
+DRC_PTR_SIZE_IM block_ptr;
+
+// Helper for loading addresses
+static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest)
+{
+	Bit32s off;
+
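+	// try to reach the address as a 16-bit offset from a register that
+	// already holds a known base: r0 (reads as literal zero) for 16-bit
+	// absolute addresses, then Segs, cpu_regs, the current block, fpu and
+	// _SDA_BASE_; only when all of that fails is the high half of the
+	// address materialized into 'dest'
+	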
if ((Bit16s)addr == addr) + return HOST_R0; + + off = addr - (Bit32s)&Segs; + if ((Bit16s)off == off) + { + addr = off; + return FC_SEGS_ADDR; + } + + off = addr - (Bit32s)&cpu_regs; + if ((Bit16s)off == off) + { + addr = off; + return FC_REGS_ADDR; + } + + off = addr - (Bit32s)block_ptr; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R27; + } + +#if C_FPU + off = addr - (Bit32s)&fpu; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R28; + } +#endif + +#if defined(USE_SDA_BASE) + off = addr - (Bit32s)_SDA_BASE_; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R13; + } +#endif + + IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha + addr = (Bit16s)addr; + return dest; +} + +// move a 32bit constant value into dest_reg +static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) +{ + HostReg ld = gen_addr((Bit32s&)imm, dest_reg); + if (imm || ld != dest_reg) + IMM_OP(14, dest_reg, ld, imm); // addi dest_reg, ldr, imm@l +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { + Bit32s addr = (Bit32s)data; + HostReg ld = gen_addr(addr, dest_reg); + IMM_OP(dword ? 32:40, dest_reg, ld, addr); // lwz/lhz dest, addr@l(ld) +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg +static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) { + Bit32u addr = (Bit32u)data; + gen_mov_dword_to_reg_imm(dest_reg, addr); + EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { + gen_mov_word_to_reg_imm(dest_reg, imm); +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { + gen_mov_word_to_reg_imm(dest_reg, imm); +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into memory +static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) +{ + Bit32s addr = (Bit32s)dest; + HostReg ld = gen_addr(addr, HOST_R8); + IMM_OP(dword ? 
36 : 44, src_reg, ld, addr); // stw/sth src,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
+{
+	Bit32s addr = (Bit32s)data;
+	HostReg ld = gen_addr(addr, dest_reg);
+	IMM_OP(34, dest_reg, ld, addr); // lbz dest,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
+	gen_mov_byte_to_reg_low(dest_reg, data);
+}
+
+// move the lowest 8bit of a register into memory
+static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
+{
+	Bit32s addr = (Bit32s)dest;
+	HostReg ld = gen_addr(addr, HOST_R8);
+	IMM_OP(38, src_reg, ld, addr); // stb src_reg,addr@l(ld)
+}
+
+// convert an 8bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_byte(bool sign,HostReg reg)
+{
+	if (sign)
+	{
+		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
+		return;
+	}
+
+	// check if previous instruction is "lbz reg, *"
+	if ((*(Bit32u*)(cache.pos-4) & 0xFFE00000) != IMM(34, reg, 0, 0))
+		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
+	// else register is already zero-extended
+}
+
+// convert a 16bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_word(bool sign,HostReg reg)
+{
+	// check if previous instruction is "lhz reg, *"
+	Bit32u *op = (Bit32u*)(cache.pos-4);
+	if ((*op & 0xFFE00000) == IMM(40, reg, 0, 0))
+	{
+		if (sign) // change lhz -> lha
+			*op |= 0x08000000;
+		// else zero-extension already done
+		return;
+	}
+
+	if (sign)
+		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
+	else
+		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
+}
+
+// add a 32bit value from memory to a full register
+static void gen_add(HostReg reg,void* op)
+{
+	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit value from host memory to a full register
+static void gen_add_LE(HostReg reg,void* op)
+{
+	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit constant value to a full register
+static void gen_add_imm(HostReg reg,Bit32u imm)
+{
+	if ((Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
+	if ((Bit16s)imm)
+		IMM_OP(14, reg, reg, imm); // addi reg, reg, imm@l
+}
+
+// and a 32bit constant value with a full register
+static void gen_and_imm(HostReg reg,Bit32u imm) {
+	Bits sbit,ebit,tbit,bbit,abit,i;
+
+	// sbit = number of leading 0 bits
+	// ebit = number of trailing 0 bits
+	// tbit = number of total 0 bits
+	// bbit = number of leading 1 bits
+	// abit = number of trailing 1 bits
+
+	if (imm == 0xFFFFFFFF)
+		return;
+
+	if (!imm)
+		return gen_mov_word_to_reg_imm(reg, 0);
+
+	sbit = ebit = tbit = bbit = abit = 0;
+	for (i=0; i < 32; i++)
+	{
+		if (!(imm & (1<<(31-i))))
+		{
+			abit = 0;
+			tbit++;
+			if (sbit == i)
+				sbit++;
+			ebit++;
+		}
+		else
+		{
+			ebit = 0;
+			if (bbit == i)
+				bbit++;
+			abit++;
+		}
+	}
+
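+	// example: imm=0x00FFFF00 scans to sbit=8, ebit=8, tbit=16;
+	// sbit+ebit==tbit, so the single "rlwinm reg,reg,0,8,23" case below
+	// implements the whole mask (keep bits 8..23, clear the rest)
+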
+	if (sbit >= 16)
+	{
+		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
+		return;
+	}
+	if (ebit >= 16)
+	{
+		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
+		return;
+	}
+
+	if (sbit + ebit == tbit)
+	{
+		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
+		return;
+	}
+
+	if (bbit + abit == (32 - tbit))
+	{
+		RLW_OP(21,reg,reg,0,31-abit,bbit,0); // rlwinm reg,reg,0,31-abit,bbit
+		return;
+	}
+
+	gen_mov_dword_to_reg_imm(HOST_R8, imm);
+	EXT_OP(reg, reg, HOST_R8, 28, 0); // and reg, reg, r8
+}
+
+// move a 32bit constant value into memory
+static void gen_mov_direct_dword(void* dest,Bit32u imm) {
+	gen_mov_dword_to_reg_imm(HOST_R9, imm);
+	gen_mov_word_from_reg(HOST_R9, dest, 1);
+}
+
+// move an address into memory (assumes address != NULL)
+static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
+{
+	block_ptr = 0;
+	gen_mov_dword_to_reg_imm(HOST_R27, imm);
+	// this will probably be used to look-up the linked blocks
+	block_ptr = imm;
+	gen_mov_word_from_reg(HOST_R27, dest, 1);
+}
+
+// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
+static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
+{
+	HostReg ld;
+	Bit32s addr = (Bit32s)dest;
+
+	if (!dword)
+	{
+		imm &= 0xFFFF;
+		addr += 2;
+	}
+
+	if (!imm)
+		return;
+
+	ld = gen_addr(addr, HOST_R8);
+	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
+	if (dword && (Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
+	if (!dword || (Bit16s)imm)
+		IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l
+	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
+}
+
+// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value
+static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
+	gen_add_direct_word(dest, -(Bit32s)imm, dword);
+}
+
+// effective address calculation, destination is dest_reg
+// scale_reg is scaled by scale (scale_reg*(2^scale)) and
+// added to dest_reg, then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+	{
+		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // rlwinm r8,scale_reg,scale,0,31-scale
+		scale_reg = HOST_R8;
+	}
+
+	gen_add_imm(dest_reg, imm);
+	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
+}
+
+// effective address calculation, destination is dest_reg
+// dest_reg is scaled by scale (dest_reg*(2^scale)),
+// then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // rlwinm dest,dest,scale,0,31-scale
+
+	gen_add_imm(dest_reg, imm);
+}
+
+// helper function to choose direct or indirect call
+static void INLINE do_gen_call(void *func, Bit32u *pos)
+{
+	Bit32s f = (Bit32s)func;
+	Bit32s off = f - (Bit32s)pos;
+
+	// relative branches are limited to +/- ~32MB
+	if (off < 0x02000000 && off >= -0x02000000)
+	{
+		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
+		pos[1] = IMM(24, 0, 0, 0); // nop
+		pos[2] = IMM(24, 0, 0, 0);
+		pos[3] = IMM(24, 0, 0, 0);
+		return;
+	}
+
+	pos[0] = IMM(15, HOST_R8, 0, f>>16);      // lis r8,imm@h
+	pos[1] = IMM(24, HOST_R8, HOST_R8, f);    // ori r8,r8,imm@l
+	pos[2] = EXT(HOST_R8, 9, 0, 467, 0);      // mtctr r8
+	pos[3] = IMM(19, 0b10100, 0, (528<<1)|1); // bctrl
+}
+
+// generate a call to a parameterless 
function +static void INLINE gen_call_function_raw(void * func) +{ + do_gen_call(func, (Bit32u*)cache.pos); + cache.pos += 16; +} + +// generate a call to a function with paramcount parameters +// note: the parameters are loaded in the architecture specific way +// using the gen_load_param_ functions below +static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) +{ + Bit32u proc_addr=(Bit32u)cache.pos; + gen_call_function_raw(func); + return proc_addr; +} + +// load an immediate value as param'th function parameter +static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { + gen_mov_dword_to_reg_imm(RegParams[param], imm); +} + +// load an address as param'th function parameter +static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { + gen_load_param_imm(addr, param); +} + +// load a host-register as param'th function parameter +static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { + gen_mov_regs(RegParams[param], (HostReg)reg); +} + +// load a value from memory as param'th function parameter +static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { + gen_mov_word_to_reg(RegParams[param], (void*)mem, true); +} + +// jump to an address pointed at by ptr, offset is in imm +static void gen_jmp_ptr(void * ptr,Bits imm=0) { + gen_mov_word_to_reg(HOST_R8,ptr,true); // r8 = *(Bit32u*)ptr + if ((Bit16s)imm != (Bit32s)imm) + IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha + IMM_OP(32, HOST_R8, HOST_R8, imm); // lwz r8, imm@l(r8) + EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 + IMM_OP(19, 0b10100, 0, 528<<1); // bctr +} + +// short conditional jump (+-127 bytes) if register is zero +// the destination is set by gen_fill_branch() later +static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF + else + EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg + + IMM_OP(16, 0b01100, 2, 0); // bc 12,CR0[Z] (beq) + return ((Bit32u)cache.pos-4); +} + +// short conditional jump (+-127 bytes) if register is nonzero +// the destination is set by gen_fill_branch() later +static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF + else + EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg + + IMM_OP(16, 0b00100, 2, 0); // bc 4,CR0[Z] (bne) + return ((Bit32u)cache.pos-4); +} + +// calculate relative offset and fill it into the location pointed to by data +static void gen_fill_branch(DRC_PTR_SIZE_IM data) +{ +#if C_DEBUG + Bits len=(Bit32u)cache.pos-data; + if (len<0) len=-len; + if (len >= 0x8000) LOG_MSG("Big jump %d",len); +#endif + + ((Bit16u*)data)[1] =((Bit32u)cache.pos-data) & 0xFFFC; +} + + +// conditional jump if register is nonzero +// for isdword==true the 32bit of the register are tested +// for isdword==false the lowest 8bit of the register are tested +static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF + else + EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg + + IMM_OP(16, 0b00100, 2, 0); // bne + return ((Bit32u)cache.pos-4); +} + +// compare 32bit-register against zero and jump if value less/equal than zero +static Bit32u gen_create_branch_long_leqzero(HostReg reg) +{ + EXT_OP(reg,reg,reg,444,1); // or. 
reg,reg,reg + + IMM_OP(16, 0b00100, 1, 0); // ble + return ((Bit32u)cache.pos-4); +} + +// calculate long relative offset and fill it into the location pointed to by data +static void gen_fill_branch_long(Bit32u data) { + return gen_fill_branch((DRC_PTR_SIZE_IM)data); +} + +static void cache_block_closing(Bit8u* block_start,Bitu block_size) { +#if defined(__GNUC__) + Bit8u* start = (Bit8u*)((Bit32u)block_start & -32); + + while (start < block_start + block_size) + { + asm volatile("dcbst %y0; icbi %y0" :: "Z"(*start)); + start += 32; + } + asm volatile("sync; isync"); +#else + #error "Don't know how to flush/invalidate CacheBlock with this compiler" +#endif +} + +static void cache_block_before_close(void) {} + +// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it +static Bit32s epilog_addr; +static Bit8u *getCF_glue; +static void gen_run_code(void) { + // prolog + IMM_OP(37, HOST_R1, HOST_R1, -32); // stwu sp,-32(sp) + EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 + EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0 + + IMM_OP(47, HOST_R26, HOST_R1, 8); // stmw r26, 8(sp) + + IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16); // lis FC_SEGS_ADDR, Segs@h + IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs); // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l + + IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16); // lis FC_REGS_ADDR, cpu_regs@h + IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs); // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l + +#if C_FPU + IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16); // lis r28, fpu@h + IMM_OP(24, HOST_R28, HOST_R28, &fpu); // ori r28, r28, fpu@l +#endif + + IMM_OP(36, HOST_R0, HOST_R1, 32+4); // stw r0,32+4(sp) + IMM_OP(19, 0b10100, 0, 528<<1); // bctr + + // epilog + epilog_addr = (Bit32s)cache.pos; + IMM_OP(32, HOST_R0, HOST_R1, 32+4); // lwz r0,32+4(sp) + IMM_OP(46, HOST_R26, HOST_R1, 8); // lmw r26, 8(sp) + EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0 + IMM_OP(14, HOST_R1, HOST_R1, 32); // addi sp, sp, 32 + IMM_OP(19, 0b10100, 0, 16<<1); // blr + + // trampoline to call get_CF() + getCF_glue = cache.pos; + gen_mov_dword_to_reg_imm(FC_OP1, (Bit32u)get_CF); // FC_OP1 = &get_CF + EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 + IMM_OP(19, 0b10100, 0, 528<<1); // bctr +} + +// return from a function +static void gen_return_function(void) +{ + Bit32s off = epilog_addr - (Bit32s)cache.pos; + + // relative branches are limited to +/- 32MB + if (off < 0x02000000 && off >= -0x02000000) { + cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b epilog + return; + } + + gen_mov_dword_to_reg_imm(HOST_R8, epilog_addr); + EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 + IMM_OP(19, 0b10100, 0, 528<<1); // bctr +} + +// called when a call to a function can be replaced by a +// call to a simpler function +static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) +{ + Bit32u *op = (Bit32u*)pos; + Bit32u *end = op+4; + + switch (flags_type) { +#if defined(DRC_FLAGS_INVALIDATION_DCODE) + // try to avoid function calls but rather directly fill in code + case t_ADDb: + case t_ADDw: + case t_ADDd: + *op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2 + break; + case t_ORb: + case t_ORw: + case t_ORd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2 + break; + case t_ADCb: + case t_ADCw: + case t_ADCd: + op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2 + op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF + op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic 
r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = CF!=0)
+			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + CF!=0
+			return;
+		case t_SBBb:
+		case t_SBBw:
+		case t_SBBd:
+			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2
+			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
+			op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = CF==0)
+			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + CF==0
+			return;
+		case t_ANDb:
+		case t_ANDw:
+		case t_ANDd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SUBb:
+		case t_SUBw:
+		case t_SUBd:
+			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
+			break;
+		case t_XORb:
+		case t_XORw:
+		case t_XORd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_CMPb:
+		case t_CMPw:
+		case t_CMPd:
+		case t_TESTb:
+		case t_TESTw:
+		case t_TESTd:
+			break;
+		case t_INCb:
+		case t_INCw:
+		case t_INCd:
+			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
+			break;
+		case t_DECb:
+		case t_DECw:
+		case t_DECd:
+			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
+			break;
+		case t_NEGb:
+		case t_NEGw:
+		case t_NEGd:
+			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
+			break;
+		case t_SHLb:
+		case t_SHLw:
+		case t_SHLd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SHRb:
+		case t_SHRw:
+		case t_SHRd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SARb:
+			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
+		case t_SARw:
+			if (flags_type == t_SARw)
+				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
+		case t_SARd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_ROLb:
+			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
+		case t_ROLw:
+			if (flags_type == t_ROLw)
+				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+		case t_ROLd:
+			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_RORb:
+			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
+		case t_RORw:
+			if (flags_type == t_RORw)
+				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+		case t_RORd:
+			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
+			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_DSHLw: // technically not correct for FC_OP3 > 16
+			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
+			break;
+		case t_DSHLd:
+			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
+			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
+			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+			return;
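+		// the 16-bit double shifts first mirror the second operand into
+		// the other halfword with rlwimi, so a single 32-bit rotate/shift
+		// can supply the bits that cross the halfword boundary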
+		case t_DSHRw: // technically not correct for FC_OP3 > 16
+			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
+			break;
+		case t_DSHRd:
+			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
+			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
+			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+			return;
+#endif
+		default:
+			do_gen_call(fct_ptr, op);
+			return;
+	}
+
+	do
+	{
+		*op++ = IMM(24, 0, 0, 0); // nop
+	} while (op < end);
+}
+
+// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(40, dest_reg, FC_SEGS_ADDR, index); // lhz dest_reg, index(FC_SEGS_ADDR)
+}
+
+// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(32, dest_reg, FC_SEGS_ADDR, index); // lwz dest_reg, index(FC_SEGS_ADDR)
+}
+
+// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
+	gen_mov_seg32_to_reg(HOST_R8, index);
+	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
+}
+
+// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(40, dest_reg, FC_REGS_ADDR, index); // lhz dest_reg, index(FC_REGS_ADDR)
+}
+
+// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(32, dest_reg, FC_REGS_ADDR, index); // lwz dest_reg, index(FC_REGS_ADDR)
+}
+
+// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
+	IMM_OP(34, dest_reg, FC_REGS_ADDR, index); // lbz dest_reg, index(FC_REGS_ADDR)
+}
+
+// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
+	gen_mov_regbyte_to_reg_low(dest_reg, index);
+}
+
+// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
+static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
+	IMM_OP(44, src_reg, FC_REGS_ADDR, index); // sth src_reg, index(FC_REGS_ADDR)
+}
+
+// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
+	IMM_OP(36, src_reg, FC_REGS_ADDR, index); // stw src_reg, index(FC_REGS_ADDR)
+}
+
+// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
+static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
+	IMM_OP(38, src_reg, FC_REGS_ADDR, index); // stb src_reg, index(FC_REGS_ADDR)
+}
+
+// add a 32bit 
value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero) +static void gen_add_regval32_to_reg(HostReg reg,Bitu index) { + gen_mov_regval32_to_reg(HOST_R8, index); + EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8 +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) +static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) { + IMM_OP(dword ? 36 : 44, src_reg, FC_REGS_ADDR, index); // stw/sth src_reg, index(FC_REGS_ADDR) +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) { + IMM_OP(dword ? 32 : 40, dest_reg, FC_REGS_ADDR, index); // lwz/lhz dest_reg, index(FC_REGS_ADDR) +} +
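+
+// note: with FC_REGS_ADDR pinned to r31 by gen_run_code, each of the
+// cpu_regs accessors above is a single D-form load or store, e.g.
+// gen_mov_regval32_to_reg(FC_OP1,idx) emits just "lwz r3,idx(r31)"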