diff --git a/include/fpu.h b/include/fpu.h
index 44acd31bc9a894cd0d48b853f0788de4b87b2336..2b7cee5ecf24f0ecbfae6fe9c4a1bc9b750427ab 100644
--- a/include/fpu.h
+++ b/include/fpu.h
@@ -85,7 +85,7 @@ enum FPU_Round {
 	ROUND_Chop    = 3
 };
 
-typedef struct {
+typedef struct FPU_rec {
 	FPU_Reg		regs[9];
 	FPU_P_Reg	p_regs[9];
 	FPU_Tag		tags[9];
diff --git a/libretro/Makefile.libretro b/libretro/Makefile.libretro
index eaf2acc9377fa01337047efb473643d9d628f45c..d92127c187076eceb9e388332e2f396e7e0df5f9 100644
--- a/libretro/Makefile.libretro
+++ b/libretro/Makefile.libretro
@@ -157,10 +157,10 @@ else ifeq ($(platform), wiiu)
 	CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT)
 	AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT)
 	WITH_FAKE_SDL = 1
+	WITH_DYNAREC = ppc
 	COMMONFLAGS += -DGEKKO -DWIIU -DHW_RVL -mwup -mcpu=750 -meabi -mhard-float -D__POWERPC__ -D__ppc__ -DMSB_FIRST -DWORDS_BIGENDIAN=1 -I./deps/include/
 	COMMONFLAGS += -U__INT32_TYPE__ -U __UINT32_TYPE__ -D__INT32_TYPE__=int -DWITH_FAKE_SDL
 	STATIC_LINKING = 1
-	WITH_DYNAREC =
 else ifeq ($(platform), libnx)
 	include $(DEVKITPRO)/libnx/switch_rules
 	TARGET := $(TARGET_NAME)_libretro_$(platform).a
diff --git a/patch.diff b/patch.diff
new file mode 100644
index 0000000000000000000000000000000000000000..e6acdf76f5b9c94969993451e162f72b373c2e04
--- /dev/null
+++ b/patch.diff
@@ -0,0 +1,1150 @@
+Index: include/fpu.h
+===================================================================
+--- include/fpu.h	(revision 4185)
++++ include/fpu.h	(working copy)
+@@ -80,7 +80,7 @@
+ 	ROUND_Chop    = 3
+ };
+ 
+-typedef struct {
++typedef struct FPU_rec {
+ 	FPU_Reg		regs[9];
+ 	FPU_P_Reg	p_regs[9];
+ 	FPU_Tag		tags[9];
+Index: src/cpu/core_dynrec.cpp
+===================================================================
+--- src/cpu/core_dynrec.cpp	(revision 4185)
++++ src/cpu/core_dynrec.cpp	(working copy)
+@@ -138,6 +138,7 @@
+ #define MIPSEL		0x03
+ #define ARMV4LE		0x04
+ #define ARMV7LE		0x05
++#define POWERPC		0x06
+ #define ARMV8LE		0x07
+ 
+ #if C_TARGETCPU == X86_64
+@@ -150,8 +151,15 @@
+ #include "core_dynrec/risc_armv4le.h"
+ #elif C_TARGETCPU == ARMV8LE
+ #include "core_dynrec/risc_armv8le.h"
++#elif C_TARGETCPU == POWERPC
++#include "core_dynrec/risc_ppc.h"
+ #endif
+ 
++#if !defined(WORDS_BIGENDIAN)
++#define gen_add_LE gen_add
++#define gen_mov_LE_word_to_reg gen_mov_word_to_reg
++#endif
++
+ #include "core_dynrec/decoder.h"
+ 
+ CacheBlockDynRec * LinkBlocks(BlockReturn ret) {
+Index: src/cpu/core_dynrec/cache.h
+===================================================================
+--- src/cpu/core_dynrec/cache.h	(revision 4185)
++++ src/cpu/core_dynrec/cache.h	(working copy)
+@@ -171,7 +171,7 @@
+ 			invalidation_map=(Bit8u*)malloc(4096);
+ 			memset(invalidation_map,0,4096);
+ 		}
+-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
++#if !defined(C_UNALIGNED_MEMORY)
+ 		host_writew(&invalidation_map[addr],
+ 			host_readw(&invalidation_map[addr])+0x101);
+ #else
+@@ -193,7 +193,7 @@
+ 			invalidation_map=(Bit8u*)malloc(4096);
+ 			memset(invalidation_map,0,4096);
+ 		}
+-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
++#if !defined(C_UNALIGNED_MEMORY)
+ 		host_writed(&invalidation_map[addr],
+ 			host_readd(&invalidation_map[addr])+0x1010101);
+ #else
+@@ -240,7 +240,7 @@
+ 				invalidation_map=(Bit8u*)malloc(4096);
+ 				memset(invalidation_map,0,4096);
+ 			}
+-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
++#if !defined(C_UNALIGNED_MEMORY)
+ 			host_writew(&invalidation_map[addr],
+ 				host_readw(&invalidation_map[addr])+0x101);
+ #else
+@@ -269,7 +269,7 @@
+ 				invalidation_map=(Bit8u*)malloc(4096);
+ 				memset(invalidation_map,0,4096);
+ 			}
+-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
++#if !defined(C_UNALIGNED_MEMORY)
+ 			host_writed(&invalidation_map[addr],
+ 				host_readd(&invalidation_map[addr])+0x1010101);
+ #else
+@@ -553,6 +553,8 @@
+ 
+ static void dyn_return(BlockReturn retcode,bool ret_exception);
+ static void dyn_run_code(void);
++static void cache_block_before_close(void);
++static void cache_block_closing(Bit8u* block_start,Bitu block_size);
+ 
+ 
+ /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */
+@@ -614,18 +616,26 @@
+ 		}
+ 		// setup the default blocks for block linkage returns
+ 		cache.pos=&cache_code_link_blocks[0];
++		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
++		// can use up to PAGESIZE_TEMP-64 bytes
++		dyn_run_code();
++		cache_block_before_close();
++
++		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64];
+ 		link_blocks[0].cache.start=cache.pos;
+ 		// link code that returns with a special return code
++		// must be less than 32 bytes
+ 		dyn_return(BR_Link1,false);
+-		cache.pos=&cache_code_link_blocks[32];
++		cache_block_before_close();
++
++		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32];
+ 		link_blocks[1].cache.start=cache.pos;
+ 		// link code that returns with a special return code
++		// must be less than 32 bytes
+ 		dyn_return(BR_Link2,false);
++		cache_block_before_close();
+ 
+-		cache.pos=&cache_code_link_blocks[64];
+-		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
+-//		link_blocks[1].cache.start=cache.pos;
+-		dyn_run_code();
++		cache_block_closing(cache_code_link_blocks, PAGESIZE_TEMP);
+ 
+ 		cache.free_pages=0;
+ 		cache.last_page=0;
+Index: src/cpu/core_dynrec/decoder_basic.h
+===================================================================
+--- src/cpu/core_dynrec/decoder_basic.h	(revision 4185)
++++ src/cpu/core_dynrec/decoder_basic.h	(working copy)
+@@ -986,10 +986,10 @@
+ 							// succeeded, use the pointer to avoid code invalidation
+ 							if (!addseg) {
+ 								if (!scaled_reg_used) {
+-									gen_mov_word_to_reg(ea_reg,(void*)val,true);
++									gen_mov_LE_word_to_reg(ea_reg,(void*)val,true);
+ 								} else {
+ 									DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0);
+-									gen_add(ea_reg,(void*)val);
++									gen_add_LE(ea_reg,(void*)val);
+ 								}
+ 							} else {
+ 								if (!scaled_reg_used) {
+@@ -997,7 +997,7 @@
+ 								} else {
+ 									DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
+ 								}
+-								gen_add(ea_reg,(void*)val);
++								gen_add_LE(ea_reg,(void*)val);
+ 							}
+ 							return;
+ 						}
+@@ -1038,10 +1038,10 @@
+ 						if (!addseg) {
+ 							if (!scaled_reg_used) {
+ 								MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
+-								gen_add(ea_reg,(void*)val);
++								gen_add_LE(ea_reg,(void*)val);
+ 							} else {
+ 								DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0);
+-								gen_add(ea_reg,(void*)val);
++								gen_add_LE(ea_reg,(void*)val);
+ 							}
+ 						} else {
+ 							if (!scaled_reg_used) {
+@@ -1050,7 +1050,7 @@
+ 								DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
+ 							}
+ 							ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
+-							gen_add(ea_reg,(void*)val);
++							gen_add_LE(ea_reg,(void*)val);
+ 						}
+ 						return;
+ 					}
+@@ -1115,11 +1115,11 @@
+ 				// succeeded, use the pointer to avoid code invalidation
+ 				if (!addseg) {
+ 					MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
+-					gen_add(ea_reg,(void*)val);
++					gen_add_LE(ea_reg,(void*)val);
+ 				} else {
+ 					MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base));
+ 					ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
+-					gen_add(ea_reg,(void*)val);
++					gen_add_LE(ea_reg,(void*)val);
+ 				}
+ 				return;
+ 			}
+Index: src/cpu/core_dynrec/decoder_opcodes.h
+===================================================================
+--- src/cpu/core_dynrec/decoder_opcodes.h	(revision 4185)
++++ src/cpu/core_dynrec/decoder_opcodes.h	(working copy)
+@@ -250,12 +250,12 @@
+ 	Bitu val;
+ 	if (decode.big_op) {
+ 		if (decode_fetchd_imm(val)) {
+-			gen_mov_word_to_reg(FC_OP2,(void*)val,true);
++			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true);
+ 			return;
+ 		}
+ 	} else {
+ 		if (decode_fetchw_imm(val)) {
+-			gen_mov_word_to_reg(FC_OP2,(void*)val,false);
++			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false);
+ 			return;
+ 		}
+ 	}
+@@ -287,13 +287,13 @@
+ 	Bitu val;
+ 	if (decode.big_op) {
+ 		if (decode_fetchd_imm(val)) {
+-			gen_mov_word_to_reg(FC_OP1,(void*)val,true);
++			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true);
+ 			MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg);
+ 			return;
+ 		}
+ 	} else {
+ 		if (decode_fetchw_imm(val)) {
+-			gen_mov_word_to_reg(FC_OP1,(void*)val,false);
++			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false);
+ 			MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg);
+ 			return;
+ 		}
+@@ -330,7 +330,7 @@
+ 	if (decode.big_addr) {
+ 		Bitu val;
+ 		if (decode_fetchd_imm(val)) {
+-			gen_add(FC_ADDR,(void*)val);
++			gen_add_LE(FC_ADDR,(void*)val);
+ 		} else {
+ 			gen_add_imm(FC_ADDR,(Bit32u)val);
+ 		}
+@@ -1179,7 +1179,7 @@
+ 		gen_call_function_raw((void*)&dynrec_pop_word);
+ 		gen_extend_word(false,FC_RETOP);
+ 	}
+-	gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true);
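++	// always store all 32 bits at &reg_eip: the value was zero-extended
++	// above for 16-bit pops, and &reg_ip only aliases the low halfword of
++	// reg_eip on little-endian hosts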
++	gen_mov_word_from_reg(FC_RETOP,(void*)(&reg_eip),true);
+ 
+ 	if (bytes) gen_add_direct_word(&reg_esp,bytes,true);
+ 	dyn_return(BR_Normal);
+Index: src/cpu/core_dynrec/Makefile.am
+===================================================================
+--- src/cpu/core_dynrec/Makefile.am	(revision 4185)
++++ src/cpu/core_dynrec/Makefile.am	(working copy)
+@@ -2,4 +2,5 @@
+                  dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \
+                  risc_armv4le.h risc_armv4le-common.h \
+                  risc_armv4le-o3.h risc_armv4le-thumb.h \
+-                 risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h
++                 risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \
++                 risc_ppc.h
+Index: src/cpu/core_dynrec/risc_ppc.h
+===================================================================
+--- src/cpu/core_dynrec/risc_ppc.h	(revision 0)
++++ src/cpu/core_dynrec/risc_ppc.h	(working copy)
+@@ -0,0 +1,897 @@
++/*
++ *  Copyright (C) 2002-2019  The DOSBox Team
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License as published by
++ *  the Free Software Foundation; either version 2 of the License, or
++ *  (at your option) any later version.
++ *
++ *  This program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *  GNU General Public License for more details.
++ *
++ *  You should have received a copy of the GNU General Public License along
++ *  with this program; if not, write to the Free Software Foundation, Inc.,
++ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
++ */
++
++// some configuring defines that specify the capabilities of this architecture
++// or aspects of the recompiling
++
++// protect FC_ADDR over function calls if necessary
++//#define DRC_PROTECT_ADDR_REG
++
++// try to use non-flags generating functions if possible
++#define DRC_FLAGS_INVALIDATION
++// try to replace _simple functions by code
++#define DRC_FLAGS_INVALIDATION_DCODE
++
++// type with the same size as a pointer
++#define DRC_PTR_SIZE_IM Bit32u
++
++// calling convention modifier
++#define DRC_FC /* nothing */
++#define DRC_CALL_CONV /* nothing */
++
++#define DRC_USE_REGS_ADDR
++#define DRC_USE_SEGS_ADDR
++
++// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
++#define USE_SDA_BASE
++
++// register mapping
++enum HostReg {
++	HOST_R0=0,
++	HOST_R1,
++	HOST_R2,
++	HOST_R3,
++	HOST_R4,
++	HOST_R5,
++	HOST_R6,
++	HOST_R7,
++	HOST_R8,
++	HOST_R9,
++	HOST_R10,
++	HOST_R11,
++	HOST_R12,
++	HOST_R13,
++	HOST_R14,
++	HOST_R15,
++	HOST_R16,
++	HOST_R17,
++	HOST_R18,
++	HOST_R19,
++	HOST_R20,
++	HOST_R21,
++	HOST_R22,
++	HOST_R23,
++	HOST_R24,
++	HOST_R25,
++	HOST_R26,
++	HOST_R27,
++	HOST_R28,
++	HOST_R29,
++	HOST_R30,
++	HOST_R31,
++
++	HOST_NONE
++};
++
++static const HostReg RegParams[] = {
++	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
++	HOST_R7, HOST_R8, HOST_R9, HOST_R10
++};
++
++#if C_FPU
++extern struct FPU_rec fpu;
++#endif
++
++#if defined(USE_SDA_BASE)
++extern Bit32u _SDA_BASE_[];
++#endif
++
++// register that holds function return values
++#define FC_RETOP HOST_R3
++
++// register used for address calculations, if the ABI does not
++// state that this register is preserved across function calls
++// then define DRC_PROTECT_ADDR_REG above
++#define FC_ADDR HOST_R29
++
++// register that points to Segs[]
++#define FC_SEGS_ADDR HOST_R30
++// register that points to cpu_regs[]
++#define FC_REGS_ADDR HOST_R31
++
++// register that holds the first parameter
++#define FC_OP1 RegParams[0]
++
++// register that holds the second parameter
++#define FC_OP2 RegParams[1]
++
++// special register that holds the third parameter for _R3 calls (byte accessible)
++#define FC_OP3 RegParams[2]
++
++// register that holds byte-accessible temporary values
++//#define FC_TMP_BA1 HOST_R6
++#define FC_TMP_BA1 FC_OP2
++
++// register that holds byte-accessible temporary values
++//#define FC_TMP_BA2 HOST_R7
++#define FC_TMP_BA2 FC_OP1
++
++// temporary register for LEA
++#define TEMP_REG_DRC HOST_R10
++
++#define IMM(op, regsd, rega, imm)           (((op)<<26)|((regsd)<<21)|((rega)<<16)|             (((Bit32u)(imm))&0xFFFF))
++#define EXT(regsd, rega, regb, op, rc)      (  (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)|          ((op)<<1)|(rc))
++#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)|  ((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
++
++#define IMM_OP(op, regsd, rega, imm)           cache_addd(IMM(op, regsd, rega, imm))
++#define EXT_OP(regsd, rega, regb, op, rc)      cache_addd(EXT(regsd, rega, regb, op, rc))
++#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
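++
++// Note on EXT() operand order (per the standard PowerPC encodings): the
++// first argument lands in the RT/RS field, so arithmetic ops (add/subf/
++// neg/addze) write their result there, while logical and shift ops
++// (or/and/xor/slw/srw/sraw) write RA, i.e. the second argument, e.g.:
++//   EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg,reg,r8  (dest = 1st arg)
++//   EXT_OP(src, dst, src, 444, 0);     // or  dst,src,src (dest = 2nd arg)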
++
++// move a full register from reg_src to reg_dst
++static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
++{
++	if (reg_dst != reg_src)
++		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
++}
++
++// move a 16bit constant value into dest_reg
++// the upper 16bit of the destination register may be destroyed
++static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
++{
++	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
++}
++
++DRC_PTR_SIZE_IM block_ptr;
++
++// Helper for loading addresses
++static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest)
++{
++	Bit32s off;
++
++	if ((Bit16s)addr == addr)
++		return HOST_R0;
++
++	off = addr - (Bit32s)&Segs;
++	if ((Bit16s)off == off)
++	{
++		addr = off;
++		return FC_SEGS_ADDR;
++	}
++
++	off = addr - (Bit32s)&cpu_regs;
++	if ((Bit16s)off == off)
++	{
++		addr = off;
++		return FC_REGS_ADDR;
++	}
++
++	off = addr - (Bit32s)block_ptr;
++	if ((Bit16s)off == off)
++	{
++		addr = off;
++		return HOST_R27;
++	}
++
++#if C_FPU
++	off = addr - (Bit32s)&fpu;
++	if ((Bit16s)off == off)
++	{
++		addr = off;
++		return HOST_R28;
++	}
++#endif
++
++#if defined(USE_SDA_BASE)
++	off = addr - (Bit32s)_SDA_BASE_;
++	if ((Bit16s)off == off)
++	{
++		addr = off;
++		return HOST_R13;
++	}
++#endif
++
++	IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha
++	addr = (Bit16s)addr;
++	return dest;
++}
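++
++// e.g. a caller typically does (sketch of the pattern used below):
++//   Bit32s addr = (Bit32s)data;
++//   HostReg ld = gen_addr(addr, dest); // addr reduced to a 16-bit offset
++//   IMM_OP(32, dest, ld, addr);        // lwz dest, addr@l(ld)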
++
++// move a 32bit constant value into dest_reg
++static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
++{
++	HostReg ld = gen_addr((Bit32s&)imm, dest_reg);
++	if (imm || ld != dest_reg)
++		IMM_OP(14, dest_reg, ld, imm);   // addi dest_reg, ldr, imm@l
++}
++
++// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
++	Bit32s addr = (Bit32s)data;
++	HostReg ld = gen_addr(addr, dest_reg);
++	IMM_OP(dword ? 32:40, dest_reg, ld, addr);  // lwz/lhz dest, addr@l(ld)
++}
++
++// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg
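++// (the value is stored in the guest's little-endian byte order, so the
++// byte-reversed loads lwbrx/lhbrx read it without a separate swap)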
++static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) {
++	Bit32u addr = (Bit32u)data;
++	gen_mov_dword_to_reg_imm(dest_reg, addr);
++	EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest
++}
++
++// move an 8bit constant value into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function does not use FC_OP1/FC_OP2 as dest_reg as these
++// registers might not be directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
++	gen_mov_word_to_reg_imm(dest_reg, imm);
++}
++
++// move an 8bit constant value into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function can use FC_OP1/FC_OP2 as dest_reg which are
++// not directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
++	gen_mov_word_to_reg_imm(dest_reg, imm);
++}
++
++// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
++static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
++{
++	Bit32s addr = (Bit32s)dest;
++	HostReg ld = gen_addr(addr, HOST_R8);
++	IMM_OP(dword ? 36 : 44, src_reg, ld, addr);  // stw/sth src,addr@l(ld)
++}
++
++// move an 8bit value from memory into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function does not use FC_OP1/FC_OP2 as dest_reg as these
++// registers might not be directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
++{
++	Bit32s addr = (Bit32s)data;
++	HostReg ld = gen_addr(addr, dest_reg);
++	IMM_OP(34, dest_reg, ld, addr);  // lbz dest,addr@l(ld)
++}
++
++// move an 8bit value from memory into dest_reg
++// the upper 24bit of the destination register can be destroyed
++// this function can use FC_OP1/FC_OP2 as dest_reg which are
++// not directly byte-accessible on some architectures
++static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
++	gen_mov_byte_to_reg_low(dest_reg, data);
++}
++
++// move the lowest 8bit of a register into memory
++static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
++{
++	Bit32s addr = (Bit32s)dest;
++	HostReg ld = gen_addr(addr, HOST_R8);
++	IMM_OP(38, src_reg, ld, addr);  // stb src_reg,addr@l(ld)
++}
++
++// convert an 8bit word to a 32bit dword
++// the register is zero-extended (sign==false) or sign-extended (sign==true)
++static void gen_extend_byte(bool sign,HostReg reg)
++{
++	if (sign)
++	{
++		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
++		return;
++	}
++
++	// check if previous instruction is "lbz reg, *"
++	if ((*(Bit32u*)(cache.pos-4) & 0xFFE00000) != IMM(34, reg, 0, 0))
++		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
++	// else register is already zero-extended
++}
++
++// convert a 16bit word to a 32bit dword
++// the register is zero-extended (sign==false) or sign-extended (sign==true)
++static void gen_extend_word(bool sign,HostReg reg)
++{
++	// check if previous instruction is "lhz reg, *"
++	Bit32u *op = (Bit32u*)(cache.pos-4);
++	if ((*op & 0xFFE00000) == IMM(40, reg, 0, 0))
++	{
++		if (sign) // change lhz -> lha
++			*op |= 0x08000000;
++		// else zero-extension already done
++		return;
++	}
++
++	if (sign)
++		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
++	else
++		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
++}
++
++// add a 32bit value from memory to a full register
++static void gen_add(HostReg reg,void* op)
++{
++	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
++	EXT_OP(reg,reg,HOST_R8,266,0);          // add reg,reg,r8
++}
++
++// add a 32bit value from host memory to a full register
++static void gen_add_LE(HostReg reg,void* op)
++{
++	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
++	EXT_OP(reg,reg,HOST_R8,266,0);       // add reg,reg,r8
++}
++
++// add a 32bit constant value to a full register
++static void gen_add_imm(HostReg reg,Bit32u imm)
++{
++	if ((Bit16s)imm != (Bit32s)imm)
++		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
++	if ((Bit16s)imm)
++		IMM_OP(14, reg, reg, imm);              // addi reg, reg, imm@l
++}
++
++// and a 32bit constant value with a full register
++static void gen_and_imm(HostReg reg,Bit32u imm) {
++	Bits sbit,ebit,tbit,bbit,abit,i;
++
++	// sbit = number of leading 0 bits
++	// ebit = number of trailing 0 bits
++	// tbit = number of total 0 bits
++	// bbit = number of leading 1 bits
++	// abit = number of trailing 1 bits
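++	// e.g. imm=0x00FFFF00 gives sbit=8, ebit=8, tbit=16, so the
++	// sbit+ebit==tbit case below emits rlwinm reg,reg,0,8,23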
++
++	if (imm == 0xFFFFFFFF)
++		return;
++
++	if (!imm)
++		return gen_mov_word_to_reg_imm(reg, 0);
++
++	sbit = ebit = tbit = bbit = abit = 0;
++	for (i=0; i < 32; i++)
++	{
++		if (!(imm & (1<<(31-i))))
++		{
++			abit = 0;
++			tbit++;
++			if (sbit == i)
++				sbit++;
++			ebit++;
++		}
++		else
++		{
++			ebit = 0;
++			if (bbit == i)
++				bbit++;
++			abit++;
++		}
++	}
++
++	if (sbit >= 16)
++	{
++		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
++		return;
++	}
++	if (ebit >= 16)
++	{
++		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
++		return;
++	}
++
++	if (sbit + ebit == tbit)
++	{
++		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
++		return;
++	}
++
++	if (bbit + abit == (32 - tbit))
++	{
++		RLW_OP(21,reg,reg,0,31-abit,bbit,0); // rlwinm reg,reg,0,31-abit,bbit
++		return;
++	}
++
++	gen_mov_dword_to_reg_imm(HOST_R8, imm);
++	EXT_OP(reg, reg, HOST_R8, 28, 0);  // and reg, reg, r8
++}
++
++// move a 32bit constant value into memory
++static void gen_mov_direct_dword(void* dest,Bit32u imm) {
++	gen_mov_dword_to_reg_imm(HOST_R9, imm);
++	gen_mov_word_from_reg(HOST_R9, dest, 1);
++}
++
++// move an address into memory (assumes address != NULL)
++static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
++{
++	block_ptr = 0;
++	gen_mov_dword_to_reg_imm(HOST_R27, imm);
++	// this will probably be used to look-up the linked blocks
++	block_ptr = imm;
++	gen_mov_word_from_reg(HOST_R27, dest, 1);
++}
++
++// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
++static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
++{
++	HostReg ld;
++	Bit32s addr = (Bit32s)dest;
++
++	if (!dword)
++	{
++		imm &= 0xFFFF;
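++		// big-endian host: the low 16 bits of the dword are at offset +2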
++		addr += 2;
++	}
++
++	if (!imm)
++		return;
++
++	ld = gen_addr(addr, HOST_R8);
++	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
++	if (dword && (Bit16s)imm != (Bit32s)imm)
++		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
++	if (!dword || (Bit16s)imm)
++		IMM_OP(14, HOST_R9, HOST_R9, imm);      // addi r9,r9,imm@l
++	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
++}
++
++// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value
++static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
++	gen_add_direct_word(dest, -(Bit32s)imm, dword);
++}
++
++// effective address calculation, destination is dest_reg
++// scale_reg is scaled by scale (scale_reg*(2^scale)) and
++// added to dest_reg, then the immediate value is added
++static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
++{
++	if (scale)
++	{
++		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // rlwinm r8,scale_reg,scale,0,31-scale
++		scale_reg = HOST_R8;
++	}
++
++	gen_add_imm(dest_reg, imm);
++	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
++}
++
++// effective address calculation, destination is dest_reg
++// dest_reg is scaled by scale (dest_reg*(2^scale)),
++// then the immediate value is added
++static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
++{
++	if (scale)
++		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // rlwinm dest,dest,scale,0,31-scale
++
++	gen_add_imm(dest_reg, imm);
++}
++
++// helper function to choose direct or indirect call
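++// either form occupies exactly four words so that the call site can
++// later be rewritten in place (see gen_fill_function_ptr below)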
++static void INLINE do_gen_call(void *func, Bit32u *pos)
++{
++	Bit32s f = (Bit32s)func;
++	Bit32s off = f - (Bit32s)pos;
++
++	// relative branches are limited to +/- ~32MB
++	if (off < 0x02000000 && off >= -0x02000000)
++	{
++		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
++		pos[1] = IMM(24, 0, 0, 0); // nop
++		pos[2] = IMM(24, 0, 0, 0);
++		pos[3] = IMM(24, 0, 0, 0);
++		return;
++	}
++
++	pos[0] = IMM(15, HOST_R8, 0, f>>16);      // lis r8,imm@h
++	pos[1] = IMM(24, HOST_R8, HOST_R8, f);    // ori r8,r8,imm@l
++	pos[2] = EXT(HOST_R8, 9, 0, 467, 0);      // mtctr r8
++	pos[3] = IMM(19, 0b10100, 0, (528<<1)|1); // bctrl
++}
++
++// generate a call to a parameterless function
++static void INLINE gen_call_function_raw(void * func)
++{
++	do_gen_call(func, (Bit32u*)cache.pos);
++	cache.pos += 16;
++}
++
++// generate a call to a function with paramcount parameters
++// note: the parameters are loaded in the architecture specific way
++// using the gen_load_param_ functions below
++static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
++{
++	Bit32u proc_addr=(Bit32u)cache.pos;
++	gen_call_function_raw(func);
++	return proc_addr;
++}
++
++// load an immediate value as param'th function parameter
++static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
++	gen_mov_dword_to_reg_imm(RegParams[param], imm);
++}
++
++// load an address as param'th function parameter
++static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
++	gen_load_param_imm(addr, param);
++}
++
++// load a host-register as param'th function parameter
++static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
++	gen_mov_regs(RegParams[param], (HostReg)reg);
++}
++
++// load a value from memory as param'th function parameter
++static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
++	gen_mov_word_to_reg(RegParams[param], (void*)mem, true);
++}
++
++// jump to an address pointed at by ptr, offset is in imm
++static void gen_jmp_ptr(void * ptr,Bits imm=0) {
++	gen_mov_word_to_reg(HOST_R8,ptr,true);                // r8 = *(Bit32u*)ptr
++	if ((Bit16s)imm != (Bit32s)imm)
++		IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha
++	IMM_OP(32, HOST_R8, HOST_R8, imm);                    // lwz r8, imm@l(r8)
++	EXT_OP(HOST_R8, 9, 0, 467, 0);                        // mtctr r8
++	IMM_OP(19, 0b10100, 0, 528<<1);                       // bctr
++}
++
++// short conditional jump (+-127 bytes) if register is zero
++// the destination is set by gen_fill_branch() later
++static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword)
++{
++	if (!dword)
++		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
++	else
++		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
++
++	IMM_OP(16, 0b01100, 2, 0); // bc 12,CR0[Z] (beq)
++	return ((Bit32u)cache.pos-4);
++}
++
++// short conditional jump (+-127 bytes) if register is nonzero
++// the destination is set by gen_fill_branch() later
++static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword)
++{
++	if (!dword)
++		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
++	else
++		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
++
++	IMM_OP(16, 0b00100, 2, 0); // bc 4,CR0[Z] (bne)
++	return ((Bit32u)cache.pos-4);
++}
++
++// calculate relative offset and fill it into the location pointed to by data
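++// (patches the 16-bit BD displacement field, which sits in the second,
++// low-order halfword of the previously emitted big-endian bc instruction)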
++static void gen_fill_branch(DRC_PTR_SIZE_IM data)
++{
++#if C_DEBUG
++	Bits len=(Bit32u)cache.pos-data;
++	if (len<0) len=-len;
++	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
++#endif
++
++	((Bit16u*)data)[1] =((Bit32u)cache.pos-data) & 0xFFFC;
++}
++
++
++// conditional jump if register is nonzero
++// for dword==true all 32 bits of the register are tested
++// for dword==false only the lowest 8 bits of the register are tested
++static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword)
++{
++	if (!dword)
++		IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF
++	else
++		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
++
++	IMM_OP(16, 0b00100, 2, 0); // bne
++	return ((Bit32u)cache.pos-4);
++}
++
++// compare 32bit-register against zero and jump if value less/equal than zero
++static Bit32u gen_create_branch_long_leqzero(HostReg reg)
++{
++	EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
++
++	IMM_OP(16, 0b00100, 1, 0); // ble
++	return ((Bit32u)cache.pos-4);
++}
++
++// calculate long relative offset and fill it into the location pointed to by data
++static void gen_fill_branch_long(Bit32u data) {
++	return gen_fill_branch((DRC_PTR_SIZE_IM)data);
++}
++
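++// write newly generated code back from the data cache and invalidate the
++// matching instruction-cache lines; a 32-byte cache line is assumed here
++// (true of the 750-family CPUs this backend targets)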
++static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
++#if defined(__GNUC__)
++	Bit8u* start = (Bit8u*)((Bit32u)block_start & -32);
++
++	while (start < block_start + block_size)
++	{
++		asm volatile("dcbst %y0; icbi %y0" :: "Z"(*start));
++		start += 32;
++	}
++	asm volatile("sync; isync");
++#else
++	#error "Don't know how to flush/invalidate CacheBlock with this compiler"
++#endif
++}
++
++static void cache_block_before_close(void) {}
++
++// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it
++static Bit32s epilog_addr;
++static Bit8u *getCF_glue;
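++// assumed frame layout: stwu creates a 32-byte frame, stmw saves
++// r26..r31 at sp+8..sp+31, and LR is stored at sp+36 (the LR save slot
++// of the caller's frame, per the SysV PPC ABI)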
++static void gen_run_code(void) {
++	// prolog
++	IMM_OP(37, HOST_R1, HOST_R1, -32); // stwu sp,-32(sp)
++	EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1
++	EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
++
++	IMM_OP(47, HOST_R26, HOST_R1, 8); // stmw r26, 8(sp)
++
++	IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16);  // lis FC_SEGS_ADDR, Segs@h
++	IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs);     // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l
++
++	IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16);  // lis FC_REGS_ADDR, cpu_regs@h
++	IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs);     // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l
++
++#if C_FPU
++	IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16);  // lis r28, fpu@h
++	IMM_OP(24, HOST_R28, HOST_R28, &fpu);         // ori r28, r28, fpu@l
++#endif
++
++	IMM_OP(36, HOST_R0, HOST_R1, 32+4); // stw r0,32+4(sp)
++	IMM_OP(19, 0b10100, 0, 528<<1);     // bctr
++
++	// epilog
++	epilog_addr = (Bit32s)cache.pos;
++	IMM_OP(32, HOST_R0, HOST_R1, 32+4); // lwz r0,32+4(sp)
++	IMM_OP(46, HOST_R26, HOST_R1, 8);    // lmw r26, 8(sp)
++	EXT_OP(HOST_R0, 8, 0, 467, 0);      // mtlr r0
++	IMM_OP(14, HOST_R1, HOST_R1, 32);      // addi sp, sp, 32
++	IMM_OP(19, 0b10100, 0, 16<<1);         // blr
++
++	// trampoline to call get_CF()
++	getCF_glue = cache.pos;
++	gen_mov_dword_to_reg_imm(FC_OP1, (Bit32u)get_CF); // FC_OP1 = &get_CF
++	EXT_OP(FC_OP1, 9, 0, 467, 0);   // mtctr FC_OP1
++	IMM_OP(19, 0b10100, 0, 528<<1); // bctr
++}
++
++// return from a function
++static void gen_return_function(void)
++{
++	Bit32s off = epilog_addr - (Bit32s)cache.pos;
++
++	// relative branches are limited to +/- 32MB
++	if (off < 0x02000000 && off >= -0x02000000) {
++		cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b epilog
++		return;
++	}
++
++	gen_mov_dword_to_reg_imm(HOST_R8, epilog_addr);
++	EXT_OP(HOST_R8, 9, 0, 467, 0);  // mtctr r8
++	IMM_OP(19, 0b10100, 0, 528<<1); // bctr
++}
++
++// called when a call to a function can be replaced by a
++// call to a simpler function
++static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
++{
++	Bit32u *op = (Bit32u*)pos;
++	Bit32u *end = op+4;
++
++	switch (flags_type) {
++#if defined(DRC_FLAGS_INVALIDATION_DCODE)
++		// try to avoid function calls but rather directly fill in code
++		case t_ADDb:
++		case t_ADDw:
++		case t_ADDd:
++			*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_ORb:
++		case t_ORw:
++		case t_ORd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_ADCb:
++		case t_ADCw:
++		case t_ADCd:
++			op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
++			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
++			op[2] = IMM(12, HOST_R0, FC_RETOP, -1);        // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = CF!=0)
++			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0);    // addze; FC_RETOP = r26 + CF!=0
++			return;
++		case t_SBBb:
++		case t_SBBw:
++		case t_SBBd:
++			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0);  // r26 = FC_OP1 - FC_OP2
++			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
++			op[2] = IMM(8, HOST_R0, FC_RETOP, 0);          // subfic r0, FC_RETOP, 0 (XER[CA] = CF==0)
++			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0);    // addme; FC_RETOP = r26 - 1 + CF==0
++			return;
++		case t_ANDb:
++		case t_ANDw:
++		case t_ANDd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SUBb:
++		case t_SUBw:
++		case t_SUBd:
++			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
++			break;
++		case t_XORb:
++		case t_XORw:
++		case t_XORd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_CMPb:
++		case t_CMPw:
++		case t_CMPd:
++		case t_TESTb:
++		case t_TESTw:
++		case t_TESTd:
++			break;
++		case t_INCb:
++		case t_INCw:
++		case t_INCd:
++			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
++			break;
++		case t_DECb:
++		case t_DECw:
++		case t_DECd:
++			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
++			break;
++		case t_NEGb:
++		case t_NEGw:
++		case t_NEGd:
++			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
++			break;
++		case t_SHLb:
++		case t_SHLw:
++		case t_SHLd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SHRb:
++		case t_SHRw:
++		case t_SHRd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
++			break;
++		case t_SARb:
++			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
++		case t_SARw:
++			if (flags_type == t_SARw)
++				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
++		case t_SARd:
++			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_ROLb:
++			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
++		case t_ROLw:
++			if (flags_type == t_ROLw)
++				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
++		case t_ROLd:
++			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_RORb:
++			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
++		case t_RORw:
++			if (flags_type == t_RORw)
++				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
++		case t_RORd:
++			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
++			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
++			break;
++
++		case t_DSHLw: // technically not correct for FC_OP3 > 16
++			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
++			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
++			break;
++		case t_DSHLd:
++			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
++			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
++			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
++			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
++			return;
++		case t_DSHRw: // technically not correct for FC_OP3 > 16
++			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
++			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
++			break;
++		case t_DSHRd:
++			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
++			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
++			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
++			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
++			return;
++#endif
++		default:
++			do_gen_call(fct_ptr, op);
++			return;
++	}
++
++	do
++	{
++		*op++ = IMM(24, 0, 0, 0); // nop
++	} while (op < end);
++}
++
++// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(40, dest_reg, FC_SEGS_ADDR, index); // lhz dest_reg, index(FC_SEGS_ADDR)
++}
++
++// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(32, dest_reg, FC_SEGS_ADDR, index); // lwz dest_reg, index(FC_SEGS_ADDR)
++}
++
++// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
++static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
++	gen_mov_seg32_to_reg(HOST_R8, index);
++	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
++}
++
++// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(40, dest_reg, FC_REGS_ADDR, index); // lhz dest_reg, index(FC_REGS_ADDR)
++}
++
++// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
++	IMM_OP(32, dest_reg, FC_REGS_ADDR, index); // lwz dest_reg, index(FC_REGS_ADDR)
++}
++
++// move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
++// the upper 24bit of the destination register can be destroyed
++// this function does not use FC_OP1/FC_OP2 as dest_reg as these
++// registers might not be directly byte-accessible on some architectures
++static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
++	IMM_OP(34, dest_reg, FC_REGS_ADDR, index); // lbz dest_reg, index(FC_REGS_ADDR)
++}
++
++// move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
++// the upper 24bit of the destination register can be destroyed
++// this function can use FC_OP1/FC_OP2 as dest_reg which are
++// not directly byte-accessible on some architectures
++static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
++	gen_mov_regbyte_to_reg_low(dest_reg, index);
++}
++
++// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
++static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
++	IMM_OP(44, src_reg, FC_REGS_ADDR, index); // sth src_reg, index(FC_REGS_ADDR)
++}
++
++// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
++	IMM_OP(36, src_reg, FC_REGS_ADDR, index); // stw src_reg, index(FC_REGS_ADDR)
++}
++
++// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
++static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
++	IMM_OP(38, src_reg, FC_REGS_ADDR, index); // stb src_reg, index(FC_REGS_ADDR)
++}
++
++// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
++static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
++	gen_mov_regval32_to_reg(HOST_R8, index);
++	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
++}
++
++// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
++static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
++	IMM_OP(dword ? 36 : 44, src_reg, FC_REGS_ADDR, index); // stw/sth src_reg, index(FC_REGS_ADDR)
++}
++
++// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
++// 16bit moves may destroy the upper 16bit of the destination register
++static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
++	IMM_OP(dword ? 32 : 40, dest_reg, FC_REGS_ADDR, index); // lwz/lhz dest_reg, index(FC_REGS_ADDR)
++}
++
diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp
index 7a7746f8048d0b56bbc6d32db1cb7cf88358d7af..08b48e0273209b4f4950f7d475c399fbc6914261 100644
--- a/src/cpu/core_dynrec.cpp
+++ b/src/cpu/core_dynrec.cpp
@@ -153,6 +153,7 @@ static struct {
 #define MIPSEL		0x03
 #define ARMV4LE		0x04
 #define ARMV7LE		0x05
+#define POWERPC		0x06
 #define ARMV8LE		0x07
 
 #if C_TARGETCPU == X86_64
@@ -165,6 +166,13 @@ static struct {
 #include "core_dynrec/risc_armv4le.h"
 #elif C_TARGETCPU == ARMV8LE
 #include "core_dynrec/risc_armv8le.h"
+#elif C_TARGETCPU == POWERPC
+#include "core_dynrec/risc_ppc.h"
+#endif
+
+#if !defined(WORDS_BIGENDIAN)
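+// on a little-endian host the guest's byte order matches the host's, so
+// the _LE helpers can simply alias the plain ones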
+#define gen_add_LE gen_add
+#define gen_mov_LE_word_to_reg gen_mov_word_to_reg
 #endif
 
 #include "core_dynrec/decoder.h"
diff --git a/src/cpu/core_dynrec.cpp.orig b/src/cpu/core_dynrec.cpp.orig
new file mode 100644
index 0000000000000000000000000000000000000000..3df063121772e0a1d69e07260dee8bedc5e30e0d
--- /dev/null
+++ b/src/cpu/core_dynrec.cpp.orig
@@ -0,0 +1,353 @@
+/*
+ *  Copyright (C) 2002-2019  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+#include "dosbox.h"
+
+#if (C_DYNREC)
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#if defined (WIN32)
+#include <windows.h>
+#include <winbase.h>
+#endif
+
+#if (C_HAVE_MPROTECT)
+#include <sys/mman.h>
+
+#include <limits.h>
+#ifndef PAGESIZE
+#define PAGESIZE 4096
+#endif
+#endif /* C_HAVE_MPROTECT */
+
+#include "callback.h"
+#include "regs.h"
+#include "mem.h"
+#include "cpu.h"
+#include "debug.h"
+#include "paging.h"
+#include "inout.h"
+#include "lazyflags.h"
+#include "pic.h"
+
+#define CACHE_MAXSIZE	(4096*2)
+#define CACHE_TOTAL		(1024*1024*8)
+#define CACHE_PAGES		(512)
+#define CACHE_BLOCKS	(128*1024)
+#define CACHE_ALIGN		(16)
+#define DYN_HASH_SHIFT	(4)
+#define DYN_PAGE_HASH	(4096>>DYN_HASH_SHIFT)
+#define DYN_LINKS		(16)
+
+
+//#define DYN_LOG 1 //Turn Logging on.
+
+#ifdef HAVE_LIBNX
+#include <switch.h>
+
+extern "C" {
+Jit dynarec_jit;
+void *jit_rx_addr = 0;
+u_char *jit_dynrec = 0;
+void *jit_rw_addr = 0;
+void *jit_rw_buffer = 0;
+void *jit_old_addr = 0;
+size_t jit_len = 0;
+bool jit_is_executable = false;
+}
+#endif
+
+
+#if C_FPU
+#define CPU_FPU 1                                               //Enable FPU escape instructions
+#endif
+
+
+// the emulated x86 registers
+#define DRC_REG_EAX 0
+#define DRC_REG_ECX 1
+#define DRC_REG_EDX 2
+#define DRC_REG_EBX 3
+#define DRC_REG_ESP 4
+#define DRC_REG_EBP 5
+#define DRC_REG_ESI 6
+#define DRC_REG_EDI 7
+
+// the emulated x86 segment registers
+#define DRC_SEG_ES 0
+#define DRC_SEG_CS 1
+#define DRC_SEG_SS 2
+#define DRC_SEG_DS 3
+#define DRC_SEG_FS 4
+#define DRC_SEG_GS 5
+
+
+// access to a general register
+#define DRCD_REG_VAL(reg) (&cpu_regs.regs[reg].dword)
+// access to a segment register
+#define DRCD_SEG_VAL(seg) (&Segs.val[seg])
+// access to the physical value of a segment register/selector
+#define DRCD_SEG_PHYS(seg) (&Segs.phys[seg])
+
+// access to an 8bit general register
+#define DRCD_REG_BYTE(reg,idx) (&cpu_regs.regs[reg].byte[idx?BH_INDEX:BL_INDEX])
+// access to  16/32bit general registers
+#define DRCD_REG_WORD(reg,dwrd) ((dwrd)?((void*)(&cpu_regs.regs[reg].dword[DW_INDEX])):((void*)(&cpu_regs.regs[reg].word[W_INDEX])))
+
+
+enum BlockReturn {
+	BR_Normal=0,
+	BR_Cycles,
+	BR_Link1,BR_Link2,
+	BR_Opcode,
+#if (C_DEBUG)
+	BR_OpcodeFull,
+#endif
+	BR_Iret,
+	BR_CallBack,
+	BR_SMCBlock
+};
+
+// identifier used to signal self-modification of the currently executed block
+#define SMC_CURRENT_BLOCK	0xffff
+
+
+static void IllegalOptionDynrec(const char* msg) {
+	E_Exit("DynrecCore: illegal option in %s",msg);
+}
+
+static struct {
+	BlockReturn (*runcode)(Bit8u*);		// points to code that can start a block
+	Bitu callback;				// the occurred callback
+	Bitu readdata;				// spare space used when reading from memory
+	Bit32u protected_regs[8];	// space to save/restore register values
+} core_dynrec;
+
+
+#include "core_dynrec/cache.h"
+
+#define X86			0x01
+#define X86_64		0x02
+#define MIPSEL		0x03
+#define ARMV4LE		0x04
+#define ARMV7LE		0x05
+#define ARMV8LE		0x07
+
+#if C_TARGETCPU == X86_64
+#include "core_dynrec/risc_x64.h"
+#elif C_TARGETCPU == X86
+#include "core_dynrec/risc_x86.h"
+#elif C_TARGETCPU == MIPSEL
+#include "core_dynrec/risc_mipsel32.h"
+#elif (C_TARGETCPU == ARMV4LE) || (C_TARGETCPU == ARMV7LE)
+#include "core_dynrec/risc_armv4le.h"
+#elif C_TARGETCPU == ARMV8LE
+#include "core_dynrec/risc_armv8le.h"
+#endif
+
+#include "core_dynrec/decoder.h"
+
+CacheBlockDynRec * LinkBlocks(BlockReturn ret) {
+	CacheBlockDynRec * block=NULL;
+	// the last instruction was a control flow modifying instruction
+	Bitu temp_ip=SegPhys(cs)+reg_eip;
+	CodePageHandlerDynRec * temp_handler=(CodePageHandlerDynRec *)get_tlb_readhandler(temp_ip);
+	if (temp_handler->flags & PFLAG_HASCODE) {
+		// see if the target is an already translated block
+		block=temp_handler->FindCacheBlock(temp_ip & 4095);
+		if (!block) return NULL;
+
+		// found it, link the current block to
+		cache.block.running->LinkTo(ret==BR_Link2,block);
+		return block;
+	}
+	return NULL;
+}
+
+/*
+	The core tries to find the block that should be executed next.
+	If such a block is found, it is run, otherwise the instruction
+	stream starting at ip_point is translated (see decoder.h) and
+	makes up a new code block that will be run.
+	When control is returned to CPU_Core_Dynrec_Run (which might
+	be right after the block is run, or somewhen long after that
+	due to the direct cacheblock linking) the returncode decides
+	the next action. This might be continuing the translation and
+	execution process, or returning from the core etc.
+*/
+
+Bits CPU_Core_Dynrec_Run(void) {
+	for (;;) {
+		// Determine the linear address of CS:EIP
+		PhysPt ip_point=SegPhys(cs)+reg_eip;
+		#if C_HEAVY_DEBUG
+			if (DEBUG_HeavyIsBreakpoint()) return debugCallback;
+		#endif
+
+		CodePageHandlerDynRec * chandler=0;
+		// see if the current page is present and contains code
+		if (GCC_UNLIKELY(MakeCodePage(ip_point,chandler))) {
+			// page not present, throw the exception
+			CPU_Exception(cpu.exception.which,cpu.exception.error);
+			continue;
+		}
+
+		// page doesn't contain code or is special
+		if (GCC_UNLIKELY(!chandler)) return CPU_Core_Normal_Run();
+
+		// find correct Dynamic Block to run
+		CacheBlockDynRec * block=chandler->FindCacheBlock(ip_point&4095);
+		if (!block) {
+			// no block found, thus translate the instruction stream
+			// unless the instruction is known to be modified
+			if (!chandler->invalidation_map || (chandler->invalidation_map[ip_point&4095]<4)) {
+				// translate up to 32 instructions
+				block=CreateCacheBlock(chandler,ip_point,32);
+			} else {
+				// let the normal core handle this instruction to avoid zero-sized blocks
+				Bitu old_cycles=CPU_Cycles;
+				CPU_Cycles=1;
+				Bits nc_retcode=CPU_Core_Normal_Run();
+				if (!nc_retcode) {
+					CPU_Cycles=old_cycles-1;
+					continue;
+				}
+				CPU_CycleLeft+=old_cycles;
+				return nc_retcode;
+			}
+		}
+
+run_block:
+		cache.block.running=0;
+		// now we're ready to run the dynamic code block
+//		BlockReturn ret=((BlockReturn (*)(void))(block->cache.start))();
+		BlockReturn ret=core_dynrec.runcode(block->cache.start);
+
+		switch (ret) {
+		case BR_Iret:
+#if C_DEBUG
+#if C_HEAVY_DEBUG
+			if (DEBUG_HeavyIsBreakpoint()) return debugCallback;
+#endif
+#endif
+			if (!GETFLAG(TF)) {
+				if (GETFLAG(IF) && PIC_IRQCheck) return CBRET_NONE;
+				break;
+			}
+			// trapflag is set, switch to the trap-aware decoder
+			cpudecoder=CPU_Core_Dynrec_Trap_Run;
+			return CBRET_NONE;
+
+		case BR_Normal:
+			// the block was exited due to a non-predictable control flow
+			// modifying instruction (like ret) or some nontrivial cpu state
+			// changing instruction (for example switch to/from pmode),
+			// or the maximum number of instructions to translate was reached
+#if C_DEBUG
+#if C_HEAVY_DEBUG
+			if (DEBUG_HeavyIsBreakpoint()) return debugCallback;
+#endif
+#endif
+			break;
+
+		case BR_Cycles:
+			// cycles went negative, return from the core to handle
+			// external events, schedule the pic...
+#if C_DEBUG
+#if C_HEAVY_DEBUG
+			if (DEBUG_HeavyIsBreakpoint()) return debugCallback;
+#endif
+#endif
+			return CBRET_NONE;
+
+		case BR_CallBack:
+			// the callback code is executed in dosbox.cpp, return the callback number
+			FillFlags();
+			return core_dynrec.callback;
+
+		case BR_SMCBlock:
+//			LOG_MSG("selfmodification of running block at %x:%x",SegValue(cs),reg_eip);
+			cpu.exception.which=0;
+			// fallthrough, let the normal core handle the block-modifying instruction
+		case BR_Opcode:
+			// some instruction has been encountered that could not be translated
+			// (thus it is not part of the code block), the normal core will
+			// handle this instruction
+			CPU_CycleLeft+=CPU_Cycles;
+			CPU_Cycles=1;
+			return CPU_Core_Normal_Run();
+
+#if (C_DEBUG)
+		case BR_OpcodeFull:
+			CPU_CycleLeft+=CPU_Cycles;
+			CPU_Cycles=1;
+			return CPU_Core_Full_Run();
+#endif
+
+		case BR_Link1:
+		case BR_Link2:
+			block=LinkBlocks(ret);
+			if (block) goto run_block;
+			break;
+
+		default:
+			E_Exit("Invalid return code %d", ret);
+		}
+	}
+	return CBRET_NONE;
+}
+
+Bits CPU_Core_Dynrec_Trap_Run(void) {
+	Bits oldCycles = CPU_Cycles;
+	CPU_Cycles = 1;
+	cpu.trap_skip = false;
+
+	// let the normal core execute the next (only one!) instruction
+	Bits ret=CPU_Core_Normal_Run();
+
+	// trap to int1 unless the last instruction deferred this
+	// (allows hardware interrupts to be served without interaction)
+	if (!cpu.trap_skip) CPU_HW_Interrupt(1);
+
+	CPU_Cycles = oldCycles-1;
+	// continue (either the trapflag was clear anyways, or the int1 cleared it)
+	cpudecoder = &CPU_Core_Dynrec_Run;
+
+	return ret;
+}
+
+void CPU_Core_Dynrec_Init(void) {
+}
+
+void CPU_Core_Dynrec_Cache_Init(bool enable_cache) {
+	// Initialize code cache and dynamic blocks
+	cache_init(enable_cache);
+}
+
+void CPU_Core_Dynrec_Cache_Close(void) {
+	cache_close();
+}
+
+#endif
diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am
index f135543e8fc2a7e3ee72f7c1ea1bea680b66baf4..57e7258eef0cce7bc427a58dbf27dae0f4d654a1 100644
--- a/src/cpu/core_dynrec/Makefile.am
+++ b/src/cpu/core_dynrec/Makefile.am
@@ -2,4 +2,5 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \
                  dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \
                  risc_armv4le.h risc_armv4le-common.h \
                  risc_armv4le-o3.h risc_armv4le-thumb.h \
-                 risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h
+                 risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \
+                 risc_ppc.h
diff --git a/src/cpu/core_dynrec/cache.h b/src/cpu/core_dynrec/cache.h
index 3637eee80b24267c58b6dfb0b278057b80d706a0..fd9e9f03afc09e13443d6af6f16566270cbaea4a 100644
--- a/src/cpu/core_dynrec/cache.h
+++ b/src/cpu/core_dynrec/cache.h
@@ -179,7 +179,7 @@ public:
 			invalidation_map=(Bit8u*)malloc(4096);
 			memset(invalidation_map,0,4096);
 		}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 		host_writew(&invalidation_map[addr],
 			host_readw(&invalidation_map[addr])+0x101);
 #else
@@ -201,7 +201,7 @@ public:
 			invalidation_map=(Bit8u*)malloc(4096);
 			memset(invalidation_map,0,4096);
 		}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 		host_writed(&invalidation_map[addr],
 			host_readd(&invalidation_map[addr])+0x1010101);
 #else
@@ -248,7 +248,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writew(&invalidation_map[addr],
 				host_readw(&invalidation_map[addr])+0x101);
 #else
@@ -277,7 +277,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writed(&invalidation_map[addr],
 				host_readd(&invalidation_map[addr])+0x1010101);
 #else
@@ -585,6 +585,8 @@ static INLINE void cache_addq(Bit64u val) {
 
 static void dyn_return(BlockReturn retcode,bool ret_exception);
 static void dyn_run_code(void);
+static void cache_block_before_close(void);
+static void cache_block_closing(Bit8u* block_start,Bitu block_size);
 
 
 /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */
@@ -656,18 +658,26 @@ static void cache_init(bool enable) {
 		}
 		// setup the default blocks for block linkage returns
 		cache.pos=&cache_code_link_blocks[0];
+		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
+		// can use up to PAGESIZE_TEMP-64 bytes
+		dyn_run_code();
+		cache_block_before_close();
+
+		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64];
 		link_blocks[0].cache.start=cache.pos;
 		// link code that returns with a special return code
+		// must be less than 32 bytes
 		dyn_return(BR_Link1,false);
-		cache.pos=&cache_code_link_blocks[32];
+		cache_block_before_close();
+
+		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32];
 		link_blocks[1].cache.start=cache.pos;
 		// link code that returns with a special return code
+		// must be less than 32 bytes
 		dyn_return(BR_Link2,false);
+		cache_block_before_close();
 
-		cache.pos=&cache_code_link_blocks[64];
-		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
-//		link_blocks[1].cache.start=cache.pos;
-		dyn_run_code();
+		cache_block_closing(cache_code_link_blocks, PAGESIZE_TEMP);
 
 		cache.free_pages=0;
 		cache.last_page=0;
diff --git a/src/cpu/core_dynrec/cache.h.orig b/src/cpu/core_dynrec/cache.h.orig
new file mode 100644
index 0000000000000000000000000000000000000000..e0f67519df80dc6f77a4eac1595a41c62806cc51
--- /dev/null
+++ b/src/cpu/core_dynrec/cache.h.orig
@@ -0,0 +1,707 @@
+/*
+ *  Copyright (C) 2002-2019  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_LIBNX
+#include "../../../switch/mman.h"
+#endif
+
+#ifdef VITA
+#include <psp2/kernel/sysmem.h>
+static int sceBlock;
+#endif
+
+class CodePageHandlerDynRec;	// forward
+
+// basic cache block representation
+class CacheBlockDynRec {
+public:
+	void Clear(void);
+	// link this cache block to another block, index specifies the code
+	// path (always zero for unconditional links, 0/1 for conditional ones
+	void LinkTo(Bitu index,CacheBlockDynRec * toblock) {
+		assert(toblock);
+		link[index].to=toblock;
+		link[index].next=toblock->link[index].from;	// set target block
+		toblock->link[index].from=this;				// remember who links me
+	}
+	struct {
+		Bit16u start,end;		// where in the page is the original code
+		CodePageHandlerDynRec * handler;			// page containing this code
+	} page;
+	struct {
+		Bit8u * start;			// where in the cache are we
+		Bitu size;
+		CacheBlockDynRec * next;
+		// writemap masking maskpointer/start/length
+		// to allow holes in the writemap
+		Bit8u * wmapmask;
+		Bit16u maskstart;
+		Bit16u masklen;
+	} cache;
+	struct {
+		Bitu index;
+		CacheBlockDynRec * next;
+	} hash;
+	struct {
+		CacheBlockDynRec * to;		// this block can transfer control to the to-block
+		CacheBlockDynRec * next;
+		CacheBlockDynRec * from;	// the from-block can transfer control to this block
+	} link[2];	// maximum two links (conditional jumps)
+	CacheBlockDynRec * crossblock;
+};
+
+static struct {
+	struct {
+		CacheBlockDynRec * first;		// the first cache block in the list
+		CacheBlockDynRec * active;		// the current cache block
+		CacheBlockDynRec * free;		// pointer to the free list
+		CacheBlockDynRec * running;		// the last block that was entered for execution
+	} block;
+	Bit8u * pos;		// position in the cache block
+	CodePageHandlerDynRec * free_pages;		// pointer to the free list
+	CodePageHandlerDynRec * used_pages;		// pointer to the list of used pages
+	CodePageHandlerDynRec * last_page;		// the last used page
+} cache;
+
+
+// cache memory pointers, to be malloc'd later
+static Bit8u * cache_code_start_ptr=NULL;
+static Bit8u * cache_code=NULL;
+static Bit8u * cache_code_link_blocks=NULL;
+
+static CacheBlockDynRec * cache_blocks=NULL;
+static CacheBlockDynRec link_blocks[2];		// default linking (specially marked)
+
+
+// the CodePageHandlerDynRec class provides access to the contained
+// cache blocks and intercepts writes to the code for special treatment
+class CodePageHandlerDynRec : public PageHandler {
+public:
+	CodePageHandlerDynRec() {
+		invalidation_map=NULL;
+	}
+
+	void SetupAt(Bitu _phys_page,PageHandler * _old_pagehandler) {
+		// initialize this codepage handler
+		phys_page=_phys_page;
+		// save the old pagehandler to provide direct read access to the memory,
+		// and to be able to restore it later on
+		old_pagehandler=_old_pagehandler;
+
+		// adjust flags
+		flags=old_pagehandler->flags|PFLAG_HASCODE;
+		flags&=~PFLAG_WRITEABLE;
+
+		active_blocks=0;
+		active_count=16;
+
+		// initialize the maps with zero (no cache blocks as well as code present)
+		memset(&hash_map,0,sizeof(hash_map));
+		memset(&write_map,0,sizeof(write_map));
+		if (invalidation_map!=NULL) {
+			free(invalidation_map);
+			invalidation_map=NULL;
+		}
+	}
+
+	// clear out blocks that contain code which has been modified
+	bool InvalidateRange(Bitu start,Bitu end) {
+		Bits index=1+(end>>DYN_HASH_SHIFT);
+		bool is_current_block=false;	// if the current block is modified, it has to be exited as soon as possible
+
+		Bit32u ip_point=SegPhys(cs)+reg_eip;
+		ip_point=(PAGING_GetPhysicalPage(ip_point)-(phys_page<<12))+(ip_point&0xfff);
+		while (index>=0) {
+			Bitu map=0;
+			// see if there is still some code in the range
+			for (Bitu count=start;count<=end;count++) map+=write_map[count];
+			if (!map) return is_current_block;	// no more code, finished
+
+			CacheBlockDynRec * block=hash_map[index];
+			while (block) {
+				CacheBlockDynRec * nextblock=block->hash.next;
+				// test if this block is in the range
+				if (start<=block->page.end && end>=block->page.start) {
+					if (ip_point<=block->page.end && ip_point>=block->page.start) is_current_block=true;
+					block->Clear();		// clear the block, decrements the write_map accordingly
+				}
+				block=nextblock;
+			}
+			index--;
+		}
+		return is_current_block;
+	}
+
+	// the following functions will clean all cache blocks that are invalid now due to the write
+	void writeb(PhysPt addr,Bitu val){
+		addr&=4095;
+		if (host_readb(hostmem+addr)==(Bit8u)val) return;
+		host_writeb(hostmem+addr,val);
+		// see if there's code where we are writing to
+		if (!host_readb(&write_map[addr])) {
+			if (active_blocks) return;		// still some blocks in this page
+			active_count--;
+			if (!active_count) Release();	// delay page releasing until active_count is zero
+			return;
+		} else if (!invalidation_map) {
+			invalidation_map=(Bit8u*)malloc(4096);
+			memset(invalidation_map,0,4096);
+		}
+		invalidation_map[addr]++;
+		InvalidateRange(addr,addr);
+	}
+	void writew(PhysPt addr,Bitu val){
+		addr&=4095;
+		if (host_readw(hostmem+addr)==(Bit16u)val) return;
+		host_writew(hostmem+addr,val);
+		// see if there's code where we are writing to
+		if (!host_readw(&write_map[addr])) {
+			if (active_blocks) return;		// still some blocks in this page
+			active_count--;
+			if (!active_count) Release();	// delay page releasing until active_count is zero
+			return;
+		} else if (!invalidation_map) {
+			invalidation_map=(Bit8u*)malloc(4096);
+			memset(invalidation_map,0,4096);
+		}
+#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+		host_writew(&invalidation_map[addr],
+			host_readw(&invalidation_map[addr])+0x101);
+#else
+		(*(Bit16u*)&invalidation_map[addr])+=0x101;
+#endif
+		InvalidateRange(addr,addr+1);
+	}
+	void writed(PhysPt addr,Bitu val){
+		addr&=4095;
+		if (host_readd(hostmem+addr)==(Bit32u)val) return;
+		host_writed(hostmem+addr,val);
+		// see if there's code where we are writing to
+		if (!host_readd(&write_map[addr])) {
+			if (active_blocks) return;		// still some blocks in this page
+			active_count--;
+			if (!active_count) Release();	// delay page releasing until active_count is zero
+			return;
+		} else if (!invalidation_map) {
+			invalidation_map=(Bit8u*)malloc(4096);
+			memset(invalidation_map,0,4096);
+		}
+#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+		host_writed(&invalidation_map[addr],
+			host_readd(&invalidation_map[addr])+0x1010101);
+#else
+		(*(Bit32u*)&invalidation_map[addr])+=0x1010101;
+#endif
+		InvalidateRange(addr,addr+3);
+	}
+	bool writeb_checked(PhysPt addr,Bitu val) {
+		addr&=4095;
+		if (host_readb(hostmem+addr)==(Bit8u)val) return false;
+		// see if there's code where we are writing to
+		if (!host_readb(&write_map[addr])) {
+			if (!active_blocks) {
+				// no blocks left in this page, still delay the page releasing a bit
+				active_count--;
+				if (!active_count) Release();
+			}
+		} else {
+			if (!invalidation_map) {
+				invalidation_map=(Bit8u*)malloc(4096);
+				memset(invalidation_map,0,4096);
+			}
+			invalidation_map[addr]++;
+			if (InvalidateRange(addr,addr)) {
+				cpu.exception.which=SMC_CURRENT_BLOCK;
+				return true;
+			}
+		}
+		host_writeb(hostmem+addr,val);
+		return false;
+	}
+	bool writew_checked(PhysPt addr,Bitu val) {
+		addr&=4095;
+		if (host_readw(hostmem+addr)==(Bit16u)val) return false;
+		// see if there's code where we are writing to
+		if (!host_readw(&write_map[addr])) {
+			if (!active_blocks) {
+				// no blocks left in this page, still delay the page releasing a bit
+				active_count--;
+				if (!active_count) Release();
+			}
+		} else {
+			if (!invalidation_map) {
+				invalidation_map=(Bit8u*)malloc(4096);
+				memset(invalidation_map,0,4096);
+			}
+#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+			host_writew(&invalidation_map[addr],
+				host_readw(&invalidation_map[addr])+0x101);
+#else
+			(*(Bit16u*)&invalidation_map[addr])+=0x101;
+#endif
+			if (InvalidateRange(addr,addr+1)) {
+				cpu.exception.which=SMC_CURRENT_BLOCK;
+				return true;
+			}
+		}
+		host_writew(hostmem+addr,val);
+		return false;
+	}
+	bool writed_checked(PhysPt addr,Bitu val) {
+		addr&=4095;
+		if (host_readd(hostmem+addr)==(Bit32u)val) return false;
+		// see if there's code where we are writing to
+		if (!host_readd(&write_map[addr])) {
+			if (!active_blocks) {
+				// no blocks left in this page, still delay the page releasing a bit
+				active_count--;
+				if (!active_count) Release();
+			}
+		} else {
+			if (!invalidation_map) {
+				invalidation_map=(Bit8u*)malloc(4096);
+				memset(invalidation_map,0,4096);
+			}
+#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+			host_writed(&invalidation_map[addr],
+				host_readd(&invalidation_map[addr])+0x1010101);
+#else
+			(*(Bit32u*)&invalidation_map[addr])+=0x1010101;
+#endif
+			if (InvalidateRange(addr,addr+3)) {
+				cpu.exception.which=SMC_CURRENT_BLOCK;
+				return true;
+			}
+		}
+		host_writed(hostmem+addr,val);
+		return false;
+	}
+
+	// add a cache block to this page and note it in the hash map
+	void AddCacheBlock(CacheBlockDynRec * block) {
+		Bitu index=1+(block->page.start>>DYN_HASH_SHIFT);
+		block->hash.next=hash_map[index];	// link to old block at index from the new block
+		block->hash.index=index;
+		hash_map[index]=block;				// put new block at hash position
+		block->page.handler=this;
+		active_blocks++;
+	}
+	// there's a block whose code started in a different page
+	void AddCrossBlock(CacheBlockDynRec * block) {
+		block->hash.next=hash_map[0];
+		block->hash.index=0;
+		hash_map[0]=block;
+		block->page.handler=this;
+		active_blocks++;
+	}
+	// remove a cache block
+	void DelCacheBlock(CacheBlockDynRec * block) {
+		active_blocks--;
+		active_count=16;
+		CacheBlockDynRec * * bwhere=&hash_map[block->hash.index];
+		while (*bwhere!=block) {
+			bwhere=&((*bwhere)->hash.next);
+			//Will crash if a block isn't found, which should never happen.
+		}
+		*bwhere=block->hash.next;
+
+		// remove the cleared block from the write map
+		if (GCC_UNLIKELY(block->cache.wmapmask!=NULL)) {
+			// first part is not influenced by the mask
+			for (Bitu i=block->page.start;i<block->cache.maskstart;i++) {
+				if (write_map[i]) write_map[i]--;
+			}
+			Bitu maskct=0;
+			// last part sticks to the writemap mask
+			for (Bitu i=block->cache.maskstart;i<=block->page.end;i++,maskct++) {
+				if (write_map[i]) {
+					// only adjust writemap if it isn't masked
+					if ((maskct>=block->cache.masklen) || (!block->cache.wmapmask[maskct])) write_map[i]--;
+				}
+			}
+			free(block->cache.wmapmask);
+			block->cache.wmapmask=NULL;
+		} else {
+			for (Bitu i=block->page.start;i<=block->page.end;i++) {
+				if (write_map[i]) write_map[i]--;
+			}
+		}
+	}
+
+	void Release(void) {
+		MEM_SetPageHandler(phys_page,1,old_pagehandler);	// revert to old handler
+		PAGING_ClearTLB();
+
+		// remove page from the lists
+		if (prev) prev->next=next;
+		else cache.used_pages=next;
+		if (next) next->prev=prev;
+		else cache.last_page=prev;
+		next=cache.free_pages;
+		cache.free_pages=this;
+		prev=0;
+	}
+	void ClearRelease(void) {
+		// clear out all cache blocks in this page
+		for (Bitu index=0;index<(1+DYN_PAGE_HASH);index++) {
+			CacheBlockDynRec * block=hash_map[index];
+			while (block) {
+				CacheBlockDynRec * nextblock=block->hash.next;
+				block->page.handler=0;			// no need, full clear
+				block->Clear();
+				block=nextblock;
+			}
+		}
+		Release();	// now can release this page
+	}
+
+	CacheBlockDynRec * FindCacheBlock(Bitu start) {
+		CacheBlockDynRec * block=hash_map[1+(start>>DYN_HASH_SHIFT)];
+		// see if there's a cache block present at the start address
+		while (block) {
+			if (block->page.start==start) return block;	// found
+			block=block->hash.next;
+		}
+		return 0;	// none found
+	}
+
+	HostPt GetHostReadPt(Bitu phys_page) { 
+		hostmem=old_pagehandler->GetHostReadPt(phys_page);
+		return hostmem;
+	}
+	HostPt GetHostWritePt(Bitu phys_page) { 
+		return GetHostReadPt( phys_page );
+	}
+public:
+	// the write map, there are write_map[i] cache blocks that cover the byte at address i
+	Bit8u write_map[4096];
+	Bit8u * invalidation_map;
+	CodePageHandlerDynRec * next, * prev;	// page linking
+private:
+	PageHandler * old_pagehandler;
+
+	// hash map to quickly find the cache blocks in this page
+	CacheBlockDynRec * hash_map[1+DYN_PAGE_HASH];
+
+	Bitu active_blocks;		// the number of cache blocks in this page
+	Bitu active_count;		// delaying parameter to not immediately release a page
+	HostPt hostmem;	
+	Bitu phys_page;
+};
+
+
+static INLINE void cache_addunusedblock(CacheBlockDynRec * block) {
+	// block has become unused, add it to the freelist
+	block->cache.next=cache.block.free;
+	cache.block.free=block;
+}
+
+static CacheBlockDynRec * cache_getblock(void) {
+	// get a free cache block and advance the free pointer
+	CacheBlockDynRec * ret=cache.block.free;
+	if (!ret) E_Exit("Ran out of CacheBlocks" );
+	cache.block.free=ret->cache.next;
+	ret->cache.next=0;
+	return ret;
+}
+
+void CacheBlockDynRec::Clear(void) {
+	Bitu ind;
+	// check if this is not a cross page block
+	if (hash.index) for (ind=0;ind<2;ind++) {
+		CacheBlockDynRec * fromlink=link[ind].from;
+		link[ind].from=0;
+		while (fromlink) {
+			CacheBlockDynRec * nextlink=fromlink->link[ind].next;
+			// clear the next-link and let the block point to the standard linkcode
+			fromlink->link[ind].next=0;
+			fromlink->link[ind].to=&link_blocks[ind];
+
+			fromlink=nextlink;
+		}
+		if (link[ind].to!=&link_blocks[ind]) {
+			// not linked to the standard linkcode, find the block that links to this block
+			CacheBlockDynRec * * wherelink=&link[ind].to->link[ind].from;
+			while (*wherelink != this && *wherelink) {
+				wherelink = &(*wherelink)->link[ind].next;
+			}
+			// now remove the link
+			if(*wherelink) 
+				*wherelink = (*wherelink)->link[ind].next;
+			else {
+				LOG(LOG_CPU,LOG_ERROR)("Cache anomaly. Please investigate");
+			}
+		}
+	} else 
+		cache_addunusedblock(this);
+	if (crossblock) {
+		// clear out the crossblock (in the page before) as well
+		crossblock->crossblock=0;
+		crossblock->Clear();
+		crossblock=0;
+	}
+	if (page.handler) {
+		// clear out the code page handler
+		page.handler->DelCacheBlock(this);
+		page.handler=0;
+	}
+	if (cache.wmapmask){
+		free(cache.wmapmask);
+		cache.wmapmask=NULL;
+	}
+}
+
+
+static CacheBlockDynRec * cache_openblock(void) {
+	CacheBlockDynRec * block=cache.block.active;
+	// check for enough space in this block
+	Bitu size=block->cache.size;
+	CacheBlockDynRec * nextblock=block->cache.next;
+	if (block->page.handler) 
+		block->Clear();
+	// block size must be at least CACHE_MAXSIZE
+	while (size<CACHE_MAXSIZE) {
+		if (!nextblock)
+			goto skipresize;
+		// merge blocks
+		size+=nextblock->cache.size;
+		CacheBlockDynRec * tempblock=nextblock->cache.next;
+		if (nextblock->page.handler) 
+			nextblock->Clear();
+		// block is free now
+		cache_addunusedblock(nextblock);
+		nextblock=tempblock;
+	}
+skipresize:
+	// adjust parameters and open this block
+	block->cache.size=size;
+	block->cache.next=nextblock;
+	cache.pos=block->cache.start;
+	return block;
+}
+
+static void cache_closeblock(void) {
+	CacheBlockDynRec * block=cache.block.active;
+	// links point to the default linking code
+	block->link[0].to=&link_blocks[0];
+	block->link[1].to=&link_blocks[1];
+	block->link[0].from=0;
+	block->link[1].from=0;
+	block->link[0].next=0;
+	block->link[1].next=0;
+	// close the block with correct alignment
+	Bitu written=(Bitu)(cache.pos-block->cache.start);
+	if (written>block->cache.size) {
+		if (!block->cache.next) {
+			if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size);	
+		} else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size);	
+	} else {
+		Bitu new_size;
+		Bitu left=block->cache.size-written;
+		// smaller than cache align then don't bother to resize
+		if (left>CACHE_ALIGN) {
+			new_size=((written-1)|(CACHE_ALIGN-1))+1;
+			CacheBlockDynRec * newblock=cache_getblock();
+			// align block now to CACHE_ALIGN
+			newblock->cache.start=block->cache.start+new_size;
+			newblock->cache.size=block->cache.size-new_size;
+			newblock->cache.next=block->cache.next;
+			block->cache.next=newblock;
+			block->cache.size=new_size;
+		}
+	}
+	// advance the active block pointer
+	if (!block->cache.next || (block->cache.next->cache.start>(cache_code_start_ptr + CACHE_TOTAL - CACHE_MAXSIZE))) {
+//		LOG_MSG("Cache full restarting");
+		cache.block.active=cache.block.first;
+	} else {
+		cache.block.active=block->cache.next;
+	}
+}
+
+
+// place an 8bit value into the cache
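+// on libnx the JIT region is mapped twice: cache.pos tracks the executable
+// (rx) alias, so writes go through the writable (rw) alias computed from the
+// jit_rx_addr/jit_rw_addr bases (provided by the mman shim included above)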
+static INLINE void cache_addb(Bit8u val) {
+#ifdef HAVE_LIBNX
+	Bit8u* rwPos = (Bit8u*)((intptr_t)cache.pos - (intptr_t)jit_rx_addr + (intptr_t)jit_rw_addr);
+	*rwPos=val;
+	cache.pos++;
+#else
+	*cache.pos++=val;
+#endif
+}
+
+// place a 16bit value into the cache
+static INLINE void cache_addw(Bit16u val) {
+#ifdef HAVE_LIBNX
+	Bit16u* rwPos = (Bit16u*)((intptr_t)cache.pos - (intptr_t)jit_rx_addr + (intptr_t)jit_rw_addr);
+	*rwPos=val;
+	cache.pos+=2;
+#else
+	*(Bit16u*)cache.pos=val;
+	cache.pos+=2;
+#endif
+}
+
+// place a 32bit value into the cache
+static INLINE void cache_addd(Bit32u val) {
+#ifdef HAVE_LIBNX
+	Bit32u* rwPos = (Bit32u*)((intptr_t)cache.pos - (intptr_t)jit_rx_addr + (intptr_t)jit_rw_addr);
+	*rwPos=val;
+	cache.pos+=4;
+#else
+	*(Bit32u*)cache.pos=val;
+	cache.pos+=4;
+#endif
+}
+
+// place a 64bit value into the cache
+static INLINE void cache_addq(Bit64u val) {
+#ifdef HAVE_LIBNX
+	Bit64u* rwPos = (Bit64u*)((intptr_t)cache.pos - (intptr_t)jit_rx_addr + (intptr_t)jit_rw_addr);
+	*rwPos=val;
+	cache.pos+=8;
+#else
+	*(Bit64u*)cache.pos=val;
+	cache.pos+=8;
+#endif
+}
+
+
+static void dyn_return(BlockReturn retcode,bool ret_exception);
+static void dyn_run_code(void);
+
+
+/* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */
+#if (C_HAVE_MPROTECT)
+#define PAGESIZE_TEMP PAGESIZE
+#else 
+#define PAGESIZE_TEMP 4096
+#endif
+
+static bool cache_initialized = false;
+
+static void cache_init(bool enable) {
+	Bits i;
+	if (enable) {
+		// see if cache is already initialized
+		if (cache_initialized) return;
+		cache_initialized = true;
+		if (cache_blocks == NULL) {
+			// allocate the cache blocks memory
+			cache_blocks=(CacheBlockDynRec*)malloc(CACHE_BLOCKS*sizeof(CacheBlockDynRec));
+			if(!cache_blocks) E_Exit("Allocating cache_blocks has failed");
+			memset(cache_blocks,0,sizeof(CacheBlockDynRec)*CACHE_BLOCKS);
+			cache.block.free=&cache_blocks[0];
+			// initialize the cache blocks
+			for (i=0;i<CACHE_BLOCKS-1;i++) {
+				cache_blocks[i].link[0].to=(CacheBlockDynRec *)1;
+				cache_blocks[i].link[1].to=(CacheBlockDynRec *)1;
+				cache_blocks[i].cache.next=&cache_blocks[i+1];
+			}
+		}
+		if (cache_code_start_ptr==NULL) {
+			// allocate the code cache memory
+#if defined (WIN32)
+			cache_code_start_ptr=(Bit8u*)VirtualAlloc(0,CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP,
+				MEM_COMMIT,PAGE_EXECUTE_READWRITE);
+			if (!cache_code_start_ptr)
+				cache_code_start_ptr=(Bit8u*)malloc(CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP);
+#elif defined (HAVE_LIBNX)
+			cache_code_start_ptr=(Bit8u*)mmap(NULL, CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP, 0, 0, 0, 0);
+#elif defined (VITA)
+			sceBlock = sceKernelAllocMemBlockForVM("code", CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP);
+			if (sceBlock >= 0) {
+				int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&cache_code_start_ptr);
+				if (ret < 0) {
+					cache_code_start_ptr = NULL;
+				}
+			}
+#else
+			cache_code_start_ptr=(Bit8u*)malloc(CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP-1+PAGESIZE_TEMP);
+#endif
+			if(!cache_code_start_ptr) E_Exit("Allocating dynamic cache failed");
+
+			// align the cache at a page boundary
+			cache_code=(Bit8u*)(((Bitu)cache_code_start_ptr + PAGESIZE_TEMP-1) & ~(PAGESIZE_TEMP-1));//Bitu is same size as a pointer.
+
+			cache_code_link_blocks=cache_code;
+			cache_code=cache_code+PAGESIZE_TEMP;
+
+#if (C_HAVE_MPROTECT)
+			if(mprotect(cache_code_link_blocks,CACHE_TOTAL+CACHE_MAXSIZE+PAGESIZE_TEMP,PROT_WRITE|PROT_READ|PROT_EXEC))
+				LOG_MSG("Setting execute permission on the code cache has failed");
+#endif
+			CacheBlockDynRec * block=cache_getblock();
+			cache.block.first=block;
+			cache.block.active=block;
+			block->cache.start=&cache_code[0];
+			block->cache.size=CACHE_TOTAL;
+			block->cache.next=0;						// last block in the list
+		}
+		// setup the default blocks for block linkage returns
+		cache.pos=&cache_code_link_blocks[0];
+		link_blocks[0].cache.start=cache.pos;
+		// link code that returns with a special return code
+		dyn_return(BR_Link1,false);
+		cache.pos=&cache_code_link_blocks[32];
+		link_blocks[1].cache.start=cache.pos;
+		// link code that returns with a special return code
+		dyn_return(BR_Link2,false);
+
+		cache.pos=&cache_code_link_blocks[64];
+		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
+//		link_blocks[1].cache.start=cache.pos;
+		dyn_run_code();
+
+		cache.free_pages=0;
+		cache.last_page=0;
+		cache.used_pages=0;
+		// setup the code pages
+		for (i=0;i<CACHE_PAGES;i++) {
+			CodePageHandlerDynRec * newpage=new CodePageHandlerDynRec();
+			newpage->next=cache.free_pages;
+			cache.free_pages=newpage;
+		}
+	}
+}
+
+static void cache_close(void) {
+/*	for (;;) {
+		if (cache.used_pages) {
+			CodePageHandler * cpage=cache.used_pages;
+			CodePageHandler * npage=cache.used_pages->next;
+			cpage->ClearRelease();
+			delete cpage;
+			cache.used_pages=npage;
+		} else break;
+	}
+	if (cache_blocks != NULL) {
+		free(cache_blocks);
+		cache_blocks = NULL;
+	}
+	if (cache_code_start_ptr != NULL) {
+		### care: under windows VirtualFree() has to be used if
+		###       VirtualAlloc was used for memory allocation
+		free(cache_code_start_ptr);
+		cache_code_start_ptr = NULL;
+	}
+	cache_code = NULL;
+	cache_code_link_blocks = NULL;
+	cache_initialized = false; */
+}
diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h
index c8e2a8ef2cecc93db280a6edc74a3621645d215c..95488575bbdfe69b43d41782e5908d2d5fe1b0d3 100644
--- a/src/cpu/core_dynrec/decoder_basic.h
+++ b/src/cpu/core_dynrec/decoder_basic.h
@@ -995,10 +995,10 @@ skip_extend_word:
 							// succeeded, use the pointer to avoid code invalidation
 							if (!addseg) {
 								if (!scaled_reg_used) {
-									gen_mov_word_to_reg(ea_reg,(void*)val,true);
+									gen_mov_LE_word_to_reg(ea_reg,(void*)val,true);
 								} else {
 									DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0);
-									gen_add(ea_reg,(void*)val);
+									gen_add_LE(ea_reg,(void*)val);
 								}
 							} else {
 								if (!scaled_reg_used) {
@@ -1006,7 +1006,7 @@ skip_extend_word:
 								} else {
 									DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 								}
-								gen_add(ea_reg,(void*)val);
+								gen_add_LE(ea_reg,(void*)val);
 							}
 							return;
 						}
@@ -1047,10 +1047,10 @@ skip_extend_word:
 						if (!addseg) {
 							if (!scaled_reg_used) {
 								MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-								gen_add(ea_reg,(void*)val);
+								gen_add_LE(ea_reg,(void*)val);
 							} else {
 								DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0);
-								gen_add(ea_reg,(void*)val);
+								gen_add_LE(ea_reg,(void*)val);
 							}
 						} else {
 							if (!scaled_reg_used) {
@@ -1059,7 +1059,7 @@ skip_extend_word:
 								DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 							}
 							ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-							gen_add(ea_reg,(void*)val);
+							gen_add_LE(ea_reg,(void*)val);
 						}
 						return;
 					}
@@ -1124,11 +1124,11 @@ skip_extend_word:
 				// succeeded, use the pointer to avoid code invalidation
 				if (!addseg) {
 					MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				} else {
 					MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base));
 					ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				}
 				return;
 			}
diff --git a/src/cpu/core_dynrec/decoder_opcodes.h b/src/cpu/core_dynrec/decoder_opcodes.h
index 09e356313efb699e555662b7d261be34ff18084b..ae13433155e4fd9e3da1937f607dc908d0a268f0 100644
--- a/src/cpu/core_dynrec/decoder_opcodes.h
+++ b/src/cpu/core_dynrec/decoder_opcodes.h
@@ -250,12 +250,12 @@ static void dyn_prep_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false);
 			return;
 		}
 	}
@@ -287,13 +287,13 @@ static void dyn_mov_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true);
 			MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false);
 			MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
@@ -330,7 +330,7 @@ static void dyn_mov_byte_direct_al() {
 	if (decode.big_addr) {
 		Bitu val;
 		if (decode_fetchd_imm(val)) {
-			gen_add(FC_ADDR,(void*)val);
+			gen_add_LE(FC_ADDR,(void*)val);
 		} else {
 			gen_add_imm(FC_ADDR,(Bit32u)val);
 		}
@@ -1179,7 +1179,7 @@ static void dyn_ret_near(Bitu bytes) {
 		gen_call_function_raw((void*)&dynrec_pop_word);
 		gen_extend_word(false,FC_RETOP);
 	}
-	gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true);
+	gen_mov_word_from_reg(FC_RETOP,(void*)(&reg_eip),true);
 
 	if (bytes) gen_add_direct_word(&reg_esp,bytes,true);
 	dyn_return(BR_Normal);
diff --git a/src/cpu/core_dynrec/risc_ppc.h b/src/cpu/core_dynrec/risc_ppc.h
new file mode 100644
index 0000000000000000000000000000000000000000..b43dbfd3179a2c823510616d2ce6ff6ddf8bb79d
--- /dev/null
+++ b/src/cpu/core_dynrec/risc_ppc.h
@@ -0,0 +1,897 @@
+/*
+ *  Copyright (C) 2002-2019  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+// some configuring defines that specify the capabilities of this architecture
+// or aspects of the recompiling
+
+// protect FC_ADDR over function calls if necessary
+//#define DRC_PROTECT_ADDR_REG
+
+// try to use non-flags generating functions if possible
+#define DRC_FLAGS_INVALIDATION
+// try to replace _simple functions by code
+#define DRC_FLAGS_INVALIDATION_DCODE
+
+// type with the same size as a pointer
+#define DRC_PTR_SIZE_IM Bit32u
+
+// calling convention modifier
+#define DRC_FC /* nothing */
+#define DRC_CALL_CONV /* nothing */
+
+#define DRC_USE_REGS_ADDR
+#define DRC_USE_SEGS_ADDR
+
+// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
+#define USE_SDA_BASE
+
+// register mapping
+enum HostReg {
+	HOST_R0=0,
+	HOST_R1,
+	HOST_R2,
+	HOST_R3,
+	HOST_R4,
+	HOST_R5,
+	HOST_R6,
+	HOST_R7,
+	HOST_R8,
+	HOST_R9,
+	HOST_R10,
+	HOST_R11,
+	HOST_R12,
+	HOST_R13,
+	HOST_R14,
+	HOST_R15,
+	HOST_R16,
+	HOST_R17,
+	HOST_R18,
+	HOST_R19,
+	HOST_R20,
+	HOST_R21,
+	HOST_R22,
+	HOST_R23,
+	HOST_R24,
+	HOST_R25,
+	HOST_R26,
+	HOST_R27,
+	HOST_R28,
+	HOST_R29,
+	HOST_R30,
+	HOST_R31,
+
+	HOST_NONE
+};
+
+static const HostReg RegParams[] = {
+	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
+	HOST_R7, HOST_R8, HOST_R9, HOST_R10
+};
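+// r3-r10 carry the first eight integer arguments in the 32-bit PowerPC
+// SysV/EABI calling convention; r3 also holds the integer return value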
+
+#if C_FPU
+extern struct FPU_rec fpu;
+#endif
+
+#if defined(USE_SDA_BASE)
+extern Bit32u _SDA_BASE_[];
+#endif
+
+// register that holds function return values
+#define FC_RETOP HOST_R3
+
+// register used for address calculations, if the ABI does not
+// state that this register is preserved across function calls
+// then define DRC_PROTECT_ADDR_REG above
+#define FC_ADDR HOST_R29
+
+// register that points to Segs[]
+#define FC_SEGS_ADDR HOST_R30
+// register that points to cpu_regs[]
+#define FC_REGS_ADDR HOST_R31
+
+// register that holds the first parameter
+#define FC_OP1 RegParams[0]
+
+// register that holds the second parameter
+#define FC_OP2 RegParams[1]
+
+// special register that holds the third parameter for _R3 calls (byte accessible)
+#define FC_OP3 RegParams[2]
+
+// register that holds byte-accessible temporary values
+//#define FC_TMP_BA1 HOST_R6
+#define FC_TMP_BA1 FC_OP2
+
+// register that holds byte-accessible temporary values
+//#define FC_TMP_BA2 HOST_R7
+#define FC_TMP_BA2 FC_OP1
+
+// temporary register for LEA
+#define TEMP_REG_DRC HOST_R10
+
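+// instruction encoding helpers for the three PowerPC instruction formats used
+// here: IMM builds D-form instructions (opcode, two registers, 16-bit
+// immediate), EXT builds X-form (primary opcode 31 plus extended opcode),
+// RLW builds M-form rotate-and-mask instructions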
+#define IMM(op, regsd, rega, imm)           (((op)<<26)|((regsd)<<21)|((rega)<<16)|             (((Bit32u)(imm))&0xFFFF))
+#define EXT(regsd, rega, regb, op, rc)      (  (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)|          ((op)<<1)|(rc))
+#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)|  ((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
+
+#define IMM_OP(op, regsd, rega, imm)           cache_addd(IMM(op, regsd, rega, imm))
+#define EXT_OP(regsd, rega, regb, op, rc)      cache_addd(EXT(regsd, rega, regb, op, rc))
+#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
+
+// move a full register from reg_src to reg_dst
+static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
+{
+	if (reg_dst != reg_src)
+		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
+}
+
+// move a 16bit constant value into dest_reg
+// the upper 16bit of the destination register may be destroyed
+static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
+{
+	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
+}
+
+DRC_PTR_SIZE_IM block_ptr;
+
+// Helper for loading addresses
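+// tries to express addr as base register + signed 16-bit displacement against
+// one of the pointers kept in nonvolatile registers (Segs, cpu_regs, the
+// current block, fpu, _SDA_BASE_); on success addr is rewritten to the
+// displacement and the base register is returned, avoiding the lis that a
+// full 32-bit address would otherwise need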
+static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest)
+{
+	Bit32s off;
+
+	if ((Bit16s)addr == addr)
+		return HOST_R0;
+
+	off = addr - (Bit32s)&Segs;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return FC_SEGS_ADDR;
+	}
+
+	off = addr - (Bit32s)&cpu_regs;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return FC_REGS_ADDR;
+	}
+
+	off = addr - (Bit32s)block_ptr;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R27;
+	}
+
+#if C_FPU
+	off = addr - (Bit32s)&fpu;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R28;
+	}
+#endif
+
+#if defined(USE_SDA_BASE)
+	off = addr - (Bit32s)_SDA_BASE_;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R13;
+	}
+#endif
+
+	IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha
+	addr = (Bit16s)addr;
+	return dest;
+}
+
+// move a 32bit constant value into dest_reg
+static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
+{
+	HostReg ld = gen_addr((Bit32s&)imm, dest_reg);
+	if (imm || ld != dest_reg)
+		IMM_OP(14, dest_reg, ld, imm);   // addi dest_reg, ldr, imm@l
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
+	Bit32s addr = (Bit32s)data;
+	HostReg ld = gen_addr(addr, dest_reg);
+	IMM_OP(dword ? 32:40, dest_reg, ld, addr);  // lwz/lhz dest, addr@l(ld)
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg
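+// the byte-reversed loads lwbrx/lhbrx swap the little-endian guest data for
+// free on this big-endian host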
+static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) {
+	Bit32u addr = (Bit32u)data;
+	gen_mov_dword_to_reg_imm(dest_reg, addr);
+	EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest
+}
+
+// move an 8bit constant value into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
+	gen_mov_word_to_reg_imm(dest_reg, imm);
+}
+
+// move an 8bit constant value into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
+	gen_mov_word_to_reg_imm(dest_reg, imm);
+}
+
+// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
+static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
+{
+	Bit32s addr = (Bit32s)dest;
+	HostReg ld = gen_addr(addr, HOST_R8);
+	IMM_OP(dword ? 36 : 44, src_reg, ld, addr);  // stw/sth src,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
+{
+	Bit32s addr = (Bit32s)data;
+	HostReg ld = gen_addr(addr, dest_reg);
+	IMM_OP(34, dest_reg, ld, addr);  // lbz dest,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
+	gen_mov_byte_to_reg_low(dest_reg, data);
+}
+
+// move the lowest 8bit of a register into memory
+static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
+{
+	Bit32s addr = (Bit32s)dest;
+	HostReg ld = gen_addr(addr, HOST_R8);
+	IMM_OP(38, src_reg, ld, addr);  // stb src_reg,addr@l(ld)
+}
+
+// convert an 8bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_byte(bool sign,HostReg reg)
+{
+	if (sign)
+	{
+		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
+		return;
+	}
+
+	// check if previous instruction is "lbz reg, *"
+	if ((*(Bit32u*)(cache.pos-4) & 0xFFE00000) != IMM(34, reg, 0, 0))
+		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
+	// else register is already zero-extended
+}
+
+// convert a 16bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_word(bool sign,HostReg reg)
+{
+	// check if previous instruction is "lhz reg, *"
+	Bit32u *op = (Bit32u*)(cache.pos-4);
+	if ((*op & 0xFFE00000) == IMM(40, reg, 0, 0))
+	{
+		if (sign) // change lhz -> lha
+			*op |= 0x08000000;
+		// else zero-extension already done
+		return;
+	}
+
+	if (sign)
+		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
+	else
+		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
+}
+
+// add a 32bit value from memory to a full register
+static void gen_add(HostReg reg,void* op)
+{
+	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
+	EXT_OP(reg,reg,HOST_R8,266,0);          // add reg,reg,r8
+}
+
+// add a 32bit value from host memory to a full register
+static void gen_add_LE(HostReg reg,void* op)
+{
+	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
+	EXT_OP(reg,reg,HOST_R8,266,0);       // add reg,reg,r8
+}
+
+// add a 32bit constant value to a full register
+static void gen_add_imm(HostReg reg,Bit32u imm)
+{
+	if ((Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
+	if ((Bit16s)imm)
+		IMM_OP(14, reg, reg, imm);              // addi reg, reg, imm@l
+}
+
+// and a 32bit constant value with a full register
+static void gen_and_imm(HostReg reg,Bit32u imm) {
+	Bits sbit,ebit,tbit,bbit,abit,i;
+
+	// sbit = number of leading 0 bits
+	// ebit = number of trailing 0 bits
+	// tbit = number of total 0 bits
+	// bbit = number of leading 1 bits
+	// abit = number of trailing 1 bits
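+	//
+	// rlwinm can implement any AND whose mask is a single contiguous run of
+	// ones (wrapping runs included), so classify the mask shape first and
+	// fall back to a full register AND only if no one-instruction form fits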
+
+	if (imm == 0xFFFFFFFF)
+		return;
+
+	if (!imm)
+		return gen_mov_word_to_reg_imm(reg, 0);
+
+	sbit = ebit = tbit = bbit = abit = 0;
+	for (i=0; i < 32; i++)
+	{
+		if (!(imm & (1<<(31-i))))
+		{
+			abit = 0;
+			tbit++;
+			if (sbit == i)
+				sbit++;
+			ebit++;
+		}
+		else
+		{
+			ebit = 0;
+			if (bbit == i)
+				bbit++;
+			abit++;
+		}
+	}
+
+	if (sbit >= 16)
+	{
+		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
+		return;
+	}
+	if (ebit >= 16)
+	{
+		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
+		return;
+	}
+
+	if (sbit + ebit == tbit)
+	{
+		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
+		return;
+	}
+
+	if (bbit + abit == (32 - tbit))
+	{
+		RLW_OP(21,reg,reg,0,31-abit,bbit,0); // rlwinm reg,reg,0,31-abit,bbit
+		return;
+	}
+
+	gen_mov_dword_to_reg_imm(HOST_R8, imm);
+	EXT_OP(reg, reg, HOST_R8, 28, 0);  // and reg, reg, r8
+}
+
+// move a 32bit constant value into memory
+static void gen_mov_direct_dword(void* dest,Bit32u imm) {
+	gen_mov_dword_to_reg_imm(HOST_R9, imm);
+	gen_mov_word_from_reg(HOST_R9, dest, 1);
+}
+
+// move an address into memory (assumes address != NULL)
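+// R27 keeps the current block pointer so that later gen_addr calls can reach
+// block data with a signed 16-bit displacement from it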
+static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
+{
+	block_ptr = 0;
+	gen_mov_dword_to_reg_imm(HOST_R27, imm);
+	// this will probably be used to look-up the linked blocks
+	block_ptr = imm;
+	gen_mov_word_from_reg(HOST_R27, dest, 1);
+}
+
+// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
+static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
+{
+	HostReg ld;
+	Bit32s addr = (Bit32s)dest;
+
+	if (!dword)
+	{
+		imm &= 0xFFFF;
+		addr += 2;
+	}
+
+	if (!imm)
+		return;
+
+	ld = gen_addr(addr, HOST_R8);
+	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
+	if (dword && (Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
+	if (!dword || (Bit16s)imm)
+		IMM_OP(14, HOST_R9, HOST_R9, imm);      // addi r9,r9,imm@l
+	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
+}
+
+// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value
+static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
+	gen_add_direct_word(dest, -(Bit32s)imm, dword);
+}
+
+// effective address calculation, destination is dest_reg
+// scale_reg is scaled by scale (scale_reg*(2^scale)) and
+// added to dest_reg, then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+	{
+		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // rlwinm r8,scale_reg,scale,0,31-scale
+		scale_reg = HOST_R8;
+	}
+
+	gen_add_imm(dest_reg, imm);
+	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
+}
+
+// effective address calculation, destination is dest_reg
+// dest_reg is scaled by scale (dest_reg*(2^scale)),
+// then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // rlwinm dest,dest,scale,0,31-scale
+
+	gen_add_imm(dest_reg, imm);
+}
+
+// helper function to choose direct or indirect call
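+// always emits exactly four instructions (bl plus three nops, or
+// lis/ori/mtctr/bctrl) so a call site can later be patched in place by
+// gen_fill_function_ptr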
+static void INLINE do_gen_call(void *func, Bit32u *pos)
+{
+	Bit32s f = (Bit32s)func;
+	Bit32s off = f - (Bit32s)pos;
+
+	// relative branches are limited to +/- ~32MB
+	if (off < 0x02000000 && off >= -0x02000000)
+	{
+		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
+		pos[1] = IMM(24, 0, 0, 0); // nop
+		pos[2] = IMM(24, 0, 0, 0);
+		pos[3] = IMM(24, 0, 0, 0);
+		return;
+	}
+
+	pos[0] = IMM(15, HOST_R8, 0, f>>16);      // lis r8,imm@h
+	pos[1] = IMM(24, HOST_R8, HOST_R8, f);    // ori r8,r8,imm@l
+	pos[2] = EXT(HOST_R8, 9, 0, 467, 0);      // mtctr r8
+	pos[3] = IMM(19, 0b10100, 0, (528<<1)|1); // bctrl
+}
+
+// generate a call to a parameterless function
+static void INLINE gen_call_function_raw(void * func)
+{
+	do_gen_call(func, (Bit32u*)cache.pos);
+	cache.pos += 16;
+}
+
+// generate a call to a function with paramcount parameters
+// note: the parameters are loaded in the architecture specific way
+// using the gen_load_param_ functions below
+static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
+{
+	Bit32u proc_addr=(Bit32u)cache.pos;
+	gen_call_function_raw(func);
+	return proc_addr;
+}
+
+// load an immediate value as param'th function parameter
+static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
+	gen_mov_dword_to_reg_imm(RegParams[param], imm);
+}
+
+// load an address as param'th function parameter
+static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
+	gen_load_param_imm(addr, param);
+}
+
+// load a host-register as param'th function parameter
+static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
+	gen_mov_regs(RegParams[param], (HostReg)reg);
+}
+
+// load a value from memory as param'th function parameter
+static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
+	gen_mov_word_to_reg(RegParams[param], (void*)mem, true);
+}
+
+// jump to an address pointed at by ptr, offset is in imm
+static void gen_jmp_ptr(void * ptr,Bits imm=0) {
+	gen_mov_word_to_reg(HOST_R8,ptr,true);                // r8 = *(Bit32u*)ptr
+	if ((Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha
+	IMM_OP(32, HOST_R8, HOST_R8, imm);                    // lwz r8, imm@l(r8)
+	EXT_OP(HOST_R8, 9, 0, 467, 0);                        // mtctr r8
+	IMM_OP(19, 0b10100, 0, 528<<1);                       // bctr
+}
+
+// short conditional jump (+-32k bytes) if register is zero
+// the destination is set by gen_fill_branch() later
+static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
+	else
+		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
+
+	IMM_OP(16, 0b01100, 2, 0); // bc 12,CR0[Z] (beq)
+	return ((Bit32u)cache.pos-4);
+}
+
+// short conditional jump (+-32k bytes) if register is nonzero
+// the destination is set by gen_fill_branch() later
+static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
+	else
+		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
+
+	IMM_OP(16, 0b00100, 2, 0); // bc 4,CR0[Z] (bne)
+	return ((Bit32u)cache.pos-4);
+}
+
+// calculate relative offset and fill it into the location pointed to by data
+static void gen_fill_branch(DRC_PTR_SIZE_IM data)
+{
+#if C_DEBUG
+	Bits len=(Bit32u)cache.pos-data;
+	if (len<0) len=-len;
+	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
+#endif
+
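+	// the 14-bit branch displacement lives in the low halfword of the bc
+	// instruction, which on this big-endian host is halfword index 1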
+	((Bit16u*)data)[1] = ((Bit32u)cache.pos-data) & 0xFFFC;
+}
+
+
+// conditional jump if register is nonzero
+// for dword==true all 32 bits of the register are tested
+// for dword==false only the lowest 8 bits of the register are tested
+static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF
+	else
+		EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
+
+	IMM_OP(16, 0b00100, 2, 0); // bne
+	return ((Bit32u)cache.pos-4);
+}
+
+// compare a 32bit register against zero and jump if the value is less than or equal to zero
+static Bit32u gen_create_branch_long_leqzero(HostReg reg)
+{
+	EXT_OP(reg,reg,reg,444,1); // or. reg,reg,reg
+
+	IMM_OP(16, 0b00100, 1, 0); // ble
+	return ((Bit32u)cache.pos-4);
+}
+
+// calculate long relative offset and fill it into the location pointed to by data
+static void gen_fill_branch_long(Bit32u data) {
+	return gen_fill_branch((DRC_PTR_SIZE_IM)data);
+}
+
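+// PowerPC has split, non-coherent instruction and data caches: flush the
+// freshly written cache lines to memory with dcbst and invalidate the
+// matching icache lines with icbi before the generated code is executed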
+static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
+#if defined(__GNUC__)
+	Bit8u* start = (Bit8u*)((Bit32u)block_start & -32);
+
+	while (start < block_start + block_size)
+	{
+		asm volatile("dcbst %y0; icbi %y0" :: "Z"(*start));
+		start += 32;
+	}
+	asm volatile("sync; isync");
+#else
+	#error "Don't know how to flush/invalidate CacheBlock with this compiler"
+#endif
+}
+
+static void cache_block_before_close(void) {}
+
+// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it
+static Bit32s epilog_addr;
+static Bit8u *getCF_glue;
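+// the prolog saves r26-r31 with a single stmw and pins &Segs, &cpu_regs and
+// &fpu in nonvolatile registers so generated code can reach them via signed
+// 16-bit displacements; the epilog restores them with lmw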
+static void gen_run_code(void) {
+	// prolog
+	IMM_OP(37, HOST_R1, HOST_R1, -32); // stwu sp,-32(sp)
+	EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1
+	EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
+
+	IMM_OP(47, HOST_R26, HOST_R1, 8); // stmw r26, 8(sp)
+
+	IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16);  // lis FC_SEGS_ADDR, Segs@h
+	IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs);     // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l
+
+	IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16);  // lis FC_REGS_ADDR, cpu_regs@h
+	IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs);     // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l
+
+#if C_FPU
+	IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16);  // lis r28, fpu@h
+	IMM_OP(24, HOST_R28, HOST_R28, &fpu);         // ori r28, r28, fpu@l
+#endif
+
+	IMM_OP(36, HOST_R0, HOST_R1, 32+4); // stw r0,32+4(sp)
+	IMM_OP(19, 0b10100, 0, 528<<1);     // bctr
+
+	// epilog
+	epilog_addr = (Bit32s)cache.pos;
+	IMM_OP(32, HOST_R0, HOST_R1, 32+4); // lwz r0,32+4(sp)
+	IMM_OP(46, HOST_R26, HOST_R1, 8);    // lmw r26, 8(sp)
+	EXT_OP(HOST_R0, 8, 0, 467, 0);      // mtlr r0
+	IMM_OP(14, HOST_R1, HOST_R1, 32);      // addi sp, sp, 32
+	IMM_OP(19, 0b10100, 0, 16<<1);         // blr
+
+	// trampoline to call get_CF()
+	getCF_glue = cache.pos;
+	gen_mov_dword_to_reg_imm(FC_OP1, (Bit32u)get_CF); // FC_OP1 = &get_CF
+	EXT_OP(FC_OP1, 9, 0, 467, 0);   // mtctr FC_OP1
+	IMM_OP(19, 0b10100, 0, 528<<1); // bctr
+}
+
+// return from a function
+static void gen_return_function(void)
+{
+	Bit32s off = epilog_addr - (Bit32s)cache.pos;
+
+	// relative branches are limited to +/- 32MB
+	if (off < 0x02000000 && off >= -0x02000000) {
+		cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b epilog
+		return;
+	}
+
+	gen_mov_dword_to_reg_imm(HOST_R8, epilog_addr);
+	EXT_OP(HOST_R8, 9, 0, 467, 0);  // mtctr r8
+	IMM_OP(19, 0b10100, 0, 528<<1); // bctr
+}
+
+// called when a call to a function can be replaced by a
+// call to a simpler function
+static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
+{
+	Bit32u *op = (Bit32u*)pos;
+	Bit32u *end = op+4;
+
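+	// the call site reserved by gen_call_function_raw is exactly four
+	// instructions wide; shorter inline replacements are nop-padded below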
+	switch (flags_type) {
+#if defined(DRC_FLAGS_INVALIDATION_DCODE)
+		// try to avoid function calls but rather directly fill in code
+		case t_ADDb:
+		case t_ADDw:
+		case t_ADDd:
+			*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_ORb:
+		case t_ORw:
+		case t_ORd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_ADCb:
+		case t_ADCw:
+		case t_ADCd:
+			op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
+			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
+			op[2] = IMM(12, HOST_R0, FC_RETOP, -1);        // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = CF!=0)
+			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0);    // addze; FC_RETOP = r26 + CF!=0
+			return;
+		case t_SBBb:
+		case t_SBBw:
+		case t_SBBd:
+			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0);  // r26 = FC_OP1 - FC_OP2
+			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
+			op[2] = IMM(8, HOST_R0, FC_RETOP, 0);          // subfic r0, FC_RETOP, 0 (XER[CA] = CF==0)
+			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0);    // addme; FC_RETOP = r26 - 1 + CF==0
+			return;
+		case t_ANDb:
+		case t_ANDw:
+		case t_ANDd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SUBb:
+		case t_SUBw:
+		case t_SUBd:
+			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
+			break;
+		case t_XORb:
+		case t_XORw:
+		case t_XORd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_CMPb:
+		case t_CMPw:
+		case t_CMPd:
+		case t_TESTb:
+		case t_TESTw:
+		case t_TESTd:
+			break;
+		case t_INCb:
+		case t_INCw:
+		case t_INCd:
+			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
+			break;
+		case t_DECb:
+		case t_DECw:
+		case t_DECd:
+			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
+			break;
+		case t_NEGb:
+		case t_NEGw:
+		case t_NEGd:
+			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
+			break;
+		case t_SHLb:
+		case t_SHLw:
+		case t_SHLd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SHRb:
+		case t_SHRw:
+		case t_SHRd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
+			break;
+		case t_SARb:
+			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
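+			// fall through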
+		case t_SARw:
+			if (flags_type == t_SARw)
+				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
+		case t_SARd:
+			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_ROLb:
+			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
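+			// fall through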
+		case t_ROLw:
+			if (flags_type == t_ROLw)
+				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+		case t_ROLd:
+			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_RORb:
+			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
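+			// fall through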
+		case t_RORw:
+			if (flags_type == t_RORw)
+				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+		case t_RORd:
+			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
+			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+			break;
+
+		case t_DSHLw: // technically not correct for FC_OP3 > 16
+			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
+			break;
+		case t_DSHLd:
+			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
+			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
+			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+			return;
+		case t_DSHRw: // technically not correct for FC_OP3 > 16
+			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
+			break;
+		case t_DSHRd:
+			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
+			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
+			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+			return;
+#endif
+		default:
+			do_gen_call(fct_ptr, op);
+			return;
+	}
+
+	do
+	{
+		*op++ = IMM(24, 0, 0, 0); // nop
+	} while (op < end);
+}
+
+// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(40, dest_reg, FC_SEGS_ADDR, index); // lhz dest_reg, index(FC_SEGS_ADDR)
+}
+
+// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(32, dest_reg, FC_SEGS_ADDR, index); // lwz dest_reg, index(FC_SEGS_ADDR)
+}
+
+// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
+	gen_mov_seg32_to_reg(HOST_R8, index);
+	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
+}
+
+// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(40, dest_reg, FC_REGS_ADDR, index); // lhz dest_reg, index(FC_REGS_ADDR)
+}
+
+// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
+	IMM_OP(32, dest_reg, FC_REGS_ADDR, index); // lwz dest_reg, index(FC_REGS_ADDR)
+}
+
+// move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
+	IMM_OP(34, dest_reg, FC_REGS_ADDR, index); // lbz dest_reg, index(FC_REGS_ADDR)
+}
+
+// move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
+	gen_mov_regbyte_to_reg_low(dest_reg, index);
+}
+
+// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
+static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
+	IMM_OP(44, src_reg, FC_REGS_ADDR, index); // sth src_reg, index(FC_REGS_ADDR)
+}
+
+// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
+	IMM_OP(36, src_reg, FC_REGS_ADDR, index); // stw src_reg, index(FC_REGS_ADDR)
+}
+
+// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
+static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
+	IMM_OP(38, src_reg, FC_REGS_ADDR, index); // stb src_reg, index(FC_REGS_ADDR)
+}
+
+// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
+	gen_mov_regval32_to_reg(HOST_R8, index);
+	EXT_OP(reg, reg, HOST_R8, 266, 0); // add reg, reg, HOST_R8
+}
+
+// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
+static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
+	IMM_OP(dword ? 36 : 44, src_reg, FC_REGS_ADDR, index); // stw/sth src_reg, index(FC_REGS_ADDR)
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
+	IMM_OP(dword ? 32 : 40, dest_reg, FC_REGS_ADDR, index); // lwz/lhz dest_reg, index(FC_REGS_ADDR)
+}
+