//
// (C) 2004 Mike Brent aka Tursi aka HarmlessLion.com
// This software is provided AS-IS. No warranty
// express or implied is provided.
//
// This notice defines the entire license for this code.
// All rights not explicity granted here are reserved by the
// author.
//
// You may redistribute this software provided the original
// archive is UNCHANGED and a link back to my web page,
// http://harmlesslion.com, is provided as the author's site.
// It is acceptable to link directly to a subpage at harmlesslion.com
// provided that page offers a URL for that purpose
//
// Source code, if available, is provided for educational purposes
// only. You are welcome to read it, learn from it, mock
// it, and hack it up - for your own use only.
//
// Please contact me before distributing derived works or
// ports so that we may work out terms. I don't mind people
// using my code but it's been outright stolen before. In all
// cases the code must maintain credit to the original author(s).
//
// -COMMERCIAL USE- Contact me first. I didn't make
// any money off it - why should you? ;) If you just learned
// something from this, then go ahead. If you just pinched
// a routine or two, let me know, I'll probably just ask
// for credit. If you want to derive a commercial tool
// or use large portions, we need to talk. ;)
//
// If this, itself, is a derived work from someone else's code,
// then their original copyrights and licenses are left intact
// and in full force.
//
// http://harmlesslion.com - visit the web page for contact info
//
/////////////////////////////////////////////////////////////////////
// Classic99 - TMS9900 CPU Routines
// M.Brent
// The TMS9900 is a 16-bit CPU by Texas Instruments, with a 16
// bit data and 16-bit address path, capable of addressing
// 64k of memory. All reads and writes are word (16-bit) oriented.
// Byte access is simulated within the CPU by reading or writing
// the entire word, and manipulating only the requested byte.
// This is not currently emulated here. The CPU uses external
// RAM for all user registers. There are 16 user registers, R0-R15,
// and the memory base for these registers may be anywhere in
// memory, set by the Workspace Pointer. The CPU also has a Program
// Counter and STatus register internal to it.
// This emulation generates a lookup table of addresses for each
// opcode. It's not currently intended for use outside of Classic99
// and there may be problems with dependency on other parts of the
// code if it is moved to another project.
// Word is defined to be an unsigned 16-bit integer (__int16)
// Byte is defined to be an unsigned 8-bit integer (__int8)
/////////////////////////////////////////////////////////////////////

#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0500
#include <stdio.h>
#include <windows.h>
#include <vector>
#include "tiemul.h"
#include "cpu9900.h"

extern bool BreakOnIllegal;							// true if we should trigger a breakpoint on bad opcode
extern CPU9900 * volatile pCurrentCPU;				// NOTE(review): presumably the core currently being executed - confirm at the definition
extern CPU9900 *pCPU, *pGPU;						// NOTE(review): presumably the main 9900 core and the F18A GPU core - confirm at the definition
extern int bInterleaveGPU;							// NOTE(review): presumably selects interleaved CPU/GPU execution - confirm at the definition

/////////////////////////////////////////////////////////////////////
// Status register defines
/////////////////////////////////////////////////////////////////////
#if 0
// defined in tiemul.h
#define BIT_LGT 0x8000
#define BIT_AGT 0x4000
#define BIT_EQ  0x2000
#define BIT_C   0x1000
#define BIT_OV  0x0800
#define BIT_OP  0x0400
#define BIT_X   0x0200
#endif

// Flag tests: each expands to a nonzero value when the named flag is set in ST
#define ST_LGT (ST & BIT_LGT)						// Logical Greater Than
#define ST_AGT (ST & BIT_AGT)						// Arithmetic Greater Than
#define ST_EQ  (ST & BIT_EQ)						// Equal
#define ST_C   (ST & BIT_C)							// Carry
#define ST_OV  (ST & BIT_OV)						// Overflow
#define ST_OP  (ST & BIT_OP)						// Odd Parity
#define ST_X   (ST & BIT_X)							// Set during an XOP instruction
#define ST_INTMASK (ST&0x000f)						// Interrupt mask (the TI uses only values 0 and 2)

// Flag setters: OR the single flag bit into ST (literals match the BIT_xxx values listed above)
#define set_LGT (ST|=0x8000)						// Logical Greater than: >0x0000
#define set_AGT (ST|=0x4000)						// Arithmetic Greater than: >0x0000 and <0x8000
#define set_EQ  (ST|=0x2000)						// Equal: ==0x0000
#define set_C   (ST|=0x1000)						// Carry: carry occurred during operation
#define set_OV  (ST|=0x0800)						// Overflow: overflow occurred during operation
#define set_OP  (ST|=0x0400)						// Odd parity: word has odd number of '1' bits
#define set_X   (ST|=0x0200)						// Executing 'X' statement

// Flag clears: AND ST with the complement of the single flag bit
#define reset_LGT (ST&=0x7fff)						// Clear the flags
#define reset_AGT (ST&=0xbfff)						// clears 0x4000
#define reset_EQ  (ST&=0xdfff)						// clears 0x2000
#define reset_C   (ST&=0xefff)						// clears 0x1000
#define reset_OV  (ST&=0xf7ff)						// clears 0x0800
#define reset_OP  (ST&=0xfbff)						// clears 0x0400
#define reset_X   (ST&=0xfdff)						// clears 0x0200

// Group clears
// Each mask is the complement of the OR of the named flag bits, so one AND clears
// the whole group and leaves every other bit of ST untouched.
#define reset_EQ_LGT (ST&=0x5fff)
#define reset_LGT_AGT_EQ (ST&=0x1fff)
#define reset_LGT_AGT_EQ_OP (ST&=0x1bff)
#define reset_EQ_LGT_AGT_OV (ST&=0x17ff)
#define reset_EQ_LGT_AGT_C (ST&=0x0fff)
#define reset_EQ_LGT_AGT_C_OV (ST&=0x7ff)
#define reset_EQ_LGT_AGT_C_OV_OP (ST&=0x3ff)

// Assignment masks
// Used to pick only the wanted flag bits out of a WStatusLookup/BStatusLookup entry
// before ORing them into ST.
#define mask_EQ_LGT (BIT_EQ|BIT_LGT)
#define mask_LGT_AGT_EQ (BIT_LGT|BIT_AGT|BIT_EQ)
#define mask_LGT_AGT_EQ_OP (BIT_LGT|BIT_AGT|BIT_EQ|BIT_OP)
#define mask_LGT_AGT_EQ_OV (BIT_LGT|BIT_AGT|BIT_EQ|BIT_OV)
#define mask_LGT_AGT_EQ_OV_C (BIT_LGT|BIT_AGT|BIT_EQ|BIT_OV|BIT_C)		// carry here used for INC and NEG only

// Status register lookup table (hey, what's another 64k these days??) -- shared
Word WStatusLookup[64*1024];						// indexed by a 16-bit result; opcodes mask the entry and OR it into ST
Word BStatusLookup[256];							// indexed by an 8-bit result; also supplies the odd-parity bit for byte opcodes

// Note: Post-increment is a trickier case than it looks at first glance.
// For operations like MOV R3,*R3+ (from Corcomp's memory test), the address
// value is written to the same address before the increment occurs.
// There are even trickier cases in the console like MOV *R3+,@>0008(R3),
// where the post-increment happens before the destination address calculation.
// Thus it appears the steps need to happen in this order:
//
// 1) Calculate source address
// 2) Get Source data
// 3) Handle source post-increment
// 4) Calculate destination address
// 5) Store destination data
// 6) Handle Destination post-increment
//
// Only the following instruction formats support post-increment:
// FormatI
// FormatIII (src only) (destination can not post-increment)
// FormatIV (src only) (has no destination)
// FormatVI (src only) (has no destination)
// FormatIX (src only) (has no destination)

// NOTE: this keeps it safe, but forces a PC from 0x8000-0x83ff into the 0x83xx range
// This is wrong, of course, but even more so since the RAM is repeated from
// 0x8100-0x8300, and not at 0x8000. However, 0x8000-0x80ff is the memory mapped
// hardware, so if the Program Counter is there, we're in trouble ANYWAY! ;)
// ADDPC(x): add x (may be negative) to PC, wrap to 16 bits, then force any PC that
// lands in 0x8000-0x83ff up into the 0x83xx range by ORing in 0x300.
// Expands to a braced block, so do not use it as the unbraced body of an if/else.
#define ADDPC(x) { PC+=(x); PC&=0xffff; if ((PC&0xfc00)==0x8000) PC|=0x300; }	

/////////////////////////////////////////////////////////////////////
// Inlines for getting source and destination addresses
/////////////////////////////////////////////////////////////////////
// Operand decoders, one per TI instruction format. Each pulls its fields out of the
// current opcode word 'in'. Those with a general source operand call fixS() to turn
// S into an effective address; the destination is resolved later by the opcode itself
// (fixD(), or a direct workspace register calculation).
// B is the byte-operation flag: fixS()/fixD() post-increment by 1 when B==1, else by 2.
#define FormatI { Td=(in&0x0c00)>>10; Ts=(in&0x0030)>>4; D=(in&0x03c0)>>6; S=(in&0x000f); B=(in&0x1000)>>12; fixS(); }
#define FormatII { D=(in&0x00ff); }
#define FormatIII { Td=0; Ts=(in&0x0030)>>4; D=(in&0x03c0)>>6; S=(in&0x000f); B=0; fixS(); }
// FormatIV: D is the CRU bit count, where 0 stands for 16. Counts 1..8 are byte
// transfers, everything else is a word transfer, so B must be 1 only for 1..8.
// (The previous B=(D>8) had this inverted, giving *Rn+ the wrong increment size.)
#define FormatIV { D=(in&0x03c0)>>6; Ts=(in&0x0030)>>4; S=(in&0x000f); B=((D>=1)&&(D<=8)); fixS(); }			// No destination (CRU ops)
#define FormatV { D=(in&0x00f0)>>4; S=(in&0x000f); S=WP+(S<<1); }
#define FormatVI { Ts=(in&0x0030)>>4; S=in&0x000f; B=0; fixS(); }									// No destination (single argument instructions)
#define FormatVII {}																				// no argument
#define FormatVIII_0 { D=(in&0x000f); D=WP+(D<<1); }
#define FormatVIII_1 { D=(in&0x000f); D=WP+(D<<1); S=ROMWORD(PC); ADDPC(2); }
#define FormatIX  { D=(in&0x03c0)>>6; Ts=(in&0x0030)>>4; S=(in&0x000f); B=0; fixS(); }				// No destination here (dest calc'd after call) (DIV, MUL, XOP)

//////////////////////////////////////////////////////////////////////////
// Arrays to handle post-increment on registers - separate for source and dest
// There are probably better ways to handle this. 
//////////////////////////////////////////////////////////////////////////
// Register number to increment, ORd with 0x80 for 2, or 0x40 for 1
#define SRC 0			// nPostInc[] slot for the source operand
#define DST 1			// nPostInc[] slot for the destination operand
#define POSTINC2 0x80	// flag ORed onto the register number: increment by 2 (word)
#define POSTINC1 0x40	// flag ORed onto the register number: increment by 1 (byte)

// Construct a 9900 core: run buildcpu() (NOTE(review): presumably fills the opcode
// dispatch table - confirm) and then label the core type as "9900".
CPU9900::CPU9900()
{
	buildcpu();
	pType = "9900";
}

// Reset the CPU: leave the idle state, forget any saved return address and pending
// post-increments, load WP and PC from the reset vector at >0000/>0002, clear the
// X state and zero the interrupt mask.
void CPU9900::reset()
{
	StopIdle();
	nReturnAddress = 0;

	// nothing is waiting to be post-incremented after a reset
	nPostInc[SRC] = 0;
	nPostInc[DST] = 0;

	// reset vector: workspace pointer first, then the entry point
	WP = romword(0x0000);
	PC = romword(0x0002);

	X_flag = 0;					// not inside an X instruction
	ST &= 0xfff0;				// interrupt mask = 0, i.e. interrupts disabled
	SetCycleCount(26);			// a reset is charged 26 cycles
}

/////////////////////////////////////////////////////////////////////
// Wrapper functions for memory access
/////////////////////////////////////////////////////////////////////
// Read one byte by fetching the whole word that contains it.
// Even addresses select the high byte of the word, odd addresses the low byte.
Byte CPU9900::RCPUBYTE(Word src)
{
	const Word wholeWord = romword(src);

	return (Byte)((src&1) ? (wholeWord&0xff) : (wholeWord>>8));
}

// Write one byte as a read-modify-write of the containing word.
// Even addresses replace the high byte, odd addresses replace the low byte.
void CPU9900::WCPUBYTE(Word dest, Byte c)
{
	// the read-before-write is required so the other byte can be preserved
	const Word previous = romword(dest, true);
	const Word merged = (dest&1)
		? (Word)((previous&0xff00) | c)
		: (Word)((previous&0x00ff) | (c<<8));

	wrword(dest, merged);
}

// Word read used by the opcode implementations. Currently a plain
// pass-through to romword() with no extra behaviour.
Word CPU9900::ROMWORD(Word src)
{
	return romword(src);
}

// Word write used by the opcode implementations. Always performs a read of the
// target first (result discarded), then the write.
// TODO: the read-before-write does NOT happen on some instructions such as LI;
// check the datasheet's per-instruction memory access counts to find the exceptions.
void CPU9900::WRWORD(Word dest, Word val)
{
	romword(dest, true);		// read-before-write; the value itself is not needed
	wrword(dest, val);
}

// Read a word for monitoring purposes, built from two GetSafeByte() reads.
// The address is forced even first; the byte at the even address is the high byte.
Word CPU9900::GetSafeWord(int x, int bank)
{
	x &= 0xfffe;

	return (GetSafeByte(x, bank)<<8) | GetSafeByte(x+1, bank);
}

// Read a byte withOUT triggering the hardware - for monitoring.
// Simply forwards to GetSafeCpuByte() with the same address and bank.
Byte CPU9900::GetSafeByte(int x, int bank)
{
	return GetSafeCpuByte(x, bank);
}

//////////////////////////////////////////////////////////////////////////
// Get addresses for the destination and source arguments
// Note: the format code letters are the official notation from Texas
// instruments. See their TMS9900 documentation for details.
// (Td, Ts, D, S, B, etc)
// Note that some format codes set the destination type (Td) to
// '4' in order to skip unneeded processing of the Destination address
//////////////////////////////////////////////////////////////////////////
// Resolve the source operand. On entry S holds the 4-bit register field and Ts the
// addressing mode; on exit S holds the effective address of the operand.
// Modes that fetch an extra word advance PC, and each mode adds its own cycle cost.
void CPU9900::fixS()
{
	switch (Ts)
	{
	case 0:
		// register (R1): the operand is the workspace register itself
		S = WP+(S<<1);
		break;

	case 1:
		// register indirect (*R1): the register holds the operand address
		S = ROMWORD(WP+(S<<1));
		AddCycleCount(4);
		break;

	case 2:
		if (S) {
			// indexed (@>1000(R1)): argument word plus the register contents
			S = ROMWORD(PC)+ROMWORD(WP+(S<<1));
		} else {
			// symbolic (@>1000): the argument word is the address
			S = ROMWORD(PC);
		}
		ADDPC(2);
		AddCycleCount(8);
		break;

	case 3:
		// register indirect autoincrement (*R1+): use the register contents as the
		// address now, and record the register (plus step size: 1 for byte ops,
		// 2 for word ops) so post_inc(SRC) can bump it once the opcode has read the source
		nPostInc[SRC] = S | (B==1 ? POSTINC1 : POSTINC2);
		S = ROMWORD(WP+(S<<1));
		AddCycleCount((B==1 ? 6 : 8));
		break;
	}
}

// Resolve the destination operand. On entry D holds the 4-bit register field and Td
// the addressing mode (same encoding as the source modes); on exit D holds the
// effective address. Modes that fetch an extra word advance PC.
void CPU9900::fixD()
{
	switch (Td)
	{
	case 0:
		// register: the operand is the workspace register itself
		D = WP+(D<<1);
		break;

	case 1:
		// register indirect: the register holds the operand address
		D = ROMWORD(WP+(D<<1));
		AddCycleCount(4);
		break;

	case 2:
		if (D) {
			// indexed: argument word plus the register contents
			D = ROMWORD(PC)+ROMWORD(WP+(D<<1));
		} else {
			// symbolic: the argument word is the address
			D = ROMWORD(PC);
		}
		ADDPC(2);
		AddCycleCount(8);
		break;

	case 3:
		// register indirect autoincrement: use the register contents now and record
		// the register (plus step: 1 for byte ops, 2 for word ops) for post_inc(DST)
		nPostInc[DST] = D | (B==1 ? POSTINC1 : POSTINC2);
		D = ROMWORD(WP+(D<<1));
		AddCycleCount((B==1 ? 6 : 8));
		break;
	}
}

/////////////////////////////////////////////////////////////////////////
// Check parity in the passed byte and set the OP status bit
/////////////////////////////////////////////////////////////////////////
// Set the OP (odd parity) status bit when x has an odd number of 1 bits, clear it otherwise.
void CPU9900::parity(Byte x)
{
	int ones = 0;

	// x &= (x-1) removes the lowest set bit, so the loop body runs once per 1 bit
	while (x) {
		x &= (x-1);
		ones++;
	}

	if (ones&1) {
		set_OP;
	} else {
		reset_OP;
	}
}

// Helpers for what used to be global variables
// Mark the CPU as idling.
void CPU9900::StartIdle()
{
	idling = 1;
}
// Mark the CPU as no longer idling.
void CPU9900::StopIdle()
{
	idling = 0;
}
// Return the idle flag (1 after StartIdle(), 0 after StopIdle()).
int CPU9900::GetIdle()
{
	return idling;
}
// Store the tracked return address. BL and BLWP record one here only while it is zero.
void CPU9900::SetReturnAddress(Word x)
{
	nReturnAddress = x;
}
// Return the tracked return address; zero means none is recorded.
int CPU9900::GetReturnAddress()
{
	return nReturnAddress;
}
// Zero the cycle counter using an interlocked exchange.
void CPU9900::ResetCycleCount()
{
	InterlockedExchange((LONG*)&nCycleCount, 0);
}
// Add val cycles to the cycle counter using an interlocked add.
void CPU9900::AddCycleCount(int val)
{
	InterlockedExchangeAdd((LONG*)&nCycleCount, val);
}
// Return the current cycle counter value (plain read).
int CPU9900::GetCycleCount()
{
	return nCycleCount;
}
// Overwrite the cycle counter with x using an interlocked exchange.
void CPU9900::SetCycleCount(int x)
{
	InterlockedExchange((LONG*)&nCycleCount, x);
}
// Take an interrupt through the two-word vector at 'vector' (new WP, then new PC).
// The old WP, PC and ST are saved in R13-R15 of the new workspace, as for BLWP.
// The interrupt mask in ST is deliberately left unchanged here.
// (I don't think this is legal on the F18A.)
void CPU9900::TriggerInterrupt(Word vector)
{
	const Word vectorWP = ROMWORD(vector);
	const Word vectorPC = ROMWORD(vector+2);

	// save the interrupted context in the new workspace
	WRWORD(vectorWP+26, WP);			// old WP -> new R13
	WRWORD(vectorWP+28, PC);			// old PC -> new R14
	WRWORD(vectorWP+30, ST);			// old ST -> new R15

	// switch to the handler's workspace and entry point
	// (level 1 interrupt assumed, as that's all we have)
	WP = vectorWP;
	PC = vectorPC;

	AddCycleCount(22);
}
// Return the program counter.
Word CPU9900::GetPC()
{
	return PC;
}
// Set the program counter directly, with none of the ADDPC range fix-up.
// Should rarely be used externally (Classic99 uses it for disk emulation).
void CPU9900::SetPC(Word x)
{
	PC = x;
}
// Return the status register.
Word CPU9900::GetST()
{
	return ST;
}
// Replace the whole status register with x.
void CPU9900::SetST(Word x)
{
	ST = x;
}
// Return the workspace pointer (base address of R0-R15).
Word CPU9900::GetWP()
{
	return WP;
}
// Set the workspace pointer (base address of R0-R15).
void CPU9900::SetWP(Word x)
{
	WP = x;
}
// Return X_flag: zero normally, or the saved post-X address while an X instruction runs.
Word CPU9900::GetX()
{
	return X_flag;
}
// Overwrite X_flag (see GetX for its meaning).
void CPU9900::SetX(Word x)
{
	X_flag = x;
}

// Fetch the instruction word at PC, step PC past it, and dispatch through the
// opcode[] table. Returns the instruction word held in 'in' afterwards.
Word CPU9900::ExecuteOpcode()
{
	// normal fetch, ie: not an 'X' command
	in = ROMWORD(PC);
	ADDPC(2);				// PC moves on before execution (thanks to Jeff Brown for explaining that!)

	CALL_MEMBER_FN(this, opcode[in])();

	return in;
}

////////////////////////////////////////////////////////////////////
// Classic99 - 9900 CPU opcodes
// Opcode functions follow
// one function for each opcode (the mnemonic prefixed with "op_")
// src - source address (register or memory - valid types vary)
// dst - destination address
// imm - immediate value
// dsp - relative displacement
////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////////////////////////////
// DO NOT USE wcpubyte or rcpubyte in here! You'll break the RMW
// emulation and the cycle counting! You'll also break the F18A. ;)
// The 9900 can only do word access.
/////////////////////////////////////////////////////////////////////
// Poison the raw lowercase accessors from here on: any later use of these names
// expands to text that will not compile, forcing the uppercase wrappers to be used.
#define wcpubyte #error Do not use in this file
#define rcpubyte #error Do not use in this file
#define romword  #error Do not use in this file
#define wrword	 #error Do not use in this file

void CPU9900::post_inc(int nWhich) {
	if (nPostInc[nWhich]) { 
		int i = nPostInc[nWhich] & 0xf;
		int t2=WP+(i<<1); 

		int nTmpCycles = nCycleCount;
		Word nTmpVal = GetSafeWord(t2, xbBank);	// we need to reread this value, but the memory access can't count for cycles
		SetCycleCount(nTmpCycles);

		WRWORD(t2, nTmpVal + ((nPostInc[nWhich]&POSTINC2) ? 2 : 1)); 
		nPostInc[nWhich]=0;
	} 
}

void CPU9900::op_a()
{
	// Add words: A src, dst

	// TODO: A is a good example of how I think I have my cycle counting wrt memory access wrong.
	// The datasheet says 4 memory accesses. I assumed that meant read source, read destination,
	// read-before-write, write destination. But that doesn't make sense, we already read the
	// destination, and it doesn't account for the instruction.
	// I think that the datasheet includes reading (1 word) for the instruction, and the remaining
	// memory cycles are the execution of it. This makes a little more sense and explains my
	// disconnect between emulator speed and real speed. The Read/Write cycle, on opcodes that
	// need to do it, can be a Read/Modify/Write. This includes things like register auto-increment.
	// My counting issue, then, is being too naive on the read-before-write, and doing it all the
	// time. We need to re-work the memory cycle counting (probably at the same time as bringing
	// the AMS inline). Then we should be pretty accurate.
	// So on that theory, A should be 1 memory access to read the instruction, 1 to read the source,
	// 1 to read the destination, and 1 to write the modified destination back.
	// This also helps explain opcodes like LWPI, which do not read-before-write, but still show
	// two memory accesses (read opcode, write destination)

	Word x1,x2,x3;

	FormatI;
	x1=ROMWORD(S); 
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
	x3=x2+x1; 
	WRWORD(D,x3);
	post_inc(DST);
																						// most of these are the same for every opcode.
	reset_EQ_LGT_AGT_C_OV;																// We come out with either EQ or LGT, never both
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	if (x3<x2) set_C;																	// if it wrapped around, set carry
	if (((x1&0x8000)==(x2&0x8000))&&((x3&0x8000)!=(x2&0x8000))) set_OV;					// if it overflowed or underflowed (signed math), set overflow
	
	AddCycleCount(14);
}

void CPU9900::op_ab()
{ 
	// Add bytes: A src, dst
	Byte x1,x2,x3;

	FormatI;
	x1=RCPUBYTE(S); 
	post_inc(SRC);

	fixD();
	x2=RCPUBYTE(D);
	x3=x2+x1;
	WCPUBYTE(D,x3);
	post_inc(DST);
	
	reset_EQ_LGT_AGT_C_OV_OP;
	ST|=BStatusLookup[x3]&mask_LGT_AGT_EQ_OP;

	if (x3<x2) set_C;
	if (((x1&0x80)==(x2&0x80))&&((x3&0x80)!=(x2&0x80))) set_OV;
	
	AddCycleCount(14);
}

void CPU9900::op_abs()
{ 
	// ABSolute value: ABS src
	
	Word x1,x2;

	FormatVI;
	x1=ROMWORD(S);

	if (x1&0x8000) {
		x2=(~x1)+1;																		// if negative, make positive
		WRWORD(S,x2);
		AddCycleCount(2);
	}
	post_inc(SRC);

	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	AddCycleCount(12);
}

void CPU9900::op_ai()
{ 
	// Add Immediate: AI src, imm
	
	Word x1,x3;

	FormatVIII_1;
	x1=ROMWORD(D);

	x3=x1+S;
	WRWORD(D,x3);

	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	if (x3<x1) set_C;
	if (((x1&0x8000)==(S&0x8000))&&((x3&0x8000)!=(S&0x8000))) set_OV;
	
	AddCycleCount(14);
}

void CPU9900::op_dec()
{ 
	// DECrement: DEC src
	
	Word x1;

	FormatVI;
	x1=ROMWORD(S);

	x1--;
	WRWORD(S,x1);
	post_inc(SRC);

	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x1!=0xffff) set_C;
	if (x1==0x7fff) set_OV;
	
	AddCycleCount(10);
}

void CPU9900::op_dect()
{ 
	// DECrement by Two: DECT src
	
	Word x1;

	FormatVI;
	x1=ROMWORD(S);

	x1-=2;
	WRWORD(S,x1);
	post_inc(SRC);
	
	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x1<0xfffe) set_C;
	if ((x1==0x7fff)||(x1==0x7ffe)) set_OV;
	
	AddCycleCount(10);
}

void CPU9900::op_div()
{ 
	// DIVide: DIV src, dst
	// Dest, a 2 word number, is divided by src. The result is stored as two words at the dst:
	// the first is the whole number result, the second is the remainder

	Word x1,x2; 
	unsigned __int32 x3;

	FormatIX;
	x2=ROMWORD(S);
	post_inc(SRC);

	D=WP+(D<<1);
	x3=ROMWORD(D);

	if (x2>x3)						// x2 can not be zero because they're unsigned										
	{ 
		x3=(x3<<16)|ROMWORD(D+2);
		x1=(Word)(x3/x2);
		WRWORD(D,x1);
		x1=(Word)(x3%x2);
		WRWORD(D+2,x1);
		reset_OV;
		AddCycleCount(92);			// This is not accurate. (Up to 124 "depends on the partial quotient after each clock cycle during execution")
	}
	else
	{
		set_OV;						// division wasn't possible - change nothing
		AddCycleCount(16);
	}
}

void CPU9900::op_inc()
{ 
	// INCrement: INC src
	
	Word x1;

	FormatVI;
	x1=ROMWORD(S);
	
	x1++;
	WRWORD(S,x1);
	post_inc(SRC);
	
	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV_C;
	
	AddCycleCount(10);
}

void CPU9900::op_inct()
{ 
	// INCrement by Two: INCT src
	
	Word x1;

	FormatVI;
	x1=ROMWORD(S);
	
	x1+=2;
	WRWORD(S,x1);
	post_inc(SRC);
	
	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x1<2) set_C;
	if ((x1==0x8000)||(x1==0x8001)) set_OV;
	
	AddCycleCount(10);
}

void CPU9900::op_mpy()
{ 
	// MultiPlY: MPY src, dst
	// Multiply src by dest and store 32-bit result

	Word x1; 
	unsigned __int32 x3;

	FormatIX;
	x1=ROMWORD(S);
	post_inc(SRC);
	
	D=WP+(D<<1);
	x3=ROMWORD(D);
	x3=x3*x1;
	WRWORD(D,(Word)(x3>>16)); 
	WRWORD(D+2,(Word)(x3&0xffff));
	
	AddCycleCount(52);
}

void CPU9900::op_neg()
{ 
	// NEGate: NEG src

	Word x1;

	FormatVI;
	x1=ROMWORD(S);

	x1=(~x1)+1;
	WRWORD(S,x1);
	post_inc(SRC);

	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV_C;
	
	AddCycleCount(12);
}

void CPU9900::op_s()
{ 
	// Subtract: S src, dst

	Word x1,x2,x3;

	FormatI;
	x1=ROMWORD(S); 
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
	x3=x2-x1;
	WRWORD(D,x3);
	post_inc(DST);

	reset_EQ_LGT_AGT_C_OV;
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	// any number minus 0 sets carry.. my theory is that converting 0 to the two's complement
	// is causing the carry flag to be set.
	if ((x3<x2) || (x1==0)) set_C;
	if (((x1&0x8000)!=(x2&0x8000))&&((x3&0x8000)!=(x2&0x8000))) set_OV;
	
	AddCycleCount(14);
}

void CPU9900::op_sb()
{ 
	// Subtract Byte: SB src, dst

	Byte x1,x2,x3;

	FormatI;
	x1=RCPUBYTE(S); 
	post_inc(SRC);

	fixD();
	x2=RCPUBYTE(D);
	x3=x2-x1;
	WCPUBYTE(D,x3);
	post_inc(DST);

	reset_EQ_LGT_AGT_C_OV_OP;
	ST|=BStatusLookup[x3]&mask_LGT_AGT_EQ_OP;

	// any number minus 0 sets carry.. my theory is that converting 0 to the two's complement
	// is causing the carry flag to be set.
	if ((x3<x2) || (x1==0)) set_C;
	if (((x1&0x80)!=(x2&0x80))&&((x3&0x80)!=(x2&0x80))) set_OV;
	
	AddCycleCount(14);
}

void CPU9900::op_b()
{
	// B src - unconditional absolute Branch to the source operand's address

	FormatVI;
	PC = S;
	post_inc(SRC);

	AddCycleCount(8);
}

void CPU9900::op_bl()
{
	// BL src - Branch and Link
	// A subroutine call: the return address goes into R11 and execution continues
	// at the source address. There is no stack and no official return instruction;
	// a return is simply B *R11 (some assemblers define RT as this).

	FormatVI;

	// remember the first return address seen, for the tracked return address
	if (!GetReturnAddress()) {
		SetReturnAddress(PC);
	}

	WRWORD(WP+22, PC);					// R11 = return address
	PC = S;
	post_inc(SRC);

	AddCycleCount(12);
}

void CPU9900::op_blwp()
{ 
	// Branch and Load Workspace Pointer: BLWP src
	// A context switch. The src address points to a 2 word table.
	// the first word is the new workspace address, the second is
	// the address to branch to. The current Workspace Pointer,
	// Program Counter (return address), and Status register are
	// stored in the new R13, R14 and R15, respectively
	// Return is performed with RTWP

	Word x1;

	FormatVI;
	if (0 == GetReturnAddress()) {
		SetReturnAddress(PC);
	}
	x1=WP;
	WP=ROMWORD(S);
	WRWORD(WP+26,x1);
	WRWORD(WP+28,PC);
	WRWORD(WP+30,ST);
	PC=ROMWORD(S+2);
	post_inc(SRC);

	skip_interrupt=1;
	
	AddCycleCount(26);
}

void CPU9900::op_jeq()
{
	// JEQ dsp - Jump if EQual
	// Conditional relative branch. The displacement is a signed byte giving the
	// number of words to branch.

	FormatII;

	if (!(ST_EQ)) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jgt()
{
	// JGT dsp - Jump if (arithmetic) Greater Than

	FormatII;

	if (!(ST_AGT)) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jhe()
{
	// JHE dsp - Jump if High or Equal (logical greater than, or equal)

	FormatII;

	const bool taken = (ST_LGT) || (ST_EQ);
	if (!taken) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jh()
{
	// JH dsp - Jump if High (logical greater than and not equal)

	FormatII;

	const bool taken = (ST_LGT) && !(ST_EQ);
	if (!taken) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jl()
{
	// JL dsp - Jump if Low (neither logical greater than nor equal)

	FormatII;

	const bool taken = !(ST_LGT) && !(ST_EQ);
	if (!taken) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jle()
{
	// JLE dsp - Jump if Low or Equal (not logical greater than, or equal)

	FormatII;

	const bool taken = !(ST_LGT) || (ST_EQ);
	if (!taken) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jlt()
{
	// JLT dsp - Jump if Less Than (neither arithmetic greater than nor equal)

	FormatII;

	const bool taken = !(ST_AGT) && !(ST_EQ);
	if (!taken) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jmp()
{
	// JMP dsp - unconditional relative JuMP
	// The displacement is a signed byte giving the number of words to branch.

	FormatII;

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}

	AddCycleCount(10);
}

void CPU9900::op_jnc()
{
	// JNC dsp - Jump if No Carry

	FormatII;

	if (ST_C) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jne()
{
	// JNE dsp - Jump if Not Equal

	FormatII;

	if (ST_EQ) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jno()
{
	// JNO dsp - Jump if No Overflow

	FormatII;

	if (ST_OV) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_jop()
{
	// JOP dsp - Jump on Odd Parity

	FormatII;

	if (!(ST_OP)) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_joc()
{
	// JOC dsp - Jump On Carry

	FormatII;

	if (!(ST_C)) {
		AddCycleCount(8);				// not taken
		return;
	}

	if (X_flag) {
		PC = X_flag;					// under X the displacement is relative to the X, not the opcode
	}

	if (D&0x80) {
		D = 128-(D&0x7f);				// negative displacement: magnitude in words
		ADDPC(-(2*D));
	} else {
		ADDPC(2*D);
	}
	AddCycleCount(10);
}

void CPU9900::op_rtwp()
{
	// RTWP - ReTurn with Workspace Pointer
	// The matching return for BLWP: reload ST, PC and WP from R15, R14 and R13 of
	// the current workspace. WP is restored last because the other two reads use it.

	FormatVII;

	ST = ROMWORD(WP+30);				// R15
	PC = ROMWORD(WP+28);				// R14
	WP = ROMWORD(WP+26);				// R13

	AddCycleCount(14);
}

void CPU9900::op_x()
{
	// X src - eXecute
	// The word at the source address is treated as an instruction and executed.

	if (X_flag) {
		// Recursive X will probably work, but I don't like the idea ;)
		// Barry Boone says that it does work, although if you recursively call X in
		// a register (ie: X R4 that contains X R4), you will lock up the CPU so bad
		// even the LOAD interrupt can't recover it. That lockup is not emulated
		// here in Classic99, but of course it will just spin forever.
		warn("Recursive X instruction!!!!!");
	}

	FormatVI;
	in = ROMWORD(S);
	post_inc(SRC);						// does this go before or after the eXecuted instruction??
	skip_interrupt = 1;					// (no real effect since the target is called inline, but technically still correct)

	// X costs the target instruction's own time plus 8, minus 4 clock cycles and 1 memory access
	AddCycleCount(8-4);

	// Remember the true post-X address for the jump opcodes: AFTER X's operands
	// but BEFORE any operands of the executed instruction.
	X_flag = PC;

	CALL_MEMBER_FN(this, opcode[in])();

	X_flag = 0;							// no longer inside an X
}

void CPU9900::op_xop()
{ 
	// eXtended OPeration: XOP src ???
	// The CPU maintains a jump table starting at 0x0040, containing BLWP style
	// jumps for each operation. In addition, the new R11 gets a copy of the address of
	// the source operand.
	// Apparently not all consoles supported both XOP 1 and 2 (depends on the ROM?)
	// so it is probably rarely, if ever, used on the TI99.
	
	Word x1;

	FormatIX;
	D&=0xf;

	x1=WP;
	WP=ROMWORD(0x0040+(D<<2));
	WRWORD(WP+22,S);
	post_inc(SRC);
	WRWORD(WP+26,x1);
	WRWORD(WP+28,PC);
	WRWORD(WP+30,ST);
	PC=ROMWORD(0x0042+(D<<2));
	set_X;

	skip_interrupt=1;

	AddCycleCount(36);
}

void CPU9900::op_c()
{ 
	// Compare words: C src, dst
	
	Word x3,x4;		// unsigned 16 bit

	FormatI;
	x3=ROMWORD(S); 
	post_inc(SRC);

	fixD();
	x4=ROMWORD(D); 
	post_inc(DST);

	reset_LGT_AGT_EQ;
	if (x3>x4) set_LGT;
	if (x3==x4) set_EQ;
	if ((x3&0x8000)==(x4&0x8000)) {
		if (x3>x4) set_AGT;
	} else {
		if (x4&0x8000) set_AGT;
	}

	AddCycleCount(14);
}

void CPU9900::op_cb()
{ 
	// Compare Bytes: CB src, dst

	Byte x3,x4;

	FormatI;
	x3=RCPUBYTE(S); 
	post_inc(SRC);

	fixD();
	x4=RCPUBYTE(D); 
	post_inc(DST);
  
	reset_LGT_AGT_EQ_OP;
	if (x3>x4) set_LGT;
	if (x3==x4) set_EQ;
	if ((x3&0x80)==(x4&0x80)) {
		if (x3>x4) set_AGT;
	} else {
		if (x4&0x80) set_AGT;
	}
	ST|=BStatusLookup[x3]&BIT_OP;

	AddCycleCount(14);
}

void CPU9900::op_ci()
{ 
	// Compare Immediate: CI src, imm
	
	Word x3;

	FormatVIII_1;
	x3=ROMWORD(D); 
  
	reset_LGT_AGT_EQ;
	if (x3>S) set_LGT;
	if (x3==S) set_EQ;
	if ((x3&0x8000)==(S&0x8000)) {
		if (x3>S) set_AGT;
	} else {
		if (S&0x8000) set_AGT;
	}

	AddCycleCount(14);
}

void CPU9900::op_coc()
{ 
	// Compare Ones Corresponding: COC src, dst
	// Basically comparing against a mask, if all set bits in the src match
	// set bits in the dest (mask), the equal bit is set

	Word x1,x2,x3;

	FormatIII;
	x1=ROMWORD(S);
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
	
	x3=x1&x2;
  
	if (x3==x1) set_EQ; else reset_EQ;

	AddCycleCount(14);
}

void CPU9900::op_czc()
{ 
	// Compare Zeros Corresponding: CZC src, dst
	// The opposite of COC. Each set bit in the dst (mask) must
	// match up with a zero bit in the src to set the equals flag

	Word x1,x2,x3;

	FormatIII;
	x1=ROMWORD(S);
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
	
	x3=x1&x2;
  
	if (x3==0) set_EQ; else reset_EQ;

	AddCycleCount(14);
}

void CPU9900::op_ldcr()
{
	// LDCR src, count - LoaD CRu
	// Writes 'count' bits of the source serially out to the CRU, least significant
	// bit first. A count of 0 means 16; counts of 1..8 take a byte operand, larger
	// counts a word operand.
	// The CRU is the 9901 Communication chip, tightly tied into the 9900.
	// It's serially accessed and has 4096 single bit IO registers.
	// It's stupid and thinks 0 is true and 1 is false.
	// All addresses are offsets from the value in R12, which is divided by 2.

	Word x1,x3,cruBase;
	int x2;

	FormatIV;
	if (D==0) D=16;
	x1=(D<9 ? RCPUBYTE(S) : ROMWORD(S));
	post_inc(SRC);

	x3=1;													// mask for the bit being sent
	cruBase=(ROMWORD(WP+24)>>1)&0xfff;						// R12 holds the CRU base, times two
	for (x2=0; x2<D; x2++)
	{
		wcru(cruBase+x2, (x1&x3) ? 1 : 0);
		x3=x3<<1;
	}

	AddCycleCount(20+2*D);

	// Status reflects the transferred value compared to zero for EVERY count; odd
	// parity is only updated for byte transfers. An earlier "if (D>8) return;" here
	// skipped the status update for word transfers and left the word branch below
	// unreachable.
	reset_LGT_AGT_EQ;
	if (D<9) {
		reset_OP;
		ST|=BStatusLookup[x1&0xff]&mask_LGT_AGT_EQ_OP;
	} else {
		ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;
	}
}

void CPU9900::op_sbo()
{ 
	// Set Bit On: SBO src
	// Sets a bit in the CRU
	
	Word add;

	FormatII;
	add=(ROMWORD(WP+24)>>1)&0xfff;
	if (D&0x80) {
		add-=128-(D&0x7f);
	} else {
		add+=D;
	}
	wcru(add,1);

	AddCycleCount(12);
}

void CPU9900::op_sbz()
{ 
	// Set Bit Zero: SBZ src
	// Zeros a bit in the CRU

	Word add;

	FormatII;
	add=(ROMWORD(WP+24)>>1)&0xfff;
	if (D&0x80) {
		add-=128-(D&0x7f);
	} else {
		add+=D;
	}
	wcru(add,0);

	AddCycleCount(12);
}

void CPU9900::op_stcr()
{ 
	// STore CRU: STCR dst, cnt
	// Reads 'cnt' bits serially from the CRU (starting at the base taken
	// from R12>>1) and assembles them, least significant bit first, into
	// the operand at S. A count of 0 means 16 bits. 1-8 bits are stored
	// as a byte, 9-16 bits are stored as a word.

	Word x1,x3,x4, cruBase; 
	int x2;

	FormatIV;
	if (D==0) D=16;
	x1=0; x3=1;							// x1 accumulates the value, x3 is the current bit mask
  
	cruBase=(ROMWORD(WP+24)>>1)&0xfff;	// R12 lives at WP+24
	for (x2=0; x2<D; x2++)
	{ 
		x4=rcru(cruBase+x2);
		if (x4) 
		{
			x1=x1|x3;
		}
		x3<<=1;
	}

	if (D<9) 
	{
		WCPUBYTE(S,(Byte)(x1&0xff));  
	}
	else 
	{
		WRWORD(S,x1);
	}
	post_inc(SRC);

	// timing depends on the transfer width
	if (D<8) {
		AddCycleCount(42);
	} else if (D < 9) {
		AddCycleCount(44);
	} else if (D < 16) {
		AddCycleCount(58);
	} else {
		AddCycleCount(60);
	}

	// The status comparison applies to every transfer width: L>, A> and EQ
	// are always updated, OP only for byte-sized (1-8 bit) transfers.
	// (Previously an early return skipped this for word transfers, leaving
	// the word branch below unreachable.)
	reset_LGT_AGT_EQ;
	if (D<9) {
		reset_OP;
		ST|=BStatusLookup[x1&0xff]&mask_LGT_AGT_EQ_OP;
	} else {
		ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;
	}
}

void CPU9900::op_tb()
{ 
	// Test Bit: TB src
	// Tests a CRU bit

	Word add;

	FormatII;
	add=(ROMWORD(WP+24)>>1)&0xfff;
	if (D&0x80) {
		add-=128-(D&0x7f);
	} else {
		add+=D;
	}

	if (rcru(add)) set_EQ; else reset_EQ;

	AddCycleCount(12);
}

// These instructions are valid 9900 instructions but are invalid on the TI-99, as they generate
// improperly decoded CRU instructions.

void CPU9900::op_ckof()
{ 
	// ClocK OFf: CKOF
	// Not emulated: decodes the instruction, logs a warning and accounts
	// for 12 cycles. No CPU state other than the cycle count is changed here.
	FormatVII;
	warn("ClocK OFf instruction encountered!");					// not supported on 99/4A
	// This will set A0-A2 to 110 and pulse CRUCLK (so not emulated)

	AddCycleCount(12);
}

void CPU9900::op_ckon()
{ 
	// ClocK ON: CKON
	// Not emulated: decodes the instruction, logs a warning and accounts
	// for 12 cycles. No CPU state other than the cycle count is changed here.
	FormatVII;
	warn("ClocK ON instruction encountered!");					// not supported on 99/4A
	// This will set A0-A2 to 101 and pulse CRUCLK (so not emulated)

	AddCycleCount(12);
}

void CPU9900::op_idle()
{
	// IDLE
	// Currently a timed no-op: a warning is logged and 12 cycles are charged.
	// The CPU is NOT actually put to sleep (the SetIdle() call is disabled).
	FormatVII;
	warn("IDLE instruction encountered!");						// not supported on 99/4A
	// This sets A0-A2 to 010, and pulses CRUCLK until an interrupt is received
	// Although it's not supposed to be used on the TI, at least one game
	// (Slymoids) uses it - perhaps to sync with the VDP? So we'll emulate it someday

	// TODO: we can't do this today. Everything is based on CPU cycles, which means
	// when the CPU stops, so does the VDP, 9901, etc, so no interrupt ever comes in
	// to wake up the system. This will be okay when the VDP is the timing source.
//	SetIdle();
	AddCycleCount(12);
}

void CPU9900::op_rset()
{
	// ReSET: RSET
	// The external CRUCLK pulse is not emulated, but the one CPU-visible
	// effect is: the low four bits of ST (the interrupt mask) are cleared.
	FormatVII;
	warn("ReSET instruction encountered!");						// not supported on 99/4A
	// This will set A0-A2 to 011 and pulse CRUCLK (so not emulated)
	// However, it does have an effect, it zeros the interrupt mask
	ST&=0xfff0;

	AddCycleCount(12);
}

void CPU9900::op_lrex()
{
	// Load or REstart eXecution: LREX
	// Not emulated: decodes the instruction, logs a warning and accounts
	// for 12 cycles. No CPU state other than the cycle count is changed here.
	FormatVII;
	warn("Load or REstart eXecution instruction encountered!");	// not supported on 99/4A
	// This will set A0-A2 to 111 and pulse CRUCLK (so not emulated)

	AddCycleCount(12);
}

void CPU9900::op_li()
{
	// Load Immediate: LI src, imm

	FormatVIII_1;
	WRWORD(D,S);
	
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[S]&mask_LGT_AGT_EQ;

	AddCycleCount(12);
}

void CPU9900::op_limi()
{ 
	// Load Interrupt Mask Immediate: LIMI imm
	// Sets the CPU interrupt mask

	FormatVIII_1;
	ST=(ST&0xfff0)|(S&0xf);

	AddCycleCount(16);
}

void CPU9900::op_lwpi()
{ 
	// Load Workspace Pointer Immediate: LWPI imm
	// changes the Workspace Pointer
	// The immediate operand (S) is copied straight into WP; status flags
	// are not modified here.

	FormatVIII_1;
	WP=S;

	AddCycleCount(10);
}

void CPU9900::op_mov()
{ 
	// MOVe words: MOV src, dst

	Word x1;

	FormatI;
	x1=ROMWORD(S);
	post_inc(SRC);
	
	fixD();
	WRWORD(D,x1);
	post_inc(DST);
  
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_movb()
{ 
	// MOVe Bytes: MOVB src, dst

	Byte x1;

	FormatI;
	x1=RCPUBYTE(S);
	post_inc(SRC);
	
	fixD();
	WCPUBYTE(D,x1);
	post_inc(DST);
	
	reset_LGT_AGT_EQ_OP;
	ST|=BStatusLookup[x1]&mask_LGT_AGT_EQ_OP;

	AddCycleCount(14);
}

void CPU9900::op_stst()
{ 
	// STore STatus: STST src
	// Copy the status register to memory
	// The full ST word is written to address D; ST itself is unchanged.

	FormatVIII_0;
	WRWORD(D,ST);

	AddCycleCount(8);
}

void CPU9900::op_stwp()
{ 
	// STore Workspace Pointer: STWP src
	// Copy the workspace pointer to memory
	// WP is written to address D; WP itself and the status flags are unchanged.

	FormatVIII_0;
	WRWORD(D,WP);

	AddCycleCount(8);
}

void CPU9900::op_swpb()
{ 
	// SWaP Bytes: SWPB src
	// swap the high and low bytes of a word

	Word x1,x2;

	FormatVI;
	x1=ROMWORD(S);

	x2=((x1&0xff)<<8)|(x1>>8);
	WRWORD(S,x2);
	post_inc(SRC);

	AddCycleCount(10);
}

void CPU9900::op_andi()
{ 
	// AND Immediate: ANDI src, imm

	Word x1,x2;

	FormatVIII_1;

	x1=ROMWORD(D);
	x2=x1&S;
	WRWORD(D,x2);
	
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x2]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_ori()
{ 
	// OR Immediate: ORI src, imm

	Word x1,x2;

	FormatVIII_1;

	x1=ROMWORD(D);
  	x2=x1|S;
	WRWORD(D,x2);
  
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x2]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_xor()
{ 
	// eXclusive OR: XOR src, dst

	Word x1,x2,x3;

	FormatIII;
	x1=ROMWORD(S);
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
  
	x3=x1^x2;
	WRWORD(D,x3);
  
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_inv()
{ 
	// INVert: INV src

	Word x1;

	FormatVI;

	x1=ROMWORD(S);
  	x1=~x1;
	WRWORD(S,x1);
  	post_inc(SRC);

	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	AddCycleCount(10);
}

void CPU9900::op_clr()
{ 
	// CLeaR: CLR src
	// sets word to 0
	// Writes 0 to the word at S; status flags are not modified here.

	FormatVI;
	WRWORD(S,0);
	post_inc(SRC);

	AddCycleCount(10);
}

void CPU9900::op_seto()
{ 
	// SET to One: SETO src
	// sets word to 0xffff
	// Writes 0xffff to the word at S; status flags are not modified here.

	FormatVI;
	WRWORD(S,0xffff);
	post_inc(SRC);

	AddCycleCount(10);
}

void CPU9900::op_soc()
{ 
	// Set Ones Corresponding: SOC src, dst
	// Essentially performs an OR - setting all the bits in dst that
	// are set in src

	Word x1,x2,x3;

	FormatI;
	x1=ROMWORD(S);
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
  	x3=x1|x2;
	WRWORD(D,x3);
	post_inc(DST);
  
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_socb()
{ 
	// Set Ones Corresponding, Byte: SOCB src, dst

	Byte x1,x2,x3;

	FormatI;
	x1=RCPUBYTE(S);
	post_inc(SRC);

	fixD();
	x2=RCPUBYTE(D);
  	x3=x1|x2;
	WCPUBYTE(D,x3);
	post_inc(DST);

	reset_LGT_AGT_EQ_OP;
	ST|=BStatusLookup[x3]&mask_LGT_AGT_EQ_OP;

	AddCycleCount(14);
}

void CPU9900::op_szc()
{ 
	// Set Zeros Corresponding: SZC src, dst
	// Zero all bits in dest that are zeroed in src

	Word x1,x2,x3;

	FormatI;
	x1=ROMWORD(S);
	post_inc(SRC);

	fixD();
	x2=ROMWORD(D);
  	x3=(~x1)&x2;
	WRWORD(D,x3);
	post_inc(DST);
  
	reset_LGT_AGT_EQ;
	ST|=WStatusLookup[x3]&mask_LGT_AGT_EQ;

	AddCycleCount(14);
}

void CPU9900::op_szcb()
{ 
	// Set Zeros Corresponding, Byte: SZCB src, dst

	Byte x1,x2,x3;

	FormatI;
	x1=RCPUBYTE(S);
	post_inc(SRC);

	fixD();
	x2=RCPUBYTE(D);
  	x3=(~x1)&x2;
	WCPUBYTE(D,x3);
	post_inc(DST);

	reset_LGT_AGT_EQ_OP;
	ST|=BStatusLookup[x3]&mask_LGT_AGT_EQ_OP;

	AddCycleCount(14);
}

void CPU9900::op_sra()
{ 
	// Shift Right Arithmetic: SRA src, dst
	// For the shift instructions, a count of '0' means use the
	// value in register 0. If THAT is zero, the count is 16.
	// The arithmetic operations preserve the sign bit

	Word x1,x3,x4; 
	int x2;

#ifdef DO_REMAAP
	// TODO: we have to do a little local work that should be wrapped elsewhere... 
	RegisterTrack[in&0x000f]=PC;
#endif

	FormatV;
	if (D==0)
	{ 
		D=ROMWORD(WP) & 0xf;
		if (D==0) D=16;
		AddCycleCount(8);
	}
	x1=ROMWORD(S);
	x4=x1&0x8000;
	x3=0;
  
	for (x2=0; x2<D; x2++)
	{ 
		x3=x1&1;   /* save carry */
		x1=x1>>1;  /* shift once */
		x1=x1|x4;  /* extend sign bit */
	}
	WRWORD(S,x1);
  
	reset_EQ_LGT_AGT_C;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x3) set_C;

	AddCycleCount(12+2*D);
}

void CPU9900::op_srl()
{ 
	// Shift Right Logical: SRL src, dst
	// The logical shifts do not preserve the sign

	Word x1,x3; 
	int x2;

#ifdef DO_REMAAP
	// TODO: we have to do a little local work that should be wrapped elsewhere... 
	RegisterTrack[in&0x000f]=PC;
#endif

	FormatV;
	if (D==0)
	{ 
		D=ROMWORD(WP)&0xf;
		if (D==0) D=16;
		AddCycleCount(8);
	}
	x1=ROMWORD(S);
	x3=0;
  
	for (x2=0; x2<D; x2++)
	{ 
		x3=x1&1;
		x1=x1>>1;
	}
	WRWORD(S,x1);

	reset_EQ_LGT_AGT_C;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x3) set_C;

	AddCycleCount(12+2*D);
}

void CPU9900::op_sla()
{ 
	// Shift Left Arithmetic: SLA src, dst

	Word x1,x3,x4; 
	int x2;

#ifdef DO_REMAAP
	// TODO: we have to do a little local work that should be wrapped elsewhere... 
	RegisterTrack[in&0x000f]=PC;
#endif

	FormatV;
	if (D==0)
	{ 
		D=ROMWORD(WP)&0xf;
		if (D==0) D=16;
		AddCycleCount(8);
	}
	x1=ROMWORD(S);
	x4=x1&0x8000;
	reset_EQ_LGT_AGT_C_OV;

	x3=0;
	for (x2=0; x2<D; x2++)
	{ 
		x3=x1&0x8000;
		x1=x1<<1;
		if ((x1&0x8000)!=x4) set_OV;
	}
	WRWORD(S,x1);
  
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x3) set_C;

	AddCycleCount(12+2*D);
}

void CPU9900::op_src()
{ 
	// Shift Right Circular: SRC src, dst
	// Circular shifts pop bits off one end and onto the other
	// The carry bit is not a part of these shifts, but it set
	// as appropriate

	Word x1,x4;
	int x2;

#ifdef DO_REMAAP
	// TODO: we have to do a little local work that should be wrapped elsewhere... 
	RegisterTrack[in&0x000f]=PC;
#endif

	FormatV;
	if (D==0)
	{ 
		D=ROMWORD(WP)&0xf;
		if (D==0) D=16;
		AddCycleCount(8);
	}
	x1=ROMWORD(S);
	for (x2=0; x2<D; x2++)
	{ 
		x4=x1&0x1;
		x1=x1>>1;
		if (x4) 
		{
			x1=x1|0x8000;
		}
	}
	WRWORD(S,x1);
  
	reset_EQ_LGT_AGT_C;
	ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ;

	if (x4) set_C;

	AddCycleCount(12+2*D);
}

void CPU9900::op_bad()
{ 
	// Handler installed for every opcode slot that has no real implementation.
	// Logs the offending instruction word, charges 6 cycles, yields the host
	// thread, and optionally triggers the debugger breakpoint.
	char buf[128];

	FormatVII;
	sprintf(buf, "Illegal opcode (%04X)", in);		// message is far shorter than buf
	warn(buf);					// Don't know this Opcode
	AddCycleCount(6);
	SwitchToThread();			// these have a habit of taking over the emulator in crash situations :)
	if (BreakOnIllegal) TriggerBreakPoint();
}

////////////////////////////////////////////////////////////////////////
// functions that are different on the F18A
// (there will be more than just this!)
void CPU9900::op_idleF18() {
	// F18A replacement for IDLE.
	// GPU goes to sleep
	// In this broken implementation, we switch context back to the host CPU
	// TODO: do this properly.
	FormatVII;
	debug_write("GPU Encountered IDLE, switching back to CPU");
	StartIdle();
	// when the GPU is not interleaved with the host, hand execution back to the host CPU object
	if (!bInterleaveGPU) {
		pCurrentCPU = pCPU;
	}
	//AddCycleCount(??);
}

void CPU9900::op_callF18() {
	Word x2;

	FormatVI;
	x2=ROMWORD(WP+30);		// get R15

	if (0 == GetReturnAddress()) {
		SetReturnAddress(PC);
	}
	WRWORD(x2,PC);
	PC=S;

	x2-=2;
	WRWORD(WP+30, x2);		// update R15

	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_retF18(){
	Word x1;
	
	FormatVII;

	// TODO: what do we have to do? Stack based return?
	x1=ROMWORD(WP+30);		// get R15
	x1+=2;
	PC=ROMWORD(x1);			// get PC
	WRWORD(WP+30, x1);		// update R15

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_pushF18(){
	Word x1,x2;

	FormatVI;
	x1=ROMWORD(S);
	x2=ROMWORD(WP+30);		// get R15

	// Push the word on the stack
	// the stack pointer post-decrements (per Matthew)
	WRWORD(x2, x1);
	x2-=2;
	WRWORD(WP+30, x2);		// update R15

	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_popF18(){
	Word x1,x2;

	FormatVI;				// S is really D in this one...
	x2=ROMWORD(WP+30);		// get R15

	// POP the word from the stack
	// the stack pointer post-decrements (per Matthew)
	// so here we pre-increment!
	x2+=2;
	x1=ROMWORD(x2);
	WRWORD(S, x1);
	WRWORD(WP+30, x2);		// update R15

	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_slcF18(){
	// TODO: this one seems misdefined? It only has a source address, and no count??
	// Wasn't it removed from the final??

	Word x1,x2;

	FormatVI;
	x1=ROMWORD(S);

	// circular left shift (TODO: once? does it rotate through carry??)
	x2=x1&0x8000;
	x1<<=1;
	if (x2) x1|=0x0001;
	WRWORD(S, x1);

	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_pixF18(){
	// PIX is a funny instruction. It has a huge candy-machine interface that works with
	// Bitmap mode, with the new bitmap overlay, and it can perform logic operations. It's
	// almost a mini-blitter.
	//
	// Implementation status: only the GM2 address calculation is real. The overlay
	// address is hard-wired to 0 and the pixel extract/merge is a placeholder, so
	// overlay reads/writes below always touch the byte at address 0.

	Word x1,x2,ad;

	FormatIX;
	D=WP+(D<<1);		// turn the decoded register number into that register's address

	// SRC = XXXXXXXX YYYYYYYY
	// Command bits in destination:
	// Format: MAxxRWCE xxOOxxPP
	// M  - 1 = calculate the effective address for GM2 instead of the new bitmap layer (todo: so do nothing else??)
	//      0 = use the remainder of the bits for the new bitmap layer pixels
	// A  - 1 = retrieve the pixel's effective address instead of setting a pixel   (todo: so do nothing else??)
	//      0 = read or set a pixel according to the other bits
	// R  - 1 = read current pixel into PP, only after possibly writing PP
	//      0 = do not read current pixel into PP
	// W  - 1 = do not write PP
	//      0 = write PP to current pixel
	// C  - 1 = compare OO with PP according to E, and write PP only if true
	//      0 = always write
	// E  - 1 = only write PP if current pixel is equal to OO
	//      0 = only write PP if current pixel is not equal to OO
	// OO   pixel to compare to existing pixel
	// PP   new pixel to write, and previous pixel when reading
	//
	// The destination parameter is the PIX instruction as indicated above.  
	// If you use the M or A operations, the destination register will contain the address 
	// after the instruction has executed.  If you use the R operation, the read pixel will 
	// be in PP (over writes the LSbits).  You can read and write at the same time, in which 
	// case the PP bits are written first and then replaced with the original pixel bits
	//
	
	x1=ROMWORD(S);		// coordinates
	x2=ROMWORD(D);		// command word

	if (x2 & 0x8000) {
		// calculate BM2 address:
		// 00PYYYYY00000YYY +
		//     0000XXXXX000
		// ------------------
		// 00PY YYYY XXXX XYYY
		//
		// Note: Bitmap GM2 address /includes/ the offset from VR4 (pattern table), so to use
		// it for both pattern and color tables, put the pattern table at >0000
		ad = ((VDPREG[4]&0x04) ? 0x2000 : 0) |			// P
			 ((x1&0x00F8) << 5) |						// YYYYY
			 ((x1&0xF800) >> 8) |						// XXXXX
			 ((x1&0x0007));								// YYY
	} else {
		// calculate overlay address -- I don't have the math for this.
		// TODO: Is it chunky or planar? I assume chunky, 2 bits per pixel, linear.
		// TODO: I don't have the reference in front of me to know what registers do what (size, start address, etc)
		// so.. do this later.
		ad = 0;		// todo: put actual math in place
	}

	// only parse the other bits if M and A are zero
	if ((x2 & 0xc000) == 0) {
		// everything in here thus assumes overlay mode and the pixel is at AD.

		unsigned char pix = RCPUBYTE(ad);	// get the byte
		unsigned char orig = pix;			// save it
		// TODO: if we are 2 bits per pixel, there is still masking to do??
		pix &= 0x03;		// TODO: this is wrong, get the correct pixel into the LSb's
		bool bComp = (pix == ((x2&0x0030)>>4));		// compare the pixels
		unsigned char newpix = x2&0x0003;			// new pixel
		// NOTE(review): the bit description above says W=1 means "do NOT write PP",
		// but this treats W=1 (0x0400 set) as "write". One of the two is inverted -
		// confirm against the F18A documentation before relying on either.
		bool bWrite = (x2&0x0400)!=0;				// whether to write

		// TODO: are C and E dependent on W being set? I am assuming yes.
		if ((bWrite)&&(x2&0x0200)) {				// C - compare active (only important if we are writing anyway?)
			if (x2&0x0100) {
				// E is set, comparison must be true
				if (!bComp) bWrite=false;
			} else {
				// E is clear, comparison must be false
				if (bComp) bWrite=false;
			}
		}

		if (bWrite) {
			// TODO: properly merge the pixel (newpix) back in to orig
			WCPUBYTE(ad, (orig&0xfc) | newpix);
		}
		if (x2 & 0x0800) {
			// read is set, so save the original read pixel color in PP
			x2=(x2&0xFFFC) | pix;
			WRWORD(D, x2);			// write it back
		}
	} else {
		// user only wants the address
		// (when only A is set, 'ad' is the placeholder overlay address, i.e. 0)
		WRWORD(D, ad);
	}

	// only the source address can be post-inc
	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_csonF18(){
	// F18A replacement for CKON.
	// chip select to the EEPROM on (TODO)
	// Currently only decodes the instruction; no state is changed and no
	// cycles are charged.
	FormatVII;
	
	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_csoffF18(){
	// F18A replacement for CKOF.
	// chip select to the EEPROM off (TODO)
	// Currently only decodes the instruction; no state is changed and no
	// cycles are charged.
	FormatVII;
	
	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_spioutF18(){
	// F18A replacement for LDCR (SPI OUT).
	// Currently a stub: the source byte is read (and any post-increment is
	// applied) but the value is not forwarded anywhere yet.

	// todo: based on LDCR
	// Always a byte operation. Always just a single byte.
	// So should we assume it's always encoded with Td=8? Td=0? Ignore Td?
	// My increment code looks at Td>8 to determine whether autoincrement
	// should step by 1 or 2 -- does the F18 do that?
	Byte x1;

	FormatIV;		// TODO: if it never does Td, maybe a FormatVI is more appropriate?
	x1=RCPUBYTE(S);	// value is read but unused until the EEPROM path exists
	post_inc(SRC);

	// TODO: get X1 out to the EEPROM system

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_spiinF18(){
	// based on STCR
	// Always a byte operation. Always just a single byte.
	// So should we assume it's always encoded with Td=8? Td=0? Ignore Td?
	// My increment code looks at Td>8 to determine whether autoincrement
	// should step by 1 or 2 -- does the F18 do that?

	Byte x1;

	FormatIV;
	
	// TODO - get that value from the EEPROM into x1
	x1=0;

	WCPUBYTE(S,(Byte)(x1&0xff));  
	post_inc(SRC);

	// TODO: does it affect any status flags??
	//reset_EQ_LGT_AGT_C_OV;
	//ST|=WStatusLookup[x1]&mask_LGT_AGT_EQ_OV;

	//AddCycleCount(??);
}

void CPU9900::op_rtwpF18(){
	// Almost the same. Used by interrupt code only. Does not touch R13 as there is no WP.
	// ReTurn with Workspace Pointer: RTWP
	// Restores ST from R15 (WP+30) and PC from R14 (WP+28); WP is left alone.

	FormatVII;
	ST=ROMWORD(WP+30);
	PC=ROMWORD(WP+28);

	//AddCycleCount(??);		// TODO: 
}

////////////////////////////////////////////////////////////////////////
// Fill the CPU Opcode Address table
////////////////////////////////////////////////////////////////////////
void CPU9900::buildcpu()
{
	Word in,x,z;
	unsigned int i;

	for (i=0; i<65536; i++)
	{ 
		in=(Word)i;

		x=(in&0xf000)>>12;
		switch(x)
		{ 
		case 0: opcode0(in);		break;
		case 1: opcode1(in);		break;
		case 2: opcode2(in);		break;
		case 3: opcode3(in);		break;
		case 4: opcode[in]=&CPU9900::op_szc;	break;
		case 5: opcode[in]=&CPU9900::op_szcb; break;
		case 6: opcode[in]=&CPU9900::op_s;	break;
		case 7: opcode[in]=&CPU9900::op_sb;	break;
		case 8: opcode[in]=&CPU9900::op_c;	break;
		case 9: opcode[in]=&CPU9900::op_cb;	break;
		case 10:opcode[in]=&CPU9900::op_a;	break;
		case 11:opcode[in]=&CPU9900::op_ab;	break;
		case 12:opcode[in]=&CPU9900::op_mov;	break;
		case 13:opcode[in]=&CPU9900::op_movb; break;
		case 14:opcode[in]=&CPU9900::op_soc;	break;
		case 15:opcode[in]=&CPU9900::op_socb; break;
		default: opcode[in]=&CPU9900::op_bad;
		}
	} 

	// build the Word status lookup table
	for (i=0; i<65536; i++) {
		WStatusLookup[i]=0;
		// LGT
		if (i>0) WStatusLookup[i]|=BIT_LGT;
		// AGT
		if ((i>0)&&(i<0x8000)) WStatusLookup[i]|=BIT_AGT;
		// EQ
		if (i==0) WStatusLookup[i]|=BIT_EQ;
		// C
		if (i==0) WStatusLookup[i]|=BIT_C;
		// OV
		if (i==0x8000) WStatusLookup[i]|=BIT_OV;
	}
	// And byte
	for (i=0; i<256; i++) {
		Byte x=(Byte)(i&0xff);
		BStatusLookup[i]=0;
		// LGT
		if (i>0) BStatusLookup[i]|=BIT_LGT;
		// AGT
		if ((i>0)&&(i<0x80)) BStatusLookup[i]|=BIT_AGT;
		// EQ
		if (i==0) BStatusLookup[i]|=BIT_EQ;
		// C
		if (i==0) BStatusLookup[i]|=BIT_C;
		// OV
		if (i==0x80) BStatusLookup[i]|=BIT_OV;
		// OP
		for (z=0; x; x&=(x-1)) z++;						// black magic?
		if (z&1) BStatusLookup[i]|=BIT_OP;				// set bit if an odd number
	}
}

///////////////////////////////////////////////////////////////////////////
// CPU Opcode 0 helper function
///////////////////////////////////////////////////////////////////////////
void CPU9900::opcode0(Word in)
{
	// Second-level decode for instruction words whose top nibble is 0.
	// The next nibble selects either a further helper (2-7) or one of the
	// shift instructions (8-11); everything else is illegal.

	switch ((in>>8)&0x0f)
	{
	case 0x2: opcode02(in);	break;
	case 0x3: opcode03(in);	break;
	case 0x4: opcode04(in);	break;
	case 0x5: opcode05(in);	break;
	case 0x6: opcode06(in);	break;
	case 0x7: opcode07(in);	break;
	case 0x8: opcode[in]=&CPU9900::op_sra;	break;
	case 0x9: opcode[in]=&CPU9900::op_srl;	break;
	case 0xa: opcode[in]=&CPU9900::op_sla;	break;
	case 0xb: opcode[in]=&CPU9900::op_src;	break;
	default:  opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////////
// CPU Opcode 02 helper function
////////////////////////////////////////////////////////////////////////////
void CPU9900::opcode02(Word in)
{
	// Decode for the >02xx group: bits 7-5 of the instruction word pick
	// one of the eight immediate/store instructions.

	switch ((in>>5)&0x7)
	{
	case 0: opcode[in]=&CPU9900::op_li;		break;
	case 1: opcode[in]=&CPU9900::op_ai;		break;
	case 2: opcode[in]=&CPU9900::op_andi;	break;
	case 3: opcode[in]=&CPU9900::op_ori;	break;
	case 4: opcode[in]=&CPU9900::op_ci;		break;
	case 5: opcode[in]=&CPU9900::op_stwp;	break;
	case 6: opcode[in]=&CPU9900::op_stst;	break;
	case 7: opcode[in]=&CPU9900::op_lwpi;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////////
// CPU Opcode 03 helper function
////////////////////////////////////////////////////////////////////////////
void CPU9900::opcode03(Word in)
{
	// Decode for the >03xx group: bits 7-5 of the instruction word pick
	// the control instruction. Selector 1 has no instruction assigned.

	switch ((in>>5)&0x7)
	{
	case 0: opcode[in]=&CPU9900::op_limi;	break;
	case 2: opcode[in]=&CPU9900::op_idle;	break;
	case 3: opcode[in]=&CPU9900::op_rset;	break;
	case 4: opcode[in]=&CPU9900::op_rtwp;	break;
	case 5: opcode[in]=&CPU9900::op_ckon;	break;
	case 6: opcode[in]=&CPU9900::op_ckof;	break;
	case 7: opcode[in]=&CPU9900::op_lrex;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

///////////////////////////////////////////////////////////////////////////
// CPU Opcode 04 helper function
///////////////////////////////////////////////////////////////////////////
void CPU9900::opcode04(Word in)
{
	// Decode for the >04xx group: bits 7-6 of the instruction word pick
	// one of four single-operand instructions.

	switch ((in>>6)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_blwp;	break;
	case 1: opcode[in]=&CPU9900::op_b;		break;
	case 2: opcode[in]=&CPU9900::op_x;		break;
	case 3: opcode[in]=&CPU9900::op_clr;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

//////////////////////////////////////////////////////////////////////////
// CPU Opcode 05 helper function
//////////////////////////////////////////////////////////////////////////
void CPU9900::opcode05(Word in)
{
	// Decode for the >05xx group: bits 7-6 of the instruction word pick
	// one of four single-operand instructions.

	switch ((in>>6)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_neg;	break;
	case 1: opcode[in]=&CPU9900::op_inv;	break;
	case 2: opcode[in]=&CPU9900::op_inc;	break;
	case 3: opcode[in]=&CPU9900::op_inct;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////
// CPU Opcode 06 helper function
////////////////////////////////////////////////////////////////////////
void CPU9900::opcode06(Word in)
{
	// Decode for the >06xx group: bits 7-6 of the instruction word pick
	// one of four single-operand instructions.

	switch ((in>>6)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_dec;	break;
	case 1: opcode[in]=&CPU9900::op_dect;	break;
	case 2: opcode[in]=&CPU9900::op_bl;		break;
	case 3: opcode[in]=&CPU9900::op_swpb;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////
// CPU Opcode 07 helper function
////////////////////////////////////////////////////////////////////////
void CPU9900::opcode07(Word in)
{
	// Decode for the >07xx group: bits 7-6 of the instruction word select
	// SETO (0) or ABS (1); the remaining two selectors are illegal.

	switch ((in>>6)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_seto;	break;
	case 1: opcode[in]=&CPU9900::op_abs;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////
// CPU Opcode 1 helper function
////////////////////////////////////////////////////////////////////////
void CPU9900::opcode1(Word in)
{
	// Decode for instruction words whose top nibble is 1: the second
	// nibble selects one of the jump instructions (0-12) or one of the
	// single-bit CRU instructions (13-15).

	switch ((in>>8)&0x0f)
	{
	case 0x0: opcode[in]=&CPU9900::op_jmp;	break;
	case 0x1: opcode[in]=&CPU9900::op_jlt;	break;
	case 0x2: opcode[in]=&CPU9900::op_jle;	break;
	case 0x3: opcode[in]=&CPU9900::op_jeq;	break;
	case 0x4: opcode[in]=&CPU9900::op_jhe;	break;
	case 0x5: opcode[in]=&CPU9900::op_jgt;	break;
	case 0x6: opcode[in]=&CPU9900::op_jne;	break;
	case 0x7: opcode[in]=&CPU9900::op_jnc;	break;
	case 0x8: opcode[in]=&CPU9900::op_joc;	break;
	case 0x9: opcode[in]=&CPU9900::op_jno;	break;
	case 0xa: opcode[in]=&CPU9900::op_jl;	break;
	case 0xb: opcode[in]=&CPU9900::op_jh;	break;
	case 0xc: opcode[in]=&CPU9900::op_jop;	break;
	case 0xd: opcode[in]=&CPU9900::op_sbo;	break;
	case 0xe: opcode[in]=&CPU9900::op_sbz;	break;
	case 0xf: opcode[in]=&CPU9900::op_tb;	break;
	default:  opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////
// CPU Opcode 2 helper function
////////////////////////////////////////////////////////////////////////
void CPU9900::opcode2(Word in)
{
	// Decode for instruction words whose top nibble is 2: bits 11-10
	// select COC, CZC, XOR or XOP.

	switch ((in>>10)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_coc;	break;
	case 1: opcode[in]=&CPU9900::op_czc;	break;
	case 2: opcode[in]=&CPU9900::op_xor;	break;
	case 3: opcode[in]=&CPU9900::op_xop;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

////////////////////////////////////////////////////////////////////////
// CPU Opcode 3 helper function
////////////////////////////////////////////////////////////////////////
void CPU9900::opcode3(Word in)
{
	// Decode for instruction words whose top nibble is 3: bits 11-10
	// select LDCR, STCR, MPY or DIV.

	switch ((in>>10)&0x3)
	{
	case 0: opcode[in]=&CPU9900::op_ldcr;	break;
	case 1: opcode[in]=&CPU9900::op_stcr;	break;
	case 2: opcode[in]=&CPU9900::op_mpy;	break;
	case 3: opcode[in]=&CPU9900::op_div;	break;
	default: opcode[in]=&CPU9900::op_bad;	break;
	}
}

///// F18A implementation/override class

// TODO: the only speed rating we have is 150-200nS per instruction on average. Actual
// timing information is not currently available. This is probably good
// enough for a rough start.
// Some details that are available:
// 100MHz clock
// jump takes 5 clocks
// instructions with 2 symbolic addresses take 20 clocks
//
// Stack operations use R15 as the stack pointer - always a word operation on EVEN address (so top is >47FE)
//
// the GPU auto-starts on reset (on VDP reset!) after loading the bitstream from EPROM, which pre-sets all RAM.
// TODO: I need a dump of the bitstream from Matthew to include, and I need a load routine and to start the GPU.
// 
// TODO: Disassembler needs to know about the changed opcodes

GPUF18A::GPUF18A() {
	// Builds the standard 9900 dispatch table, then patches it with the
	// F18A GPU's added, modified and removed instructions.

	// build default 9900
	buildcpu();

	// override with F18A replacements

	// new opcodes
	// CALL 0C80 - 0000 1100 10Ts SSSS
	for (int idx=0x0C80; idx<=0x0CBF; idx++) {
		opcode[idx] = &CPU9900::op_callF18;
	}

	// RET  0C00 - 0000 1100 0000 0000
	opcode[0x0c00] = &CPU9900::op_retF18;

	// PUSH 0D00 - 0000 1101 00Ts SSSS
	for (int idx=0x0D00; idx<=0x0D3F; idx++) {
		opcode[idx]=&CPU9900::op_pushF18;
	}

	// POP  0F00 - 0000 1111 00Td DDDD
	for (int idx=0x0F00; idx<=0x0f3F; idx++) {
		opcode[idx]=&CPU9900::op_popF18;
	}

	// SLC  0E00 - 0000 1110 00Ts SSSS
	for (int idx=0x0E00; idx<=0x0E3F; idx++) {
		opcode[idx]=&CPU9900::op_slcF18;
	}

	// Modified opcodes
	
	// IDLE = IDLE     Forces the GPU state machine to the idle state, restart with a trigger from host
	opcode[0x0340] = &CPU9900::op_idleF18;

	//TODO: be smart about these later
	// Scan the whole table (all 65536 entries, including >FFFF) and swap
	// handlers by identity. None of the replacement handlers appears as a
	// later match target, so the order of the tests below does not matter.
	for (int idx=0; idx<=0xffff; idx++) {
		// XOP  = PIX       The new dedicated pixel plotting instruction
		if (opcode[idx] == &CPU9900::op_xop) opcode[idx]=&CPU9900::op_pixF18;

		// CKON = SPI !CE Sets the chip enable line to the SPI Flash ROM low (enables the ROM)
		if (opcode[idx] == &CPU9900::op_ckon) opcode[idx]=&CPU9900::op_csonF18;

		// CKOF = SPI CE  Sets the chip enable line to the SPI Flash ROM high (disables the ROM)
		if (opcode[idx] == &CPU9900::op_ckof) opcode[idx]=&CPU9900::op_csoffF18;

		// LDCR = SPI OUT Writes a byte (always a byte operation) to the SPI Flash ROM
		if (opcode[idx] == &CPU9900::op_ldcr) opcode[idx]=&CPU9900::op_spioutF18;

		// STCR = SPI IN  Reads a byte (always a byte operation) from the SPI Flash ROM
		if (opcode[idx] == &CPU9900::op_stcr) opcode[idx]=&CPU9900::op_spiinF18;

		// RTWP = RTWP     Modified, does not use R13, only performs R14->PC, R15->status flags
		if (opcode[idx] == &CPU9900::op_rtwp) opcode[idx]=&CPU9900::op_rtwpF18;

		// Unimplemented
		if (opcode[idx] == &CPU9900::op_sbo) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_sbz) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_tb) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_blwp) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_stwp) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_lwpi) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_limi) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_rset) opcode[idx]=&CPU9900::op_bad;
		if (opcode[idx] == &CPU9900::op_lrex) opcode[idx]=&CPU9900::op_bad;
	}

	pType="F18A";
}

void GPUF18A::reset() {
	// Puts the GPU into its post-reset state: idle, no recorded return
	// address, no pending post-increments, PC at 0 and interrupts masked.
	StartIdle();
	nReturnAddress=0;

	// zero out the post increment tracking
	nPostInc[SRC]=0;
	nPostInc[DST]=0;

	// todo: big time hack - set the scanline register to 192 for programs that use this
	// method of VSYNC (note that programs might overwrite it...)
	VDP[0x7000] = 192;

	WP=0xff80;				// just a dummy, out of the way place for them. F18A doesn't have a WP
	PC=0;					// it doesn't run automatically, either
	X_flag=0;				// not currently executing an X instruction (todo: does it have X?)
	ST=(ST&0xfff0);			// disable interrupts (todo: does it have interrupts?)
	SetCycleCount(26);		// not that it's a big deal, but that's how long reset takes ;)

	// TODO: GPU /does/ autostart, but we have to load the bitstream from eeprom first (so we need a stopidle - not the startidle above)
}

// There are no side-effects to reading anything from the F18A,
// so we won't bother implementing the word/rbw actions
// There are WRITE side effects, but till we have the registers
// implemented it doesn't matter.
// TODO: does the F18 /actually/ implement word-only access or can it do TRUE bytes? word accesses are aligned, bytes are true bytes
// TODO: do reads differ from writes in that respect? no
// TODO: do F18 writes perform a read-before-write in any case? (does it matter? there are no side-effects. Just timing.)

// >0000 to >3FFF VRAM
// >4000 to >47FF GPU-RAM
// >5000 to >5x7F Color Registers, and you can READ them! (2 bytes per register)
// >6000 to >6x3F VDP Registers, read/write 
// >7000 to >7xxx Current scan line (0 to 192 / 240 (in 30-row mode))
// >8000 to >8xx3 32-bit counter
// >9000 to >9xx3 32-bit RNG
// >A000 to >A??? SPI interface - not worked out yet -- TODO: this is worked out, get detailed
// >B000 to >Bxxx F18A version
Byte GPUF18A::RCPUBYTE(Word src) {
	// GPU byte read: the GPU's address space is the VDP array, so this is a
	// direct array read. The only side effect is the heat-map update.
	UpdateHeatVDP(src);		// todo: maybe GPU vdp writes can be a different color
	return VDP[src];
}

void GPUF18A::WCPUBYTE(Word dest, Byte c) {
	// GPU byte write into the VDP array. Marks the byte as initialised,
	// requests a redraw for writes below >4000, and forwards writes that
	// land in the VDP register window to the register handler.
	UpdateHeatVDP(dest);		// todo: maybe GPU vdp writes can be a different color
	VDP[dest]=c;
	VDPMemInited[dest]=1;

	// to avoid redrawing because of GPU R0-R15 registers changing
	if (dest < 0x4000) {
		redraw_needed=1;
	}

	// ignore address bits 8-11 when testing for the >6000->603F register window
	const Word folded=dest&0xf0ff;
	if ((folded>=0x6000) && (folded<=0x603f)) {
		// write VDP register
		wVDPreg(dest&0x3f,c);
	}
}

Word GPUF18A::ROMWORD(Word src) {
	// GPU word read: the address is forced even, then the two bytes are
	// combined big-endian (high byte at the even address).
	const Word even=src&0xfffe;

	UpdateHeatVDP(even);		// todo: maybe GPU vdp writes can be a different color
	UpdateHeatVDP(even+1);		// todo: maybe GPU vdp writes can be a different color

	const Byte hi=VDP[even];
	const Byte lo=VDP[even+1];
	return (hi<<8) | lo;
}

void GPUF18A::WRWORD(Word dest, Word val) {
	// GPU word write: the address is forced even, then the value is stored
	// big-endian (high byte at the even address). Both bytes are marked as
	// initialised and a redraw is requested for writes below >4000.

	// Align first so the heat map tracks the bytes that are really written
	// (matching ROMWORD) and dest+1 can never run past >FFFF.
	dest&=0xfffe;

	UpdateHeatVDP(dest);		// todo: maybe GPU vdp writes can be a different color
	UpdateHeatVDP(dest+1);		// todo: maybe GPU vdp writes can be a different color

	VDP[dest]=val>>8;
	VDP[dest+1]=val&0xff;

	VDPMemInited[dest]=1;
	VDPMemInited[dest+1]=1;
	if (dest < 0x4000) redraw_needed=1;		// to avoid redrawing because of GPU R0-R15 registers changing
}

Word GPUF18A::GetSafeWord(int x, int) {
	// Monitoring read of a word. Simply delegates to ROMWORD, so the
	// heat-map update done there still happens.
	// bank is irrelevant
	return ROMWORD(x);
}

// Read a byte withOUT triggering the hardware - for monitoring
Byte GPUF18A::GetSafeByte(int x, int) {
	// Monitoring read of a byte. Simply delegates to RCPUBYTE, so the
	// heat-map update done there still happens.
	// bank is irrelevant
	return RCPUBYTE(x);
}

void GPUF18A::TriggerInterrupt(Word /*vector*/) {
	// Intentionally empty: the vector argument is ignored.
	// do nothing, there are no external interrupts
}
