//
// (C) 2005-2014 Mike Brent aka Tursi aka HarmlessLion.com
// This software is provided AS-IS. No warranty
// express or implied is provided.
//
// This notice defines the entire license for this code.
// All rights not explicity granted here are reserved by the
// author.
//
// You may redistribute this software provided the original
// archive is UNCHANGED and a link back to my web page,
// http://harmlesslion.com, is provided as the author's site.
// It is acceptable to link directly to a subpage at harmlesslion.com
// provided that page offers a URL for that purpose
//
// Source code, if available, is provided for educational purposes
// only. You are welcome to read it, learn from it, mock
// it, and hack it up - for your own use only.
//
// Please contact me before distributing derived works or
// ports so that we may work out terms. I don't mind people
// using my code but it's been outright stolen before. In all
// cases the code must maintain credit to the original author(s).
//
// -COMMERCIAL USE- Contact me first. I didn't make
// any money off it - why should you? ;) If you just learned
// something from this, then go ahead. If you just pinched
// a routine or two, let me know, I'll probably just ask
// for credit. If you want to derive a commercial tool
// or use large portions, we need to talk. ;)
//
// If this, itself, is a derived work from someone else's code,
// then their original copyrights and licenses are left intact
// and in full force.
//
// http://harmlesslion.com - visit the web page for contact info
//
///////////////////////////////////////////////////////
// Classic99 VDP Routines
// M.Brent
///////////////////////////////////////////////////////

#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0500

#include <stdio.h>
#include <windows.h>
#include <ddraw.h>
#include <commctrl.h>
#include <commdlg.h>
#include <atlstr.h>

#include "tiemul.h"
#include "..\resource.h"
#include "..\2xSaI\2xSaI.h"
#include "..\FilterDLL\sms_ntsc.h"

// Master 16-entry color palette, indexed by the 4-bit color numbers the
// renderers work with (e.g. VDPdisplay looks up TIPALETTE[VDPREG[7]&0xf]
// to fill the frame buffer). Entries 0 and 1 are both 0x00000000.
//
// Legacy 16-bit 0rrrrrgggggbbbbb values, kept for reference (the same
// sixteen values seed every bank of F18APalette):
//int TIPALETTE[16]={ 
//	0x0000, 0x0000, 0x1328, 0x2f6f, 0x295d, 0x3ddf, 0x6949, 0x23be,
//	0x7d4a, 0x7def, 0x6b0a, 0x7330, 0x12c7, 0x6577, 0x6739, 0x7fff,
//};
// 32-bit 0RGB colors
unsigned int TIPALETTE[16] = {
	0x00000000,0x00000000,0x0020C840,0x0058D878,0x005050E8,0x007870F8,0x00D05048,
	0x0040E8F0,0x00F85050,0x00F87878,0x00D0C050,0x00E0C880,0x0020B038,0x00C858B8,
	0x00C8C8C8,0x00F8F8F8
};

// VDP table base addresses and sizes. All seven values are recomputed from
// the VDP registers every time gettables() is called.
int SIT;									// Screen Image Table
int CT;										// Color Table
int PDT;									// Pattern Descriptor Table
int SAL;									// Sprite Allocation Table (gettables() calls this the Sprite Attribute List)
int SDT;									// Sprite Descriptor Table
int CTsize;									// Color Table size in Bitmap Mode (applied as an address mask by the bitmap renderers; 32 in other modes)
int PDTsize;								// Pattern Descriptor Table size in Bitmap Mode (applied as an address mask by the bitmap renderers; 2048 in other modes)

Byte VDP[128*1024];							// Video RAM (16k, except for now we are faking the rest of the VDP address space for F18A (todo: only 18k on real chip))
Byte SprColBuf[256][192];					// Sprite Collision Buffer
Byte HeatMap[256*256*3];					// memory access heatmap (red/green/blue = CPU/VDP/GROM)
int SprColFlag;								// Sprite collision flag
int bF18AActive = 0;						// was the F18 activated?
int bF18Enabled = 1;						// is it even enabled?
int bInterleaveGPU = 1;						// whether to run the GPU and the CPU together (impedes debug - temporary option)

// DirectDraw state. Everything starts out NULL; VDPmain() calls
// SetupDirectDraw() at startup and takedownDirectDraw() at shutdown.
IDirectDraw7 *lpdd=NULL;					// DirectDraw object
LPDIRECTDRAWSURFACE7 lpdds=NULL;			// Primary surface
LPDIRECTDRAWSURFACE7 ddsBack=NULL;			// Back buffer
LPDIRECTDRAWCLIPPER  lpDDClipper=NULL;		// Window clipper
DDSURFACEDESC2 CurrentDDSD;					// current back buffer settings

// deprecated directdraw functions we need to manually extract from ddraw.dll
// (signature of DirectDrawCreateEx, for use with a dynamically resolved entry point)
typedef HRESULT (WINAPI* LPDIRECTDRAWCREATEEX )( GUID FAR * lpGuid, LPVOID  *lplpDD, REFIID  iid,IUnknown FAR *pUnkOuter );

// User-visible display configuration
int FullScreenMode=0;						// Current full screen mode
int FilterMode=0;							// Current filter mode (doBlit: 1=2xSaI, 2=Super2xSaI, 3=SuperEagle, 4=TV filter, 5=HQ4x)
int nDefaultScreenScale=1;					// default screen scale multiplier
int nXSize=256, nYSize=192;					// custom sizing
int TVFiltersAvailable=0;					// Depends on whether we can load the Filter DLL (set by VDPmain)
int TVScanLines=1;							// Whether to draw scanlines or not
int VDPDebug=0;								// When set, displays all 256 chars (renderers count through character codes instead of reading the SIT)
int bEnable80Columns=1;						// Enable the beginnings of the 80 column mode - to replace someday with F18A
int bEnable128k=0;							// disabled by default - it's a non-real-world combination of F18 and 9938, so HACK.
#define TV_WIDTH (602+32)					// how wide is TV mode really? (used as the width of myInfoTV)

// TV (NTSC) filter: state plus entry points resolved from FilterDll.dll by
// VDPmain(). The three function pointers are NULL whenever the DLL is not
// loaded, so callers must test them before use.
sms_ntsc_t tvFilter;						// TV Filter structure
sms_ntsc_setup_t tvSetup;					// TV Setup structure
HMODULE hFilterDLL;							// Handle to Filter DLL
void (*sms_ntsc_init)(sms_ntsc_t *ntsc, sms_ntsc_setup_t const *setup);	// pointer to init function
// pointer to blit function
void (*sms_ntsc_blit)(sms_ntsc_t const *ntsc, unsigned int const *sms_in, long in_row_width, int in_width, int height, void *rgb_out, long out_pitch);
// pointer to scanline function
void (*sms_ntsc_scanlines)(void *pFrame, int nWidth, int nStride, int nHeight);

// HQ4x filter: entry points resolved from HQ4xDll.dll by VDPmain(), NULL when unavailable
HMODULE hHQ4DLL;							// Handle to HQ4x DLL
void (*hq4x_init)(void);
void (*hq4x_process)(unsigned char *pBufIn, unsigned char *pBufOut);

HANDLE Video_hdl[2];						// Handles for Display/Blit events (VDPmain waits on both; index 0 triggers a VDP draw, index 1 is reset after each blit)
unsigned int *framedata;					// The actual pixel data
unsigned int *framedata2;					// Filtered pixel data
// DIB headers, all filled in by VDPmain() as 32-bit BI_RGB
BITMAPINFO myInfo;							// Bitmapinfo header for the DIB functions (272x208, unfiltered)
BITMAPINFO myInfo2;							// Bitmapinfo header for the DIB functions (544x413, 2x filters)
BITMAPINFO myInfo32;						// Bitmapinfo header for the DIB functions (myInfo scaled by 4 in each direction)
BITMAPINFO myInfoTV;						// Bitmapinfo header for the DIB functions (TV_WIDTH x 413, TV filter)
BITMAPINFO myInfo80Col;						// Bitmapinfo header for the DIB functions (528x208, 80-column text)
HDC tmpDC;									// Temporary DC for StretchBlt to work from

int redraw_needed;							// redraw flag (cleared by VDPdisplay when it repaints the background)
int end_of_frame;							// end of frame flag (move this to tiemul.cpp, not used in VDP)
int skip_interrupt;							// flag for some instructions
// NOTE(review): the original comment called these "read-only" registers, but the
// status register is kept separately in VDPS below - presumably "write-only" was meant; confirm.
Byte VDPREG[59];							// VDP registers (9918A has 8, we define 9 to support 80 cols, and the F18 has 59 (!) (and 16 status registers!))
Byte VDPS;									// VDP Status register

// Added by RasmusM
int F18AStatusRegisterNo = 0;				// F18A Status register number
int F18AECModeSprite = 0;					// F18A Enhanced color mode for sprites (0 = normal, 1 = 1 bit color mode, 2 = 2 bit color mode, 3 = 3 bit color mode)
int F18ASpritePaletteSize = 16;				// F18A Number of entries in each palette: 2, 4, 8 (depends on ECM)
int bF18ADataPortMode = 0;					// F18A Data-port mode
int bF18AAutoIncPaletteReg = 0;				// F18A Auto increment palette register
int F18APaletteRegisterNo = 0;				// F18A Palette register number
int F18APaletteRegisterData = -1;			// F18A Temporary storage of data written to palette register
// Four banks of 16 entries; each bank is initialized with the same sixteen 16-bit values
int F18APalette[64] = {
	0x0000, 0x0000, 0x1328, 0x2f6f, 0x295d, 0x3ddf, 0x6949, 0x23be,
	0x7d4a, 0x7def, 0x6b0a, 0x7330, 0x12c7, 0x6577, 0x6739, 0x7fff,
	0x0000, 0x0000, 0x1328, 0x2f6f, 0x295d, 0x3ddf, 0x6949, 0x23be,
	0x7d4a, 0x7def, 0x6b0a, 0x7330, 0x12c7, 0x6577, 0x6739, 0x7fff,
	0x0000, 0x0000, 0x1328, 0x2f6f, 0x295d, 0x3ddf, 0x6949, 0x23be,
	0x7d4a, 0x7def, 0x6b0a, 0x7330, 0x12c7, 0x6577, 0x6739, 0x7fff,
	0x0000, 0x0000, 0x1328, 0x2f6f, 0x295d, 0x3ddf, 0x6949, 0x23be,
	0x7d4a, 0x7def, 0x6b0a, 0x7330, 0x12c7, 0x6577, 0x6739, 0x7fff
};
// RasmusM added end

Word VDPADD;								// VDP Address counter
int vdpaccess;								// VDP address write flipflop (low/high)
int vdpwroteaddress;						// VDP (instruction) countdown after writing an address (weak test)
Byte vdpprefetch,vdpprefetchuninited;		// VDP Prefetch
unsigned long hVideoThread;					// thread handle
int hzRate;									// flag for 50 or 60hz
int Recording;								// Flag for AVI recording
int MaintainAspect;							// Flag for Aspect ratio
int StretchMode;							// Setting for video stretching
int bUse5SpriteLimit;						// whether the sprite flicker is on
// Layer debugging switches, all reset to false by VDPmain(). When set, VDPdisplay
// ignores the display-blank bit / skips the background renderers respectively.
bool bDisableBlank, bDisableSprite, bDisableBackground;	// other layers :)

// Globals owned by other modules
extern int fontX, fontY;					// Font dimensions
extern HANDLE hDebugWindowUpdateEvent;		// debug draw event (signalled by draw_debug)
extern int nSystem;							// which system we are using (detect 99/4) - 0 disables the bitmap mode bit
extern int drawspeed;						// frameskip... sorta. Not sure this is still valuable
extern int nVideoLeft, nVideoTop;			// NOTE(review): presumably the saved window position - not used in this part of the file, confirm

//////////////////////////////////////////////////////////
// Helpers for the TV controls
//////////////////////////////////////////////////////////

// Report the TV filter settings currently held in tvSetup.
// Each output pointer is written unconditionally, so none of them may be NULL.
void GetTVValues(double *hue, double *sat, double *cont, double *bright, double *sharp)
{
	*hue    = tvSetup.hue;
	*sat    = tvSetup.saturation;
	*cont   = tvSetup.contrast;
	*bright = tvSetup.brightness;
	*sharp  = tvSetup.sharpness;
}

// Store new TV filter settings in tvSetup and, when the filter DLL is loaded,
// rebuild the filter tables from them.
// The rebuild never blocks: if the video critical section is busy the values
// are still stored, but sms_ntsc_init() is not called by this invocation.
void SetTVValues(double hue, double sat, double cont, double bright, double sharp)
{
	tvSetup.hue        = hue;
	tvSetup.saturation = sat;
	tvSetup.contrast   = cont;
	tvSetup.brightness = bright;
	tvSetup.sharpness  = sharp;

	// no filter library, nothing to re-initialize
	if (!sms_ntsc_init) {
		return;
	}

	// the blitter uses tvFilter under VideoCS, so only touch it while holding the lock
	if (TryEnterCriticalSection(&VideoCS)) {
		sms_ntsc_init(&tvFilter, &tvSetup);
		LeaveCriticalSection(&VideoCS);
	}
	// else: do it later
}

//////////////////////////////////////////////////////////
// Get table addresses from Registers
//
// Recomputes SIT, SAL, SDT, CT, PDT, CTsize and PDTsize from VDPREG[].
// reg0 is the caller's (possibly filtered) copy of VDP register 0, so the
// caller decides whether the bitmap (0x02) and 80-column (0x04) bits count.
//////////////////////////////////////////////////////////
void gettables(int reg0)
{
	const int reg3 = VDPREG[3];
	const int reg4 = VDPREG[4];

	/* Screen Image Table: low 4 bits of register 2, in 1k units */
	int sitBits = VDPREG[2] & 0x0F;
	if (bEnable80Columns && (reg0 & 0x04)) {
		// 80-column text: the 9938 requires the two LSBs to be 11, so the F18A
		// treats 11 like 00 while 01 and 10 keep their actual values. Only 4 bits
		// are honoured here (the 9938 would read 7, mask 0x7C instead of 0x0C).
		// TODO: check the 9938 datasheet. This works for a table at >0000,
		// which is most of them.
		if ((sitBits & 0x03) == 0x03) {
			sitBits &= 0x0C;
		}
	}
	SIT = sitBits << 10;

	/* Sprite Attribute List */
	SAL = (VDPREG[5] & 0x7f) << 7;
	/* Sprite Descriptor Table */
	SDT = (VDPREG[6] & 0x07) << 11;

	if (!(reg0 & 0x02)) {
		// non-bitmap modes: plain base addresses and fixed sizes
		CT      = reg3 << 6;
		PDT     = (reg4 & 0x07) << 11;
		CTsize  = 32;
		PDTsize = 2048;
		return;
	}

	// Bitmap modes: each table sits at >0000 or >2000 and the remaining
	// register bits become a size/mask value instead of address bits.
	CT      = (reg3 & 0x80) ? 0x2000 : 0;
	CTsize  = ((reg3 & 0x7f) << 6) | 0x3f;
	PDT     = (reg4 & 0x04) ? 0x2000 : 0;

	// Low 11 bits of the pattern mask: all ones in bitmap text (there is no
	// color table), otherwise borrowed from the color table mask.
	const int lowMask = (VDPREG[1] & 0x10) ? 0x7ff : (CTsize & 0x7ff);
	PDTsize = ((reg4 & 0x03) << 11) | lowMask;
}

////////////////////////////////////////////////////////////
// Startup and run VDP graphics interface
//
// Loads the optional filter DLLs, fills in the DIB headers, sets up
// DirectDraw and the two video events, then loops until quitflag is set:
// each time either event fires the frame is (optionally) redrawn and
// blitted. On exit everything acquired here is released again.
////////////////////////////////////////////////////////////
void VDPmain()
{	
	DWORD ret;
	HDC myDC;

	Init_2xSaI(888);

	// load the Filter DLL
	TVFiltersAvailable=0;
	hFilterDLL=LoadLibrary("FilterDll.dll");
	if (NULL == hFilterDLL) {
		debug_write("Failed to load filter library.");
	} else {
		sms_ntsc_init=(void (*)(sms_ntsc_t*,sms_ntsc_setup_t const *))GetProcAddress(hFilterDLL, "sms_ntsc_init");
		sms_ntsc_blit=(void (*)(sms_ntsc_t const *, unsigned int const *, long, int, int, void *, long))GetProcAddress(hFilterDLL, "sms_ntsc_blit");
		sms_ntsc_scanlines=(void (*)(void *, int, int, int))GetProcAddress(hFilterDLL, "sms_ntsc_scanlines");
		if ((NULL == sms_ntsc_blit) || (NULL == sms_ntsc_init) || (NULL == sms_ntsc_scanlines)) {
			// all three entry points are required - treat a partial DLL as missing
			debug_write("Failed to find entry points in filter library.");
			FreeLibrary(hFilterDLL);
			sms_ntsc_blit=NULL;
			sms_ntsc_init=NULL;
			sms_ntsc_scanlines=NULL;
			hFilterDLL=NULL;
		} else {
			// hue, saturation, contrast, brightness and sharpness are
			// set up by SetTVValues(); the rest (-1.0 to +1.0) default to 0
			tvSetup.gamma=0;
			tvSetup.resolution=0;
			tvSetup.artifacts=0;
			tvSetup.fringing=0;
			tvSetup.bleed=0;
			tvSetup.decoder_matrix=NULL;
			tvSetup.palette_out=NULL;
			sms_ntsc_init(&tvFilter, &tvSetup);
			TVFiltersAvailable=1;
		}
	}

	// load the HQ4x DLL
	hHQ4DLL=LoadLibrary("HQ4xDll.dll");
	if (NULL == hHQ4DLL) {
		debug_write("Failed to load HQ4 library.");
	} else {
		hq4x_init=(void (*)(void))GetProcAddress(hHQ4DLL, "hq4x_init");
		hq4x_process=(void (*)(unsigned char *pBufIn, unsigned char *pBufOut))GetProcAddress(hHQ4DLL, "hq4x_process");
		if ((NULL == hq4x_init) || (NULL == hq4x_process)) {
			debug_write("Failed to find entry points in HQ4x library.");
			FreeLibrary(hHQ4DLL);
			hq4x_init=NULL;
			hq4x_process=NULL;
			hHQ4DLL=NULL;
		} else {
			hq4x_init();			// TODO: this needs to be updated for RGB
		}
	}

	// base DIB header: 32-bit uncompressed, native frame plus 8 pixel border all round
	myInfo.bmiHeader.biSize=sizeof(myInfo.bmiHeader);
	myInfo.bmiHeader.biWidth=256+16;
	myInfo.bmiHeader.biHeight=192+16;
	myInfo.bmiHeader.biPlanes=1;
	myInfo.bmiHeader.biBitCount=32;
	myInfo.bmiHeader.biCompression=BI_RGB;
	myInfo.bmiHeader.biSizeImage=0;
	myInfo.bmiHeader.biXPelsPerMeter=1;
	myInfo.bmiHeader.biYPelsPerMeter=1;
	myInfo.bmiHeader.biClrUsed=0;
	myInfo.bmiHeader.biClrImportant=0;

	// the other headers are copies with different dimensions
	memcpy(&myInfo2, &myInfo, sizeof(myInfo));
	myInfo2.bmiHeader.biWidth=512+32;
	myInfo2.bmiHeader.biHeight=384+29;

	memcpy(&myInfoTV, &myInfo2, sizeof(myInfo2));
	myInfoTV.bmiHeader.biWidth=TV_WIDTH;

	memcpy(&myInfo32, &myInfo, sizeof(myInfo));
	myInfo32.bmiHeader.biWidth*=4;
	myInfo32.bmiHeader.biHeight*=4;
	myInfo32.bmiHeader.biBitCount=32;

	memcpy(&myInfo80Col, &myInfo, sizeof(myInfo));
	myInfo80Col.bmiHeader.biWidth=512+16;

	myDC=GetDC(myWnd);
	tmpDC=CreateCompatibleDC(myDC);
	ReleaseDC(myWnd, myDC);

	SetupDirectDraw(false);

	// now we create a waitable object and sit on it - the main thread
	// will tell us when we should redraw the screen.
	DisplayEvent=CreateEvent(NULL, false, false, NULL);
	if (NULL == DisplayEvent)
		debug_write("Video Event Creation failed");
	BlitEvent=CreateEvent(NULL, false, false, NULL);
	if (NULL == BlitEvent)
		debug_write("Blit Event Creation failed");

	// layers all enabled at start
	bDisableBlank=false;
	bDisableSprite=false;
	bDisableBackground=false;

	debug_write("Starting video loop");
	redraw_needed=1;

	while (quitflag==0)
	{
		// the 100ms timeout only exists so that quitflag gets polled
		if ((ret=WaitForMultipleObjects(2, Video_hdl, false, 100)) != WAIT_TIMEOUT)
		{
			// on failure ret becomes the error code, which normally will
			// not match WAIT_OBJECT_0, so we just fall through to the blit
			if (WAIT_FAILED==ret)
				ret=GetLastError();

			if (WAIT_OBJECT_0 == ret) {
				// do the VDP draw
				VDPdisplay();
				// NOW set the VDP interrupt for end of frame
				VDPS|=VDPS_INT;
			}

			doBlit();
			ResetEvent(Video_hdl[1]);	// in case we fell through from above

			// Don't ever spend all our time doing this!
			Sleep(5);		// rounds up to quantum
		}
	}

	// shutdown: clear the function pointers before unloading so nothing
	// can call into a DLL that is no longer mapped
	if (NULL != hFilterDLL) {
		sms_ntsc_blit=NULL;
		sms_ntsc_init=NULL;
		sms_ntsc_scanlines=NULL;
		FreeLibrary(hFilterDLL);
		hFilterDLL=NULL;
	}
	// the HQ4x library was previously never unloaded
	if (NULL != hHQ4DLL) {
		hq4x_init=NULL;
		hq4x_process=NULL;
		FreeLibrary(hHQ4DLL);
		hHQ4DLL=NULL;
	}
	takedownDirectDraw();
	DeleteDC(tmpDC);

	// close both events (the blit event used to be leaked); skip handles
	// whose creation failed, CloseHandle() does not accept NULL
	if (NULL != DisplayEvent) {
		CloseHandle(DisplayEvent);
		DisplayEvent=NULL;
	}
	if (NULL != BlitEvent) {
		CloseHandle(BlitEvent);
		BlitEvent=NULL;
	}
}

// Map a Windows client-area mouse position onto the character displayed
// under it.
//   x, y          - mouse position in window pixels
//   width, height - size of the window area the screen is stretched over
// Returns the character, or -1 when the display is off, the mode is not
// plain graphics or plain 40-column text, the pointer is over the border,
// or the character is not printable. Because of the screen borders the
// window covers a larger area than the TI actually displays: 26 rows, and
// 34 (graphics) or 45 (text) character widths across.
// Note that this refreshes the table addresses via gettables().
char VDPGetChar(int x, int y, int width, int height) {
	int reg0 = VDPREG[0];
	if (nSystem == 0) {
		// disable bitmap for 99/4
		reg0 &= ~0x02;
	}
	gettables(reg0);

	const int reg1 = VDPREG[1];
	if (!(reg1 & 0x40)) {
		return -1;		// display disabled
	}
	if (reg1 & 0x08) {
		return -1;		// mode bit 1: multicolor, or illegal when combined with text
	}
	if (reg0 & 0x02) {
		return -1;		// bitmap mode bit: no character-based lookup
	}

	// what is left is text mode (mode bit 2) or standard graphics mode
	const bool textMode = (reg1 & 0x10) != 0;
	const double cellsAcross = textMode ? 45.0 : 34.0;
	const int charsPerLine = textMode ? 40 : 32;

	// size of one character cell in window pixels; there are always 24+2 rows
	const double cellHeight = height / 26.0;
	const double cellWidth = width / cellsAcross;
	if ((cellWidth < 1.0) || (cellHeight < 1.0)) {
		// screen is too small to differentiate (this is unlikely)
		return -1;
	}

	// one border row above the 24 visible ones
	const int row = (int)(y / cellHeight) - 1;
	if ((row < 0) || (row > 23)) {
		return -1;
	}

	int col;
	int lastCol;
	if (textMode) {
		// two and a half cells of border on the left
		col = (int)(x / cellWidth - .5) - 2;
		lastCol = 39;
	} else {
		// one cell of border on the left
		col = (int)(x / cellWidth) - 1;
		lastCol = 31;
	}
	if ((col < 0) || (col > lastCol)) {
		return -1;
	}

	const int ch = VDP[SIT + (row * charsPerLine) + col];
	if (isprint(ch)) {
		return (char)ch;
	}

	// this is really hacky but it should work ;)
	// handle the TI BASIC character offset
	if ((ch >= 0x80) && (isprint(ch - 0x60))) {
		return (char)(ch - 0x60);
	}

	return -1;
}

//////////////////////////////////////////////////////////
// Perform drawing of a single frame
// Determines which screen mode to draw
//////////////////////////////////////////////////////////
void VDPdisplay()
{
	int idx;
	DWORD longcol;
	DWORD *plong;
	int reg0 = VDPREG[0];
	int nMax;

	if (nSystem == 0) {
		// disable bitmap for 99/4
		reg0&=~0x02;
	}
	if (!bEnable80Columns) {
		// disable 80 columns if not enabled
		reg0&=~0x04;
	}
	gettables(reg0);

	if (redraw_needed) {
		redraw_needed=0;

		// blank screen
		plong=(DWORD*)framedata;
		longcol=TIPALETTE[VDPREG[7]&0xf];
		if ((reg0&0x04)&&(VDPREG[1]&0x10)) {
			// 80 column text
			nMax = ((512+16)*(192+16))/4;
		} else {
			// all other modes
			nMax = ((256+16)*(192+16))/4;
		}
		for (idx=0; idx<nMax; idx++) {
			*(plong++)=longcol;
			*(plong++)=longcol;
			*(plong++)=longcol;
			*(plong++)=longcol;
		}

		if (!bDisableBlank) {
			if (!(VDPREG[1] & 0x40)) {	// Disable display
				return;
			}
		}

		if (!bDisableBackground) {
			if ((VDPREG[1] & 0x18)==0x18)	// MODE BITS 2 and 1
			{
				VDPillegal();
				return;
			}

			if (VDPREG[1] & 0x10)		// MODE BIT 2
			{
				if (reg0 & 0x02) {		// BITMAP MODE BIT
					VDPtextII();		// undocumented bitmap text mode
				} else if (reg0 & 0x04) {	// MODE BIT 4 (9938)
					VDPtext80();		// 80-column text, similar to 9938/F18A
				} else {
					VDPtext();			// regular 40-column text
				}
				return;
			}

			if (VDPREG[1] & 0x08)		// MODE BIT 1
			{
				if (reg0 & 0x02) {		// BITMAP MODE BIT
					VDPmulticolorII();	// undocumented bitmap multicolor mode
				} else {
					VDPmulticolor();
				}
				return;
			}

			if (reg0 & 0x02) {			// BITMAP MODE BIT
				VDPgraphicsII();		// documented bitmap graphics mode
			} else {
				VDPgraphics();
			}
		} else {
			// as long as mode bit 2 is not set, sprites are okay
			if ((VDPREG[1] & 0x10) == 0) {
				DrawSprites();
			}
		}
	} else {
		// we have to redraw the sprites even if the screen didn't change, so that collisions are updated
		// as the CPU may have cleared the collision bit
		// as long as mode bit 2 (text) is not set, and the display is enabled, sprites are okay
		if ((VDPREG[1] & 0x10) == 0) {
			if ((bDisableBlank) || (VDPREG[1] & 0x40)) {
				DrawSprites();
			}
		}
	}
}

//////////////////////////////////////////////////////
// Draw a debug screen 
// Only signals the debug window's update event; does
// nothing when no debug window exists.
//////////////////////////////////////////////////////
void draw_debug()
{
	if (NULL == dbgWnd) {
		return;
	}

	SetEvent(hDebugWindowUpdateEvent);
}

/////////////////////////////////////////////////////////
// Draw graphics mode
// 32x24 characters of 8x8 pixels. Each group of eight
// character codes shares one color table byte (high
// nibble = set pixels, low nibble = clear pixels).
/////////////////////////////////////////////////////////
void VDPgraphics()
{
	int top, left, line, bit;
	int cell = 0;					// offset in SIT
	int patAddr, colors, ink, paper, bits;
	unsigned char name = 0xff;		// wraps to 0 on the first debug increment

	for (top = 0; top < 192; top += 8) {
		for (left = 0; left < 256; left += 8) {
			// in debug mode just count through the character set
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			patAddr = PDT + (name << 3);
			colors = VDP[CT + (name >> 3)];
			ink = colors >> 4;
			paper = colors & 0x0f;

			for (line = 0; line < 8; line++) {
				bits = VDP[patAddr++];

				// most significant bit is the leftmost pixel
				for (bit = 0; bit < 8; bit++) {
					pixel(left + bit, top + line, (bits & (0x80 >> bit)) ? ink : paper);
				}
			}
		}
	}

	DrawSprites();
}

/////////////////////////////////////////////////////////
// Draw bitmap graphics mode
// The screen is split into three bands of 64 lines; each
// band adds 0x800 to the pattern and color offsets before
// they are masked with PDTsize / CTsize. Every pattern
// row has its own color byte.
/////////////////////////////////////////////////////////
void VDPgraphicsII()
{
	int top, left, line, bit;
	int cell = 0;					// offset in SIT
	int bandOffset, tableOffset;
	int patAddr, colAddr;
	int bits, colors, ink, paper;
	unsigned char name = 0xff;		// wraps to 0 on the first debug increment

	for (top = 0; top < 192; top += 8) {
		// 0x000, 0x800 or 0x1000 for the top, middle and bottom band
		bandOffset = (top / 64) * 0x800;

		for (left = 0; left < 256; left += 8) {
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			tableOffset = (name << 3) + bandOffset;
			patAddr = PDT + (tableOffset & PDTsize);
			colAddr = CT + (tableOffset & CTsize);

			for (line = 0; line < 8; line++) {
				bits = VDP[patAddr++];
				colors = VDP[colAddr++];
				ink = colors >> 4;
				paper = colors & 0x0f;

				for (bit = 0; bit < 8; bit++) {
					pixel(left + bit, top + line, (bits & (0x80 >> bit)) ? ink : paper);
				}
			}
		}
	}

	DrawSprites();
}

////////////////////////////////////////////////////////////////////////
// Draw text mode 40x24
// Characters are 6 pixels wide (the top six bits of each pattern byte)
// and 8 rows tall, leaving 8 pixel margins at the left and right.
// Both colors come from VDPREG[7].
////////////////////////////////////////////////////////////////////////
void VDPtext()
{ 
	const int reg7 = VDPREG[7];
	const int paper = reg7 & 0xf;		// clear pixels and margins
	const int ink = reg7 >> 4;			// set pixels
	int edgeX, edgeY;
	int top, left, line, bit;
	int cell = 0;						// offset in SIT
	int patAddr, bits;
	unsigned char name = 0xff;			// wraps to 0 on the first debug increment

	// erase border area
	for (edgeX = 0; edgeX < 8; edgeX++) {
		for (edgeY = 0; edgeY < 192; edgeY++) {
			pixel(edgeX, edgeY, paper);
			pixel(edgeX + 248, edgeY, paper);
		}
	}

	for (top = 0; top < 192; top += 8) {
		for (left = 8; left < 248; left += 6) {
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			patAddr = PDT + (name << 3);

			for (line = 0; line < 8; line++) {
				bits = VDP[patAddr++];

				// only the six most significant bits are displayed
				for (bit = 0; bit < 6; bit++) {
					pixel(left + bit, top + line, (bits & (0x80 >> bit)) ? ink : paper);
				}
			}
		}
	}
	// no sprites in text mode
}

////////////////////////////////////////////////////////////////////////
// Draw bitmap text mode 40x24
// Same layout as regular text mode, but the pattern offset gains 0x800
// per 64-line band and is masked with PDTsize.
////////////////////////////////////////////////////////////////////////
void VDPtextII()
{ 
	const int reg7 = VDPREG[7];
	const int paper = reg7 & 0xf;		// clear pixels and margins
	const int ink = reg7 >> 4;			// set pixels
	int edgeX, edgeY;
	int top, left, line, bit;
	int cell = 0;						// offset in SIT
	int bandOffset, patAddr, bits;
	unsigned char name = 0xff;			// wraps to 0 on the first debug increment

	// erase border area
	for (edgeX = 0; edgeX < 8; edgeX++) {
		for (edgeY = 0; edgeY < 192; edgeY++) {
			pixel(edgeX, edgeY, paper);
			pixel(edgeX + 248, edgeY, paper);
		}
	}

	for (top = 0; top < 192; top += 8) {
		// 0x000, 0x800 or 0x1000 for the top, middle and bottom band
		bandOffset = (top / 64) * 0x800;

		for (left = 8; left < 248; left += 6) {
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			patAddr = PDT + (((name << 3) + bandOffset) & PDTsize);

			for (line = 0; line < 8; line++) {
				bits = VDP[patAddr++];

				// only the six most significant bits are displayed
				for (bit = 0; bit < 6; bit++) {
					pixel(left + bit, top + line, (bits & (0x80 >> bit)) ? ink : paper);
				}
			}
		}
	}
	// no sprites in text mode
}

////////////////////////////////////////////////////////////////////////
// Draw text mode 80x24 (note: 80x26.5 mode not supported, blink not supported)
// Same as 40-column text, drawn through pixel80() with 80 characters
// between the two 8 pixel margins.
////////////////////////////////////////////////////////////////////////
void VDPtext80()
{ 
	const int reg7 = VDPREG[7];
	const int paper = reg7 & 0xf;		// clear pixels and margins
	const int ink = reg7 >> 4;			// set pixels
	int edgeX, edgeY;
	int top, left, line, bit;
	int cell = 0;						// offset in SIT
	int patAddr, bits;
	unsigned char name = 0xff;			// wraps to 0 on the first debug increment

	// erase border area
	for (edgeX = 0; edgeX < 8; edgeX++) {
		for (edgeY = 0; edgeY < 192; edgeY++) {
			pixel80(edgeX, edgeY, paper);
			pixel80(edgeX + 488, edgeY, paper);
		}
	}

	for (top = 0; top < 192; top += 8) {
		for (left = 8; left < 488; left += 6) {
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			patAddr = PDT + (name << 3);

			for (line = 0; line < 8; line++) {
				bits = VDP[patAddr++];

				// only the six most significant bits are displayed
				for (bit = 0; bit < 6; bit++) {
					pixel80(left + bit, top + line, (bits & (0x80 >> bit)) ? ink : paper);
				}
			}
		}
	}
	// no sprites in text mode
	// TODO: except on the F18A
}

////////////////////////////////////////////////////////////////////////
// Draw Illegal mode (similar to text mode)
// No VDP memory is read: every 6 pixel wide cell is a fixed pattern of
// 4 pixels of the VDPREG[7] high-nibble color followed by 2 pixels of
// the low-nibble color.
////////////////////////////////////////////////////////////////////////
void VDPillegal()
{ 
	const int reg7 = VDPREG[7];
	const int paper = reg7 & 0xf;
	const int ink = reg7 >> 4;
	int edgeX, edgeY;
	int top, left, line, bit;

	// erase border area
	for (edgeX = 0; edgeX < 8; edgeX++) {
		for (edgeY = 0; edgeY < 192; edgeY++) {
			pixel(edgeX, edgeY, paper);
			pixel(edgeX + 248, edgeY, paper);
		}
	}

	// Each character is made up of rows of 4 pixels foreground, 2 pixels background
	for (top = 0; top < 192; top += 8) {
		for (left = 8; left < 248; left += 6) {
			for (line = 0; line < 8; line++) {
				for (bit = 0; bit < 6; bit++) {
					pixel(left + bit, top + line, (bit < 4) ? ink : paper);
				}
			}
		}
	}
	// no sprites in this mode
}

/////////////////////////////////////////////////////
// Draw Multicolor Mode
// Every 8x8 cell holds four 4x4 blocks. A cell uses two
// consecutive pattern bytes (upper and lower block pair;
// high nibble = left block, low nibble = right block),
// and which pair is used cycles with the character row.
/////////////////////////////////////////////////////
void VDPmulticolor() 
{
	int top, left, half, line, bit;
	int cell = 0;					// offset in SIT
	int rowOffset;					// offset in pattern
	int patAddr, colors, leftColor, rightColor, y;
	unsigned char name = 0xff;		// wraps to 0 on the first debug increment

	for (top = 0; top < 192; top += 8) {
		// 0, 2, 4, 6, 0, ... on successive character rows
		rowOffset = ((top >> 3) & 0x03) * 2;

		for (left = 0; left < 256; left += 8) {
			if (VDPDebug) {
				name++;
			} else {
				name = VDP[SIT + cell];
			}
			cell++;

			patAddr = PDT + (name << 3) + rowOffset;

			// upper block pair, then lower block pair
			for (half = 0; half < 2; half++) {
				colors = VDP[patAddr++];
				leftColor = colors >> 4;
				rightColor = colors & 0x0f;

				for (line = 0; line < 4; line++) {
					y = top + (half * 4) + line;
					for (bit = 0; bit < 8; bit++) {
						pixel(left + bit, y, (bit < 4) ? leftColor : rightColor);
					}
				}
			}
		}
	}

	DrawSprites();
}

/////////////////////////////////////////////////////
// Draw Bitmap Multicolor Mode
// Works like VDPmulticolor (two pattern bytes per cell,
// selected by the character row, each giving a left and
// right 4x4 block color), but the pattern offset gains
// 0x800 per 64-line band and is masked with PDTsize.
/////////////////////////////////////////////////////
void VDPmulticolorII() 
{
	int o;								// temp variables
	int i1,i2,i3, i4;					// temp variables
	int p_add;
	int fgc, bgc;
	int off;
	int table, Poffset;
	unsigned char ch=0xff;

	o=0;							// offset in SIT
	off=0;							// offset in pattern
	table=0; Poffset=0;

	for (i1=0; i1<192; i1+=8)									// y loop
	{ 
		// move to the next pattern table band every 64 lines
		if ((i1==64)||(i1==128)) {
			table++;
			Poffset=table*0x800;
		}

		for (i2=0; i2<256; i2+=8)								// x loop
		{ 
			if (VDPDebug) {
				ch++;
			} else {
				ch=VDP[SIT+o];
			}

			// The row offset was previously computed but never applied, so every
			// character row drew from the first two pattern bytes. It belongs in
			// the address just as in VDPmulticolor. gettables() always sets the
			// low six bits of PDTsize in this mode, so the mask never strips it.
			p_add=PDT+(((ch<<3)+off+Poffset)&PDTsize);
			o++;

			for (i3=0; i3<7; i3+=4)		// upper half (0) and lower half (4) of the cell
			{	
				fgc=VDP[p_add++];
				bgc=fgc&0x0f;			// right block color
				fgc>>=4;				// left block color
	
				for (i4=0; i4<4; i4++) {
					pixel(i2,i1+i3+i4,fgc);
					pixel(i2+1,i1+i3+i4,fgc);
					pixel(i2+2,i1+i3+i4,fgc);
					pixel(i2+3,i1+i3+i4,fgc);
					pixel(i2+4,i1+i3+i4,bgc);
					pixel(i2+5,i1+i3+i4,bgc);
					pixel(i2+6,i1+i3+i4,bgc);
					pixel(i2+7,i1+i3+i4,bgc);
				}
			}
		}
		// each character row uses the next pair of pattern bytes: 0, 2, 4, 6, 0, ...
		off+=2;
		if (off>7) off=0;
	}

	DrawSprites();

	return;
}

////////////////////////////////////////////////////////////////
// Stretch-blit the buffer into the active window
//
// NOTES: Graphics modes we have (and some we need)
// 272x208 -- the standard default pixel mode of the 9918A plus a fixed (incorrect) border
// 544x413 -- the double-sized filters (minus 1 scanline due to corruption)
// 1088x832 - HQ4x buffer
// 634x413 -- TV mode
// 528x208 -- 80-column mode
// These are all with 24 rows -- the F18A adds a 26.5 row mode (212 pixels) (todo: or was it 240?)
// So this adds another 20 (or 48) pixels to each mode
// One solution might be to simply render a fixed TV display and scale to fit...
// The only place it really matters if resolution changes is video recording.
// In that case, the vertical can always be the same - the extra rows just cut into overscan.
// Horizontal, unscaled, is either 272, 528 or 634. We could adapt a buffer size that
// fits all, maybe, and just adjust the amount of overscan area...?
// Alternately... maybe we just blit whatever into a fixed size video buffer (say, 2x) and be done?
////////////////////////////////////////////////////////////////
// Presents the current frame in the window. Steps:
//  1. try to take VideoCS; if it is busy the frame is simply skipped
//  2. 80-column text mode is stretched straight from framedata and returns
//  3. the selected filter (FilterMode) expands framedata into framedata2
//  4. the result is drawn according to StretchMode:
//       1 = GDI stretch, 2 = DirectDraw windowed, 3 = DirectDraw full screen,
//       anything else = unscaled, centered in the client area
// StretchMode is reset to 0 when DirectDraw cannot be brought up.
void doBlit()
{
	RECT rect1;
	int x,y;
	HRESULT ret;

	if (!TryEnterCriticalSection(&VideoCS)) {
		return;		// do it later
	}

	GetClientRect(myWnd, &rect1);
	myDC=GetDC(myWnd);
	SetStretchBltMode(myDC, COLORONCOLOR);

	// TODO: hacky city - 80-column mode doesn't filter or anything, cause we'd have to change ALL the stuff below.
	if ((bEnable80Columns)&&(VDPREG[0]&0x04)&&(VDPREG[1]&0x10)) {
		// render 80 columns to the screen using DIB blit
		StretchDIBits(myDC, 0, 0, rect1.right-rect1.left, rect1.bottom-rect1.top, 0, 0, 512+16, 192+16, framedata, &myInfo80Col, 0, SRCCOPY);
		ReleaseDC(myWnd, myDC);
		LeaveCriticalSection(&VideoCS);
		return;
	}

	// make sure filters work before calling them
	if (FilterMode == 4) {
		if ((!TVFiltersAvailable) || (NULL == sms_ntsc_blit)) {
			MessageBox(myWnd, "Filter DLL not available - reverting to no filter.", "Classic99 Error", MB_OK);
			PostMessage(myWnd, WM_COMMAND, ID_VIDEO_FILTERMODE_NONE, 0);
			ReleaseDC(myWnd, myDC);
			LeaveCriticalSection(&VideoCS);
			return;
		}
	}
	if (FilterMode == 5) {
		if ((NULL == hHQ4DLL) || (NULL == hq4x_init)) {
			MessageBox(myWnd, "HQ4 DLL not available - reverting to no filter.", "Classic99 Error", MB_OK);
			PostMessage(myWnd, WM_COMMAND, ID_VIDEO_FILTERMODE_NONE, 0);
			ReleaseDC(myWnd, myDC);
			LeaveCriticalSection(&VideoCS);
			return;
		}
	}

	// Do the filtering - we throw away the top and bottom 3 scanlines due to some garbage there - it's border anyway
	switch (FilterMode) {
	case 1: // 2xSaI
		_2xSaI((uint8*) framedata+((256+16)*4), ((256+16)*4), NULL, (uint8*)framedata2, (512+32)*4, 256+16, 191+16);
		break;
	case 2: // Super2xSaI
		Super2xSaI((uint8*) framedata+((256+16)*4), ((256+16)*4), NULL, (uint8*)framedata2, (512+32)*4, 256+16, 191+16);
		break;
	case 3: // SuperEagle
		SuperEagle((uint8*) framedata+((256+16)*4), ((256+16)*4), NULL, (uint8*)framedata2, (512+32)*4, 256+16, 191+16);
		break;
	case 4:	// TV filter
		// This filter outputs 602 pixels for 256 in. What we should do is resize the window
		// we eventually produce a TV_WIDTH x 384+29 image (leaving vertical the same)
		sms_ntsc_blit(&tvFilter, framedata, 256+16, 256+16, 192+16, framedata2, (TV_WIDTH)*2*4);
		if (TVScanLines) {
			sms_ntsc_scanlines(framedata2, TV_WIDTH, (TV_WIDTH)*4, 384+29);
		} else {
			// Duplicate every line instead: each odd line becomes a copy of the even line before it
			for (int line=1; line<384+29; line+=2) {
				memcpy(&framedata2[line*TV_WIDTH], &framedata2[(line-1)*TV_WIDTH], sizeof(framedata2[0])*TV_WIDTH);
			}
		}
		break;
	case 5:	// HQ4x filter - super hi-def!
		if (NULL != hq4x_process) {
			hq4x_process((unsigned char*)framedata, (unsigned char*)framedata2);
		}
		break;
	}

	switch (StretchMode) {
	case 1:	// DIB
		switch (FilterMode) {
		case 0:		// none
			StretchDIBits(myDC, 0, 0, rect1.right-rect1.left, rect1.bottom-rect1.top, 0, 0, 256+16, 192+16, framedata, &myInfo, 0, SRCCOPY);
			break;

		case 4:		// TV
			StretchDIBits(myDC, 0, 0, rect1.right-rect1.left, rect1.bottom-rect1.top, 0, 0, TV_WIDTH, 384+29, framedata2, &myInfoTV, 0, SRCCOPY);
			break;

		case 5:		// hq4x
			StretchDIBits(myDC, 0, 0, rect1.right-rect1.left, rect1.bottom-rect1.top, 0, 0, (256+16)*4, (192+16)*4, framedata2, &myInfo32, 0, SRCCOPY);
			break;

		default:	// all the SAI ones
			StretchDIBits(myDC, 0, 0, rect1.right-rect1.left, rect1.bottom-rect1.top, 0, 0, 512+32, 384+29, framedata2, &myInfo2, 0, SRCCOPY);
		}
		break;

	case 2: // DX
		if (NULL == lpdd) {
			SetupDirectDraw(0);
			if (NULL == lpdd) {
				StretchMode=0;
				break;
			}
		}

		if (DD_OK != lpdd->TestCooperativeLevel()) {
			break;
		}

		if (NULL == ddsBack) {
			StretchMode=0;
			break;
		}

		if (DD_OK == ddsBack->GetDC(&tmpDC)) {	// color depth translation
			switch (FilterMode) {
				case 0:
					// original buffer
					SetDIBitsToDevice(tmpDC, 0, 0, 256+16, 192+16, 0, 0, 0, 192+16, framedata, &myInfo, DIB_RGB_COLORS);
					break;

				case 4:
					// TV buffer
					SetDIBitsToDevice(tmpDC, 0, 0, TV_WIDTH, 384+29, 0, 0, 0, 384+29, framedata2, &myInfoTV, DIB_RGB_COLORS);
					break;
				
				case 5:
					// 4x buffer
					SetDIBitsToDevice(tmpDC, 0, 0, (256+16)*4, (192+16)*4, 0, 0, 0, (192+16)*4, framedata2, &myInfo32, DIB_RGB_COLORS);
					break;

				default:
					// 2x buffer
					SetDIBitsToDevice(tmpDC, 0, 0, 512+32, 384+29, 0, 0, 0, 384+29, framedata2, &myInfo2, DIB_RGB_COLORS);
					break;
			}
			// Only hand back a DC that GetDC actually gave us
			ddsBack->ReleaseDC(tmpDC);
		}

		// rect1 holds the client size; shift it by the client origin so it becomes
		// the client area expressed in screen coordinates
		POINT pt;
		pt.x = 0;
		pt.y = 0;
		ClientToScreen(myWnd, &pt);
		rect1.top = pt.y;
		rect1.bottom += pt.y;
		rect1.left = pt.x;
		rect1.right+= pt.x;

		// The DirectDraw blit will draw using screen coordinates but into the client area thanks to the clipper
		if (DDERR_SURFACELOST == lpdds->Blt(&rect1, ddsBack, NULL, DDBLT_DONOTWAIT, NULL)) {	// Just go as quick as we can, don't bother waiting
			lpdd->RestoreAllSurfaces();
		}
		break;

	case 3: // DX Full
		if (NULL == lpdd) {
			SetupDirectDraw(FullScreenMode);
			if (NULL == lpdd) {
				StretchMode=0;
				break;
			}
		}

		if (DD_OK != lpdd->TestCooperativeLevel()) {
			break;
		}
		
		if (NULL == ddsBack) {
			StretchMode=0;
			break;
		}
		if (DD_OK == ddsBack->GetDC(&tmpDC)) {	// color depth translation
			switch (FilterMode) {
				case 0:		// none
					SetDIBitsToDevice(tmpDC, 0, 0, 256+16, 192+16, 0, 0, 0, 192+16, framedata, &myInfo, DIB_RGB_COLORS);
					break;

				case 4:		// tv
					SetDIBitsToDevice(tmpDC, 0, 0, TV_WIDTH, 384+29, 0, 0, 0, 384+29, framedata2, &myInfoTV, DIB_RGB_COLORS);
					break;

				case 5:		// hq4x
					SetDIBitsToDevice(tmpDC, 0, 0, (256+16)*4, (192+16)*4, 0, 0, 0, (192+16)*4, framedata2, &myInfo32, DIB_RGB_COLORS);
					break;

				default:	// 2x
					SetDIBitsToDevice(tmpDC, 0, 0, 512+32, 384+29, 0, 0, 0, 384+29, framedata2, &myInfo2, DIB_RGB_COLORS);
					break;
			}
			// Only hand back a DC that GetDC actually gave us
			ddsBack->ReleaseDC(tmpDC);
		}
		if (DD_OK != (ret=lpdds->Blt(NULL, ddsBack, NULL, DDBLT_DONOTWAIT, NULL))) {
			if (DDERR_SURFACELOST == ret) {
				lpdd->RestoreAllSurfaces();
			}
		}
		break;

	default:// None
		// Center it in the window, whatever size
		switch (FilterMode) {
		case 0:		// none
			x=(rect1.right-rect1.left-(256+16))/2;
			y=(rect1.bottom-rect1.top-(192+16))/2;
			SetDIBitsToDevice(myDC, x, y, 256+16, 192+16, 0, 0, 0, 192+16, framedata, &myInfo, DIB_RGB_COLORS);
			break;
		
		case 4:		// TV
			x=(rect1.right-rect1.left-(TV_WIDTH))/2;
			y=(rect1.bottom-rect1.top-(384+29))/2;
			SetDIBitsToDevice(myDC, x, y, TV_WIDTH, 384+29, 0, 0, 0, 384+29, framedata2, &myInfoTV, DIB_RGB_COLORS);
			break;

		case 5:		// hq4x
			x=(rect1.right-rect1.left-(256+16)*4)/2;
			y=(rect1.bottom-rect1.top-(192+16)*4)/2;
			SetDIBitsToDevice(myDC, x, y, (256+16)*4, (192+16)*4, 0, 0, 0, (192+16)*4, framedata2, &myInfo32, DIB_RGB_COLORS);
			break;

		default:	// 2x
			x=(rect1.right-rect1.left-(512+32))/2;
			y=(rect1.bottom-rect1.top-(384+29))/2;
			SetDIBitsToDevice(myDC, x, y, 512+32, 384+29, 0, 0, 0, 384+29, framedata2, &myInfo2, DIB_RGB_COLORS);
			break;
		}
		break;
	}

	ReleaseDC(myWnd, myDC);

	LeaveCriticalSection(&VideoCS);
}

//////////////////////////////////////////////////////////
// Draw Sprites into the backbuffer
//////////////////////////////////////////////////////////
void DrawSprites()
{
	int i1, i2, i3, xx, yy, pat, col, p_add, t, sc;
	int highest;
	int curSAL;

	// a hacky, but effective 4-sprite-per-line limitation emulation
	// We can do this right when we have scanline based VDP
	char nLines[192];
	char bSkipScanLine[32][32];		// 32 sprites, 32 lines max
	int b5OnLine=0;					// first sprite >4 on scanline

	if (bDisableSprite) {
		return;
	}

	memset(nLines, 0, sizeof(nLines));
	memset(bSkipScanLine, 0, sizeof(bSkipScanLine));

	// set up the draw
	memset(SprColBuf, 0, 256*192);
	SprColFlag=0;
	
	highest=31;

	// find the highest active sprite
	for (i1=0; i1<32; i1++)			// 32 sprites 
	{
		yy=VDP[SAL+(i1<<2)];
		if (yy==0xd0)
		{
			highest=i1-1;
			break;
		}
	}
	
	if (bUse5SpriteLimit) {
		// go through the sprite table and check if any scanlines are obliterated by 4-per-line
		i3=8;							// number of sprite scanlines
		if (VDPREG[1] & 0x2) {			 // TODO: Handle F18A ECM where sprites are doubled individually
			// double-sized
			i3*=2;
		}
		if (VDPREG[1]&0x01)	{
			// magnified sprites
			i3*=2;
		}
		for (i1=0; i1<=highest; i1++) {
			curSAL=SAL+(i1<<2);
			yy=VDP[curSAL]+1;				// sprite Y, it's stupid, cause 255 is line 0 
			if (yy>225) yy-=256;			// fade in from top
			t=yy;
			for (i2=0; i2<i3; i2++,t++) {
				if ((t>=0) && (t<=191)) {
					nLines[t]++;
					if (nLines[t]>4) {
						b5OnLine=i1;
						bSkipScanLine[i1][i2]=1;
					}
				}
			}
		}
	}

	// now draw
	for (i1=highest; i1>=0; i1--)	
	{	
		curSAL=SAL+(i1<<2);
		yy=VDP[curSAL++]+1;				// sprite Y, it's stupid, cause 255 is line 0 
		if (yy>225) yy-=256;			// fade in from top: TODO: is this right??
		xx=VDP[curSAL++];				// sprite X 
		pat=VDP[curSAL++];				// sprite pattern
		int dblSize = F18AECModeSprite ? VDP[curSAL] & 0x10 : VDPREG[1] & 0x2;
		if (dblSize) {
			pat=pat&0xfc;				// if double-sized, it must be a multiple of 4
		}
		col=VDP[curSAL]&0xf;			// sprite color 
	
		if (VDP[curSAL++]&0x80)	{		// early clock
			xx-=32;
		}

		// Even transparent sprites get drawn into the collision buffer
		p_add=SDT+(pat<<3);
		sc=0;						// current scanline
		
		// Added by Rasmus M
		// TODO: For ECM 1 we need one more bit from R24
		int paletteBase = F18AECModeSprite ? (col >> (F18AECModeSprite - 2)) * F18ASpritePaletteSize : 0;
		int F18ASpriteColorLine[8]; // Colors indices for each of the 8 pixels in a sprite scan line

		if (VDPREG[1]&0x01)	{		// magnified sprites
			for (i3=0; i3<16; i3+=2)
			{	
				t = pixelMask(p_add++, F18ASpriteColorLine);	// Modified by RasmusM. Sets up the F18ASpriteColorLine[] array.

				if (!bSkipScanLine[i1][sc]) {
					if (t&0x80) 
						bigpixel(xx, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
					if (t&0x40)
						bigpixel(xx+2, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
					if (t&0x20)
						bigpixel(xx+4, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
					if (t&0x10)
						bigpixel(xx+6, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
					if (t&0x08)
						bigpixel(xx+8, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
					if (t&0x04)
						bigpixel(xx+10, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
					if (t&0x02)
						bigpixel(xx+12, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
					if (t&0x01)
						bigpixel(xx+14, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
				}

				if (dblSize)		// double-size sprites, need to draw 3 more chars 
				{	
					t = pixelMask(p_add + 7, F18ASpriteColorLine);	// Modified by RasmusM
	
					if (!bSkipScanLine[i1][sc+16]) {
						if (t&0x80)
							bigpixel(xx, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							bigpixel(xx+2, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							bigpixel(xx+4, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)
							bigpixel(xx+6, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							bigpixel(xx+8, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							bigpixel(xx+10, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							bigpixel(xx+12, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							bigpixel(xx+14, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);

						t = pixelMask(p_add + 23, F18ASpriteColorLine);	// Modified by RasmusM
						if (t&0x80)
							bigpixel(xx+16, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							bigpixel(xx+18, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							bigpixel(xx+20, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)
							bigpixel(xx+22, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							bigpixel(xx+24, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							bigpixel(xx+26, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							bigpixel(xx+28, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							bigpixel(xx+30, yy+i3+16, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
					}

					if (!bSkipScanLine[i1][sc]) {
						t = pixelMask(p_add + 15, F18ASpriteColorLine);	// Modified by RasmusM
						if (t&0x80)
							bigpixel(xx+16, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							bigpixel(xx+18, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							bigpixel(xx+20, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)	
							bigpixel(xx+22, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							bigpixel(xx+24, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							bigpixel(xx+26, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							bigpixel(xx+28, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							bigpixel(xx+30, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
					}
				}
				sc+=2;
			}
		} else {
			for (i3=0; i3<8; i3++)
			{	
				t = pixelMask(p_add++, F18ASpriteColorLine);	// Modified by RasmusM

				if (!bSkipScanLine[i1][sc]) {
					if (t&0x80)
						spritepixel(xx, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
					if (t&0x40)
						spritepixel(xx+1, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
					if (t&0x20)
						spritepixel(xx+2, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
					if (t&0x10)
						spritepixel(xx+3, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
					if (t&0x08)
						spritepixel(xx+4, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
					if (t&0x04)
						spritepixel(xx+5, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
					if (t&0x02)
						spritepixel(xx+6, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
					if (t&0x01)
						spritepixel(xx+7, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
				}

				if (dblSize)		// double-size sprites, need to draw 3 more chars 
				{	
					t = pixelMask(p_add + 7, F18ASpriteColorLine);	// Modified by RasmusM

					if (!bSkipScanLine[i1][sc+8]) {
						if (t&0x80)
							spritepixel(xx, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							spritepixel(xx+1, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							spritepixel(xx+2, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)
							spritepixel(xx+3, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							spritepixel(xx+4, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							spritepixel(xx+5, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							spritepixel(xx+6, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							spritepixel(xx+7, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);

						t = pixelMask(p_add + 23, F18ASpriteColorLine);	// Modified by RasmusM
						if (t&0x80)
							spritepixel(xx+8, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							spritepixel(xx+9, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							spritepixel(xx+10, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)
							spritepixel(xx+11, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							spritepixel(xx+12, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							spritepixel(xx+13, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							spritepixel(xx+14, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							spritepixel(xx+15, yy+i3+8, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
					}

					if (!bSkipScanLine[i1][sc]) {
						t = pixelMask(p_add + 15, F18ASpriteColorLine);	// Modified by RasmusM
						if (t&0x80)
							spritepixel(xx+8, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[0] : col);
						if (t&0x40)
							spritepixel(xx+9, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[1] : col);
						if (t&0x20)
							spritepixel(xx+10, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[2] : col);
						if (t&0x10)	
							spritepixel(xx+11, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[3] : col);
						if (t&0x08)
							spritepixel(xx+12, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[4] : col);
						if (t&0x04)
							spritepixel(xx+13, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[5] : col);
						if (t&0x02)
							spritepixel(xx+14, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[6] : col);
						if (t&0x01)
							spritepixel(xx+15, yy+i3, F18AECModeSprite ? paletteBase + F18ASpriteColorLine[7] : col);
					}
				}
				sc++;
			}
		}
	}
	// Set the VDP collision bit
	if (SprColFlag) {
		VDPS|=VDPS_SCOL;
	}
	if (b5OnLine) {
		VDPS|=VDPS_5SPR;
		VDPS&=(VDPS_INT|VDPS_5SPR|VDPS_SCOL);
		VDPS|=b5OnLine&(~(VDPS_INT|VDPS_5SPR|VDPS_SCOL));
	}
}

////////////////////////////////////////////////////////////
// Draw a pixel onto the backbuffer surface
////////////////////////////////////////////////////////////
// Writes palette color c at display position (x,y). No range checking is done.
// The buffer is 256+16 pixels wide and stored bottom-up (display line y lands
// on buffer row 199-y), with an 8 pixel offset on the left.
void pixel(int x, int y, int c)
{
	const int row = 199-y;
	framedata[row*(256+16) + x + 8] = TIPALETTE[c];
}

////////////////////////////////////////////////////////////
// Draw a pixel onto the backbuffer surface in 80 column mode
////////////////////////////////////////////////////////////
// 80-column variant of pixel(): same bottom-up layout and 8 pixel left offset,
// but each buffer row is 512+16 pixels wide. No range checking is done.
void pixel80(int x, int y, int c)
{
	const int row = 199-y;
	framedata[row*(512+16) + x + 8] = TIPALETTE[c];
}

////////////////////////////////////////////////////////////
// Draw a range-checked pixel onto the backbuffer surface
////////////////////////////////////////////////////////////
// Plots one sprite pixel. Positions outside 0..255 x 0..191 are ignored.
// Every in-range pixel is recorded in the collision buffer (a second hit on
// the same spot raises SprColFlag) even if its color is transparent; only
// non-transparent colors reach the frame buffer.
void spritepixel(int x, int y, int c)
{
	if ((x<0)||(x>255)||(y<0)||(y>191)) {
		return;
	}

	if (SprColBuf[x][y]) {
		SprColFlag=1;			// something was already drawn here this frame
	} else {
		SprColBuf[x][y]=1;
	}

	// Color 0 is transparent; in F18A ECM sprite mode that means index 0 within the palette group. Modified by RasmusM
	const int opaque = F18AECModeSprite ? c % F18ASpritePaletteSize : c;
	if (opaque == 0) {
		return;					// don't DRAW transparent
	}

	// same bottom-up layout as pixel(): 256+16 wide rows, 8 pixel left offset
	const int row = 199-y;
	framedata[row*(256+16) + x + 8] = F18AECModeSprite ? F18APalette[c] : TIPALETTE[c];	// Modified by RasmusM
}

////////////////////////////////////////////////////////////
// Draw a magnified pixel onto the backbuffer surface
////////////////////////////////////////////////////////////
// Plots a 2x2 block of sprite pixels with its top-left corner at (x,y),
// row by row, left to right. Range checking is left to spritepixel().
void bigpixel(int x, int y, int c)
{
	int dx, dy;

	for (dy=0; dy<2; dy++) {
		for (dx=0; dx<2; dx++) {
			spritepixel(x+dx, y+dy, c);
		}
	}
}

////////////////////////////////////////////////////////////
// Pixel mask
////////////////////////////////////////////////////////////
// Returns the 8-bit "which pixels are set" mask for the pattern row at VDP
// address addr. When F18AECModeSprite is 0 this is just the pattern byte and
// F18ASpriteColorLine is left untouched. Otherwise up to three bit planes
// (addr, addr+0x0800, addr+0x1000 - one per mode level) are combined: the
// mask is the OR of the planes and F18ASpriteColorLine[0..7] receives the
// color index of each pixel, leftmost first, with plane n supplying bit n.
int pixelMask(int addr, int F18ASpriteColorLine[])
{
	int mask = VDP[addr];
	int plane, pix;

	if (F18AECModeSprite <= 0) {
		return mask;
	}

	// one plane per mode level, never more than three
	const int planes = (F18AECModeSprite > 3) ? 3 : F18AECModeSprite;

	for (pix=0; pix<8; pix++) {
		F18ASpriteColorLine[pix]=0;
	}

	for (plane=0; plane<planes; plane++) {
		const int bits = VDP[addr + plane*0x0800];
		for (pix=0; pix<8; pix++) {
			F18ASpriteColorLine[pix] |= ((bits >> (7 - pix)) & 0x01) << plane;
		}
		mask |= bits;
	}

	return mask;
}

////////////////////////////////////////////////////////////
// DirectX full screen enumeration callback
////////////////////////////////////////////////////////////
// Display mode enumeration callback. pData points at an int holding the
// wanted bit depth. When an enumerated mode has that depth, bit 0x80 is set
// in the int as a "found" marker and enumeration is cancelled; otherwise
// enumeration continues.
HRESULT WINAPI myCallBack(LPDDSURFACEDESC2 ddSurface, LPVOID pData) {
	int *pDepth=(int*)pData;

	if (ddSurface->ddpfPixelFormat.dwRGBBitCount != (DWORD)*pDepth) {
		return DDENUMRET_OK;		// not the depth we want, keep looking
	}

	*pDepth |= 0x80;
	return DDENUMRET_CANCEL;
}

////////////////////////////////////////////////////////////
// Setup DirectDraw, with the requested fullscreen mode
// In order for Fullscreen to work, only the main thread
// may call this function!
////////////////////////////////////////////////////////////
void SetupDirectDraw(int fullscreen) {
	int x,y,c;
	RECT myRect;

	EnterCriticalSection(&VideoCS);

	// directdraw is deprecated -- for now we can still do this, but
	// we need to replace this API with Direct3D (TODO)
    HINSTANCE hInstDDraw;
    LPDIRECTDRAWCREATEEX pDDCreate = NULL;

    hInstDDraw = LoadLibrary( "ddraw.dll" );
    if( hInstDDraw == NULL ) {
		MessageBox(myWnd, "Can't load DLL for DirectDraw 7\nClassic99 Requires DirectX 7 for DX and Full screen modes", "Classic99 Error", MB_OK);
		lpdd=NULL;
		StretchMode=0;
		goto optout;
	}

    pDDCreate = ( LPDIRECTDRAWCREATEEX )GetProcAddress( hInstDDraw, "DirectDrawCreateEx" );

	if (pDDCreate(NULL, (void**)&lpdd, IID_IDirectDraw7, NULL)!=DD_OK) {
		MessageBox(myWnd, "Unable to initialize DirectDraw 7\nClassic99 Requires DirectX 7 for DX and Full screen modes", "Classic99 Error", MB_OK);
		lpdd=NULL;
		StretchMode=0;
	} else {
		if (fullscreen) {
			DDSURFACEDESC2 myDesc;

			GetWindowRect(myWnd, &myRect);

			switch (fullscreen) {
				case 1: x=320; y=240; c=8; break;
				case 2: x=640; y=480; c=8; break;
				case 3: x=640; y=480; c=16; break;
				case 4: x=640; y=480; c=32; break;
				case 5: x=800; y=600; c=16; break;
				case 6: x=800; y=600; c=32; break;
				case 7: x=1024; y=768; c=16; break;
				case 8: x=1024; y=768; c=32; break;
				default:x=640; y=480; c=16; break;
			}

			// Check if mode is legal
			ZeroMemory(&myDesc, sizeof(myDesc));
			myDesc.dwSize=sizeof(myDesc);
			myDesc.dwFlags=DDSD_HEIGHT | DDSD_WIDTH;
			myDesc.dwWidth=x;
			myDesc.dwHeight=y;
			lpdd->EnumDisplayModes(0, &myDesc, (void*)&c, myCallBack);
			// If a valid mode was found, 'c' has 0x80 ORd with it
			if (0 == (c&0x80)) {
				MessageBox(myWnd, "Requested graphics mode is not supported on the primary display.", "Classic99 Error", MB_OK);
				if (lpdd) lpdd->Release();
				lpdd=NULL;
				StretchMode=0;
				MoveWindow(myWnd, myRect.left, myRect.top, myRect.right-myRect.left, myRect.bottom-myRect.top, true);
				goto optout;
			}

			c&=0x7f;	// Remove the flag bit

			if (lpdd->SetCooperativeLevel(myWnd, DDSCL_EXCLUSIVE | DDSCL_ALLOWREBOOT | DDSCL_FULLSCREEN | DDSCL_ALLOWMODEX)!=DD_OK) {
				MessageBox(myWnd, "Unable to set cooperative level\nFullscreen DX is not available", "Classic99 Error", MB_OK);
				if (lpdd) lpdd->Release();
				lpdd=NULL;
				StretchMode=0;
				MoveWindow(myWnd, myRect.left, myRect.top, myRect.right-myRect.left, myRect.bottom-myRect.top, true);
				goto optout;
			}

			if (lpdd->SetDisplayMode(x,y,c,0,0) != DD_OK) {
				MessageBox(myWnd, "Unable to set display mode.\nRequested DX mode is not available", "Classic99 Error", MB_OK);
				MoveWindow(myWnd, myRect.left, myRect.top, myRect.right-myRect.left, myRect.bottom-myRect.top, true);
				StretchMode=0;
				goto optout;
			}
		} else {
			if (lpdd->SetCooperativeLevel(myWnd, DDSCL_NORMAL)!=DD_OK) {
				MessageBox(myWnd, "Unable to set cooperative level\nDX mode is not available", "Classic99 Error", MB_OK);
				if (lpdd) lpdd->Release();
				lpdd=NULL;
				StretchMode=0;
				goto optout;
			}
		}

		ZeroMemory(&CurrentDDSD, sizeof(CurrentDDSD));
		CurrentDDSD.dwSize=sizeof(CurrentDDSD);
		CurrentDDSD.dwFlags=DDSD_CAPS;
		CurrentDDSD.ddsCaps.dwCaps=DDSCAPS_PRIMARYSURFACE;

		if (lpdd->CreateSurface(&CurrentDDSD, &lpdds, NULL) !=DD_OK) {
			MessageBox(myWnd, "Unable to create primary surface\nDX mode is not available", "Classic99 Error", MB_OK);
			if (lpdd) lpdd->Release();
			lpdd=NULL;
			StretchMode=0;
			goto optout;
		}

		ZeroMemory(&CurrentDDSD, sizeof(CurrentDDSD));
		CurrentDDSD.dwSize=sizeof(CurrentDDSD);
		CurrentDDSD.dwFlags=DDSD_HEIGHT | DDSD_WIDTH;
		switch (FilterMode) {
			case 0:		// none
				CurrentDDSD.dwWidth=256+16;
				CurrentDDSD.dwHeight=192+16;
				break;

			case 4:		// TV
				CurrentDDSD.dwWidth=TV_WIDTH;
				CurrentDDSD.dwHeight=384+29;
				break;

			case 5:		// hq4x
				CurrentDDSD.dwWidth=(256+16)*4;
				CurrentDDSD.dwHeight=(192+16)*4;
				break;

			default:	// others (*2)
				CurrentDDSD.dwWidth=512+32;
				CurrentDDSD.dwHeight=384+29;
				break;
		}

		if (lpdd->CreateSurface(&CurrentDDSD, &ddsBack, NULL) !=DD_OK) {
			MessageBox(myWnd, "Unable to create back buffer surface\nDX mode is not available", "Classic99 Error", MB_OK);
			ddsBack=NULL;
			lpdds->Release();
			lpdds=NULL;
			lpdd->Release();
			lpdd=NULL;
			StretchMode=0;
			goto optout;
		}

		if (!fullscreen) {
			if (lpdd->CreateClipper(0, &lpDDClipper, NULL) != DD_OK) {
				MessageBox(myWnd, "Warning: Unable to create Direct Draw Clipper", "Classic99 Warning", MB_OK);
			} else {
				if (lpDDClipper->SetHWnd(0, myWnd) != DD_OK) {
					MessageBox(myWnd, "Warning: Unable to set Clipper Window", "Classic99 Warning", MB_OK);
					lpDDClipper->Release();
					lpDDClipper=NULL;
				} else {
					if (DD_OK != lpdds->SetClipper(lpDDClipper)) {
						MessageBox(myWnd, "Warning: Unable to attach Clipper", "Classic99 Warning", MB_OK);
						lpDDClipper->Release();
						lpDDClipper=NULL;
					}
				}
			}
		}
	}
	LeaveCriticalSection(&VideoCS);
	return;

optout: ;
	takedownDirectDraw();
	LeaveCriticalSection(&VideoCS);
}

////////////////////////////////////////////////////////////
// Release all references to DirectDraw objects
////////////////////////////////////////////////////////////
// Releases the clipper, back buffer, primary surface and DirectDraw object
// (in that order) under VideoCS, leaving all four pointers NULL. Pointers that
// are already NULL are skipped, so calling this repeatedly is harmless.
void takedownDirectDraw() {	
	EnterCriticalSection(&VideoCS);

	if (lpDDClipper != NULL) {
		lpDDClipper->Release();
		lpDDClipper=NULL;
	}
	if (ddsBack != NULL) {
		ddsBack->Release();
		ddsBack=NULL;
	}
	if (lpdds != NULL) {
		lpdds->Release();
		lpdds=NULL;
	}
	if (lpdd != NULL) {
		lpdd->Release();
		lpdd=NULL;
	}

	LeaveCriticalSection(&VideoCS);
}

////////////////////////////////////////////////////////////
// Resize the back buffer
////////////////////////////////////////////////////////////
// Replaces the DirectDraw back buffer with a new w x h surface, bringing up
// windowed DirectDraw first if it is not running.
// Returns 0 on success. On failure shows a message box, leaves ddsBack NULL,
// resets StretchMode to 0 and returns 1.
int ResizeBackBuffer(int w, int h) {
	EnterCriticalSection(&VideoCS);

	if (NULL != ddsBack) ddsBack->Release();
	ddsBack=NULL;

	if (NULL == lpdd) {
		SetupDirectDraw(0);
		if (NULL == lpdd) {
			MessageBox(myWnd, "Unable to create back buffer surface\nDX mode is not available", "Classic99 Error", MB_OK);
			ddsBack=NULL;
			StretchMode=0;
			LeaveCriticalSection(&VideoCS);
			return 1;
		}

		// SetupDirectDraw creates its own back buffer; release it here so the
		// CreateSurface below does not overwrite the pointer and leak it
		if (NULL != ddsBack) ddsBack->Release();
		ddsBack=NULL;
	}

	ZeroMemory(&CurrentDDSD, sizeof(CurrentDDSD));
	CurrentDDSD.dwSize=sizeof(CurrentDDSD);
	CurrentDDSD.dwFlags=DDSD_HEIGHT | DDSD_WIDTH;
	CurrentDDSD.dwWidth=w;
	CurrentDDSD.dwHeight=h;

	if (lpdd->CreateSurface(&CurrentDDSD, &ddsBack, NULL) != DD_OK) {
		MessageBox(myWnd, "Unable to create back buffer surface\nDX mode is not available", "Classic99 Error", MB_OK);
		ddsBack=NULL;
		StretchMode=0;
		LeaveCriticalSection(&VideoCS);
		return 1;
	}

	LeaveCriticalSection(&VideoCS);
	return 0;
}

//////////////////////////////////////
// Save a screenshot - just BMP for now
// there are lots of nice helpers for others in
// 2000 and higher, but that's ok 
//////////////////////////////////////
// Writes the current frame to a 24-bit BMP file.
//  bAuto     - when true and a base name was chosen earlier, no dialog is
//              shown; the file is named <base>NNNN.bmp using the next number
//              that does not already exist. Otherwise a Save dialog is shown
//              and its result becomes the base name for later auto shots.
//  bFiltered - true saves the buffer produced by the current FilterMode,
//              false saves the unfiltered 272x208 frame.
// The window title is updated with the saved file name on success.
void SaveScreenshot(bool bAuto, bool bFiltered) {
	static int nLastNum=0;
	static CString csFile;
	CString csTmp;
	OPENFILENAME ofn;
	char buf[256], buf2[256];
	BOOL bRet = TRUE;			// only the dialog path can be cancelled

	if ((!bAuto) || (csFile.IsEmpty())) {
		memset(&ofn, 0, sizeof(OPENFILENAME));
		ofn.lStructSize=sizeof(OPENFILENAME);
		ofn.hwndOwner=myWnd;
		ofn.lpstrFilter="BMP Files\0*.bmp\0\0";
		strcpy(buf, "");
		ofn.lpstrFile=buf;
		ofn.nMaxFile=256;
		strcpy(buf2, "");
		ofn.lpstrFileTitle=buf2;
		ofn.nMaxFileTitle=256;
		ofn.Flags=OFN_HIDEREADONLY|OFN_OVERWRITEPROMPT;

		// the dialog may change the current directory - put it back afterwards
		char szTmpDir[MAX_PATH];
		GetCurrentDirectory(MAX_PATH, szTmpDir);

		bRet = GetSaveFileName(&ofn);

		SetCurrentDirectory(szTmpDir);

		csTmp = ofn.lpstrFile;				// save the file we are opening now
		if (ofn.nFileExtension > 1) {
			// keep the name without its extension as the base for auto shots
			csFile = csTmp.Left(ofn.nFileExtension-1);
		} else {
			csFile = csTmp;
			csTmp+=".bmp";
		}
	} else {
		// find the next numbered name that does not exist yet, giving up after 10000 tries
		int nCnt=10000;
		for (;;) {
			csTmp.Format("%s%04d.bmp", (LPCSTR)csFile, nLastNum++);
			FILE *fp=fopen(csTmp, "r");
			if (NULL != fp) {
				fclose(fp);
				nCnt--;
				if (nCnt == 0) {
					MessageBox(myWnd, "Can't take another auto screenshot without overwriting file!", "Classic99 Error", MB_OK);
					return;
				}
				continue;
			}
			break;
		}
	}

	if (!bRet) {
		return;					// dialog was cancelled
	}

	// Pick the source buffer and its dimensions. All buffers hold 32-bit pixels.
	int nX=256+16;
	int nY=192+16;
	unsigned char *pBuf=(unsigned char*)framedata;

	if (bFiltered) {
		switch (FilterMode) {
		case 0:		// none
			break;
		
		case 4:		// TV
			nX=TV_WIDTH;
			nY=384+29;
			pBuf=(unsigned char*)framedata2;
			break;

		case 5:		// hq4x
			nX=(256+16)*4;
			nY=(192+16)*4;
			pBuf=(unsigned char*)framedata2;
			break;

		default:	// All SAI2x modes
			nX=512+32;
			nY=384+29;
			pBuf=(unsigned char*)framedata2;
			break;
		}
	}

	FILE *fp=fopen(csTmp, "wb");
	if (NULL == fp) {
		MessageBox(myWnd, "Failed to open output file", "Classic99 Error", MB_OK);
		return;
	}

	// BMP scan lines must be a multiple of 4 bytes long
	const int nRowBytes = nX*3;
	const int nPad = (4 - (nRowBytes & 3)) & 3;

	// 14 byte file header followed by the 12 byte v1 (core) info header
	int tmp;
	fputc('B', fp);				// signature, BM
	fputc('M', fp);
	tmp=(nRowBytes+nPad)*nY+26;
	fwrite(&tmp, 4, 1, fp);		// size of file
	tmp=0;
	fwrite(&tmp, 4, 1, fp);		// four reserved bytes (2x 2 byte fields)
	tmp=26;
	fwrite(&tmp, 4, 1, fp);		// offset to data
	tmp=12;
	fwrite(&tmp, 4, 1, fp);		// size of the header (v1)
	fwrite(&nX, 2, 1, fp);		// width in pixels
	fwrite(&nY, 2, 1, fp);		// height in pixels
	tmp=1;
	fwrite(&tmp, 2, 1, fp);		// number of planes (1)
	tmp=24;
	fwrite(&tmp, 2, 1, fp);		// bits per pixel (0x18=24)

	// Source pixels are 4 bytes each: blue, green, red, unused.
	// Rows are written in buffer order.
	for (int row=0; row<nY; row++) {
		for (int col=0; col<nX; col++) {
			fputc(pBuf[0], fp);		// blue
			fputc(pBuf[1], fp);		// green
			fputc(pBuf[2], fp);		// red
			pBuf+=4;				// skip the unused byte
		}
		for (int pad=0; pad<nPad; pad++) {
			fputc(0, fp);
		}
	}

	fclose(fp);

	CString csTmp2;
	csTmp2.Format("Classic99 - Saved %sfiltered - %s", bFiltered?"":"un", (LPCSTR)csTmp);
	SetWindowText(myWnd, csTmp2);
}
