#if 0 /* STILL UNUSED */
/****************************************************************************
*   PROJECT: Squeak port for Win32 (NT / Win95)
*   FILE:    sqWin32DirectX.c
*   CONTENT: DirectX stuff
*
*   AUTHOR:  Andreas Raab (ar)
*   ADDRESS: Walt Disney Imagineering, Glendale, CA
*   EMAIL:   Andreas.Raab@disney.com
*   RCSID:   $Id: sqWin32DirectX.c,v 1.1.1.1 2001/02/04 23:27:54 anonymous Exp $
*
*   NOTES:
*		1) A (host) display depth of 16, 24, or 32 bit is required
*
*		2) We always allocate 4 more pixels for the actual DX surface.
*		   The reason for this is that often we can deal more easily
*		   with four pixels at once (such as when unrolling loops or
*		   converting from 8bit to 32bit).
*
*		3) C and ASM conversions are mixed; mainly because the C
*		   versions are usually quite fast (unless there are some
*		   special dependencies that can't really be expressed in C).
*
*****************************************************************************/
#define INITGUID /* Necessary once per project if outside MFC */
#include <windows.h>
#include <ddraw.h>
#include <d3d.h>

#include "sq.h"
/* #define D3D_DEBUG */
/* #define D3D_INFO */
/* #define D3D_DIRECT_BLT */
/* #define NO_COPY_BACK */

/* The Squeak main window (defined in another translation unit) */
extern HWND stWindow;

/* DirectDraw (2D) interfaces; all start out as NULL (= not created) */
static LPDIRECTDRAW            lpdd                      = NULL; 
static LPDIRECTDRAWSURFACE     lpddPrimary               = NULL;
static LPDIRECTDRAWSURFACE     lpddDevice                = NULL;
static LPDIRECTDRAWCLIPPER     lpddClipper               = NULL;

/* Z-buffer surface (NULL until created) */
static LPDIRECTDRAWSURFACE     lpddZBuffer               = NULL;

/* Direct3D (3D) interfaces; all start out as NULL (= not created) */
static LPDIRECT3D              lpd3d                     = NULL;
static LPDIRECT3DDEVICE        lpd3dDevice               = NULL;
static LPDIRECT3DVIEWPORT      lpd3dViewport             = NULL;

/* Hard and software caps of the DX driver */
static DDCAPS	hwCaps;
static DDCAPS	swCaps;

/* The current display depth.
   EnumDeviceCallback() ANDs this value with a device's
   dwDeviceRenderBitDepth, i.e. it is used as a DDBD_* bit flag there. */
static DWORD                   dwDisplayBitDepth          = 0UL;

/* The display descriptor. Its ddpfPixelFormat RGB bit masks are what
   the Copy* functions pass to the lookup table initializers. */
static DDSURFACEDESC displayDesc;

/* The size of the DirectX offscreen surface */
static DWORD dxWidth = 0;
static DWORD dxHeight = 0;

/* The display values */
extern RECT stWindowRect;	/* stWindow rectangle in screen coords */

/* Is DirectX currently enabled?!
   Starts out enabled; the D3DRESTORE macro clears it when a lost
   surface cannot be restored. */
static int fDirectXEnabled = 1;

/* Flag determining whether to use a smart clipping approach */
static int fDirectXSmartClipper = 1;

/* Flag determining whether the clipper is currently attached */
static int fClipperAttached = 0;

/* Squeak's primitive vertex definition.
   NOTE(review): the per-field notes below are inferred from the field
   names only. The layout presumably has to match the vertex records
   produced on the Squeak side -- confirm before reordering or
   retyping any field. */
typedef struct B3DPrimitiveVertex {
	float position[3];	/* presumably x, y, z */
	float normal[3];	/* presumably the vertex normal */
	float texCoord[2];	/* presumably the texture coordinates */
	float rasterPos[4];	/* presumably the transformed position -- TODO confirm */
	int pixelValue32;	/* presumably a packed 32bit color -- TODO confirm */
	int clipFlags;
	int windowPos[2];
} B3DPrimitiveVertex;

/* Squeak's primitive viewport definition: four integer coordinates.
   NOTE(review): presumably (x0,y0) and (x1,y1) are two opposite
   corners of the viewport -- confirm against the code that fills it. */
typedef struct B3DPrimitiveViewport {
	int x0;
	int y0;
	int x1;
	int y1;
} B3DPrimitiveViewport;

/* A rectangle given by four int edges.
   The field names and their order (left, top, right, bottom) are the
   same as in the Win32 RECT structure, but the members are plain ints. */
typedef struct sqRect {
	int left;
	int top;
	int right;
	int bottom;
} sqRect;

/* General dummy for Squeak's primitive faces */
typedef int B3DInputFace;

/* globally release *all* DX related stuff */
void sqDirectXRelease(void);

/* A little helper for restoring lost surfaces.
   If 'lp' is non-NULL the surface is asked whether it is lost; if the
   answer is DDERR_SURFACELOST a Restore() is attempted. If the
   resulting HRESULT is a failure code, everything DirectX related is
   released through sqDirectXRelease(), fDirectXEnabled is cleared and
   the *enclosing function* returns that HRESULT.
   Requirements at the point of use:
     - a variable named 'hRes' must be in scope,
     - the enclosing function must be able to return an HRESULT.
   NOTE(review): the expansion is a bare 'if' block (not wrapped in
   do { } while(0)) and 'lp' is used unparenthesized, so pass a simple
   lvalue and do not use the macro as the unbraced body of an if/else. */
#define D3DRESTORE(lp) \
	if(lp) { \
		hRes = lp->lpVtbl->IsLost(lp);\
		if(hRes == DDERR_SURFACELOST)\
			hRes = lp->lpVtbl->Restore(lp);\
		if(FAILED(hRes)) {\
			sqDirectXRelease();\
			fDirectXEnabled = 0;\
			return hRes;\
		} \
	}

/***************************************************************************
 ***************************************************************************
						Lookup table initialization
 ***************************************************************************
 ***************************************************************************/

/* The tables below start out as NULL. Each is allocated (256 DWORD
   entries) and filled by its Init* function the first time a Copy*
   function that needs it runs its DO_SETUP step. */

/* Table for translation from 8bit to 16bit display depth */
static DWORD *lut8x16 = NULL;
/* Table for translation from 16bit to 16bit display depth
   (also used by Copy32x16 after reducing pixels to 5 bits per channel) */
static DWORD *lut16x16 = NULL;
/* Table for translation from 8bit to 32bit display depth
   (also used by Copy8x24, which stores only three bytes per entry) */
static DWORD *lut8x32 = NULL;

/*
	Init8x16:
		Initialize the 8bit to 16bit palette translation table for Squeak.

		rMask, gMask, bMask
			Bit masks of the red, green and blue channel in a native
			pixel. They are treated as unsigned bit patterns.
		lookupTable
			Address of the table pointer. If the pointer is NULL a new
			table of 256 DWORDs is allocated and stored there; an
			existing table is overwritten in place.

		For every entry of the logical palette (logPal) each 0..255
		color component is scaled to the channel's maximum value and
		shifted to the channel's position.

		If the table cannot be allocated the function returns and leaves
		*lookupTable as NULL. A channel whose mask is zero contributes
		nothing to the table.
*/
void Init8x16(int rMask,int gMask, int bMask, DWORD **lookupTable)
{
	extern LOGPALETTE *logPal;
	PALETTEENTRY *entry;
	DWORD mask[3], max[3];
	int shift[3];
	int c, i;

	if(!*lookupTable)
		*lookupTable = calloc(256, sizeof(DWORD));
	if(!*lookupTable)
		return; /* out of memory - nothing we can fill in */

	/* Work on unsigned copies so that masks with bit 31 set shift
	   logically rather than arithmetically */
	mask[0] = (DWORD) rMask;
	mask[1] = (DWORD) gMask;
	mask[2] = (DWORD) bMask;

	for(c = 0; c < 3; c++) {
		shift[c] = 0;
		max[c] = 0;
		if(mask[c] == 0)
			continue; /* no such channel; searching for a set bit would never end */
		/* position of the lowest set bit of the mask */
		while(((mask[c] >> shift[c]) & 1) == 0) shift[c]++;
		/* largest value the channel can hold */
		max[c] = mask[c] >> shift[c];
	}

	entry = logPal->palPalEntry;
	for(i = 0; i < 256; i++, entry++) {
		(*lookupTable)[i] =
			(((DWORD) entry->peRed   * max[0] / 255) << shift[0]) +
			(((DWORD) entry->peGreen * max[1] / 255) << shift[1]) +
			(((DWORD) entry->peBlue  * max[2] / 255) << shift[2]);
	}
}

/*
	Init16x16:
		Initialize the 16bit to 16bit translation table for Squeak.
		Note: The translation table encoding is quite tricky. Each word
		in the table consists of two half-words; the low half-word is
		used for the lower 8bit of a Squeak 16bit pixel value and the
		high half-word is used for the high 8bit of a Squeak 16bit pixel
		value. Thus, translation is performed by:
			outValue = (lut16x16[LOBYTE(inValue)] & 0x0000FFFF) +
						(lut16x16[HIBYTE(inValue)] >> 16)

		A Squeak 16bit pixel is taken to be 5 bits per channel with blue
		in bits 0-4, green in bits 5-9 and red in bits 10-14. The
		channel values are only moved to the native positions, they are
		not rescaled.

		rMask, gMask, bMask
			Bit masks of the red, green and blue channel in a native
			pixel. They are treated as unsigned bit patterns.
		lookupTable
			Address of the table pointer. If the pointer is NULL a new
			table of 256 DWORDs is allocated and stored there; an
			existing table is overwritten in place.

		If the table cannot be allocated the function returns and leaves
		*lookupTable as NULL. A channel whose mask is zero is placed at
		bit position 0.
*/
void Init16x16(int rMask, int gMask, int bMask, DWORD **lookupTable)
{
	DWORD mask[3];
	int shift[3];
	int c, i;

	if(!*lookupTable)
		*lookupTable = calloc(256, sizeof(DWORD));
	if(!*lookupTable)
		return; /* out of memory - nothing we can fill in */

	mask[0] = (DWORD) rMask;
	mask[1] = (DWORD) gMask;
	mask[2] = (DWORD) bMask;

	for(c = 0; c < 3; c++) {
		shift[c] = 0;
		if(mask[c] == 0)
			continue; /* searching for a set bit would never end */
		/* position of the lowest set bit of the mask */
		while(((mask[c] >> shift[c]) & 1) == 0) shift[c]++;
	}

	for(i = 0; i < 256; i++) {
		DWORD idx = (DWORD) i;
		DWORD lowByte, highByte;

		/* i taken as the LOW byte of a pixel:
		   blue (5 bits) plus the lower 3 bits of green */
		lowByte  = (idx & 0x1F) << shift[2];
		lowByte += ((idx & 0xE0) >> 5) << shift[1];

		/* i taken as the HIGH byte of a pixel:
		   the upper 2 bits of green plus red (5 bits) */
		highByte  = ((idx & 0x03) << 3) << shift[1];
		highByte += ((idx >> 2) & 0x1F) << shift[0];

		/* Assign rather than accumulate so that re-initializing an
		   existing table yields the same result as a fresh one */
		(*lookupTable)[i] = lowByte + (highByte << 16);
	}
}

/*
	Init8x32:
		Initialize the 8bit to 32bit palette translation table for Squeak.

		rMask, gMask, bMask
			Bit masks of the red, green and blue channel in a native
			pixel. They are treated as unsigned bit patterns.
		lookupTable
			Address of the table pointer. If the pointer is NULL a new
			table of 256 DWORDs is allocated and stored there; an
			existing table is overwritten in place.

		For every entry of the logical palette (logPal) each 0..255
		color component is scaled to the channel's maximum value and
		shifted to the channel's position.

		If the table cannot be allocated the function returns and leaves
		*lookupTable as NULL. A channel whose mask is zero contributes
		nothing to the table.
*/
void Init8x32(int rMask, int gMask, int bMask, DWORD **lookupTable)
{
	extern LOGPALETTE *logPal;
	PALETTEENTRY *entry = logPal->palPalEntry;
	DWORD mask[3], max[3];
	int shift[3];
	int c, i;

	if(!*lookupTable)
		*lookupTable = calloc(256, sizeof(DWORD));
	if(!*lookupTable)
		return; /* out of memory - nothing we can fill in */

	/* Work on unsigned copies so that masks with bit 31 set shift
	   logically rather than arithmetically */
	mask[0] = (DWORD) rMask;
	mask[1] = (DWORD) gMask;
	mask[2] = (DWORD) bMask;

	for(c = 0; c < 3; c++) {
		shift[c] = 0;
		max[c] = 0;
		if(mask[c] == 0)
			continue; /* no such channel; searching for a set bit would never end */
		/* position of the lowest set bit of the mask */
		while(((mask[c] >> shift[c]) & 1) == 0) shift[c]++;
		/* largest value the channel can hold */
		max[c] = mask[c] >> shift[c];
	}

	for(i = 0; i < 256; i++, entry++) {
		(*lookupTable)[i] =
			(((DWORD) entry->peRed   * max[0] / 255) << shift[0]) +
			(((DWORD) entry->peGreen * max[1] / 255) << shift[1]) +
			(((DWORD) entry->peBlue  * max[2] / 255) << shift[2]);
	}
}

/***************************************************************************
 ***************************************************************************
						Squeak To 16 bit conversions
 ***************************************************************************
 ***************************************************************************/
/*
	Copy8x16:
		Copy the Squeak (8bit) display onto the native (16bit) surface.
		This function always deals with multiples of four pixels.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h, configured by the macros
		defined right before the #include. DO_SETUP builds lut8x16 from
		the display's RGB bit masks if it does not exist yet.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords, and take
		PIXEL_ALIGN as the pixel alignment and SRC_PPW as the number of
		source pixels per 32bit word -- confirm in the headers.
*/
int Copy8x16(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define DO_SETUP if(!lut8x16) Init8x16( displayDesc.ddpfPixelFormat.dwRBitMask,\
										displayDesc.ddpfPixelFormat.dwGBitMask,\
										displayDesc.ddpfPixelFormat.dwBBitMask,\
										&lut8x16);
#define PIXEL_ALIGN 4
#define SRC_PPW 4
#include "sqWin32CopyBegin.h"
{
	/* Each 32bit source word holds four 8bit pixels and produces two
	   32bit output words holding two 16bit pixels each. */
	int n = nWords;
	unsigned int *in = (unsigned int*) inBits;
	unsigned int *out = (unsigned int*) outBits;
	unsigned int *lut = lut8x16;
	unsigned int pv, v1, v2;
	/* NOTE(review): do/while runs at least once and --n only stops at
	   zero, so nWords is assumed to be > 0 here. */
	do {
		pv = *in; in++;
		/* bytes 0 and 1 (least significant) of the source word go into v1,
		   byte 0 into the upper half */
		v1  = (lut[pv & 255] << 16); pv >>= 8;
		v1 += lut[pv & 255]; pv >>= 8;
		/* bytes 2 and 3 go into v2, byte 2 into the upper half */
		v2  = (lut[pv & 255] << 16); pv >>= 8;
		v2 += lut[pv & 255]; pv >>= 8;
		/* the pair from the most significant bytes is stored first */
		*out = v2; out++;
		*out = v1; out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}

/*
	Copy16x16:
		Copy the Squeak (16bit) display onto the native (16bit) surface.
		This function always deals with multiples of two pixels.

		Every 16bit pixel is translated through lut16x16 one byte at a
		time: the low half-word of the entry for the pixel's low byte
		plus the high half-word of the entry for its high byte.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h, configured by the macros
		defined right before the #include. DO_SETUP builds lut16x16 from
		the display's RGB bit masks if it does not exist yet.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int Copy16x16(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define DO_SETUP if(!lut16x16) Init16x16(	displayDesc.ddpfPixelFormat.dwRBitMask,\
											displayDesc.ddpfPixelFormat.dwGBitMask,\
											displayDesc.ddpfPixelFormat.dwBBitMask,\
											&lut16x16);
#define PIXEL_ALIGN 2
#define SRC_PPW 2
#include "sqWin32CopyBegin.h"
{
	/* One 32bit source word (two pixels) produces one 32bit output word
	   with the two translated pixels in swapped half-words. */
	int n = nWords;
	unsigned int *in = (unsigned int*) inBits;
	unsigned int *out = (unsigned int*) outBits;
	unsigned int *lut = lut16x16;
	unsigned int pv, v1, v2;
	/* NOTE(review): do/while with --n assumes nWords > 0. */
	do {
		pv = *in; in++;
		/* v1: pixel from the lower half-word of the source word */
		v1 = lut[pv & 0xFF]; /* <-- no need to mask upper word since we'll shift left by 16 */
		pv >>= 8;
		v1 += lut[pv & 0xFF] >> 16;
		pv >>= 8;
		/* v2: pixel from the upper half-word of the source word */
		v2 = (lut[pv & 0xFF]) & 0xFFFF;
		pv >>= 8;
		v2 += lut[pv & 0xFF] >> 16;
		*out = (v1 << 16) + v2; out++; /* <-- here is the shift */
	} while(--n);
}
#include "sqWin32CopyEnd.h"

}

/*
	FastCopy16x16:
		Copy the Squeak (16bit) display onto the native (16bit) surface
		given that the formats are identical (e.g., we only have to swap
		the words and not to do any table lookup).
		This function always deals with multiples of four pixels.

		The loop handles two 32bit words per iteration and runs
		nWords / 2 times; in every word the two 16bit halves are
		exchanged by rotating the word by 16 bits.

		The code before and after the loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here; they
		presumably provide inBits, outBits and nWords. The loop counts
		down with dec/jnz, so nWords / 2 is assumed to be non-zero.
		NOTE(review): the asm block uses ebx, esi and edi -- confirm the
		compiler preserves them around __asm blocks as expected.
*/
int FastCopy16x16(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 4
#define SRC_PPW 2
#include "sqWin32CopyBegin.h"

{
	/* Note: I'm using a assembly version here since
	   I can't get MSVC to generate the 'rol' below */
	__asm {

		mov ecx, nWords
		shr ecx, 1
		mov edi, outBits
		mov esi, inBits

nextPixelWord: /* Convert two full pixel words (4 pixels) */
		mov eax,[esi]			/* 1: fetch first full (2 pixel) word */
		mov ebx,[esi+4]			/* 2: fetch second full (2 pixel) word */
		rol eax, 16				/* 3: Swap first word */
		rol ebx, 16				/* 4: Swap second word */
		mov [edi], eax			/* 5: store first pixel word */
		mov [edi+4], ebx		/* 6: store second pixel word */
		add esi, 8				/* 7: advance inBits */
		add edi, 8				/* 8: advance outBits */
		dec ecx					/* 9: adjust # of words */
		jnz nextPixelWord		/* 10: do next pixel word */
	}
}
#include "sqWin32CopyEnd.h"
}

/*
	Copy32x16:
		Copy the Squeak (32bit) display onto the native (16bit) surface

		Every 32bit pixel is first reduced to a 15bit value (the upper
		five bits of each 8bit channel, red in bits 10-14, green in
		bits 5-9, blue in bits 0-4) and then translated through
		lut16x16 one byte at a time, like in Copy16x16.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h, configured by the macros
		defined right before the #include. DO_SETUP builds lut16x16 from
		the display's RGB bit masks if it does not exist yet.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int Copy32x16(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define DO_SETUP if(!lut16x16) Init16x16(	displayDesc.ddpfPixelFormat.dwRBitMask,\
											displayDesc.ddpfPixelFormat.dwGBitMask,\
											displayDesc.ddpfPixelFormat.dwBBitMask,\
											&lut16x16);
#define PIXEL_ALIGN 2
#define SRC_PPW 1
#include "sqWin32CopyBegin.h"
{
	/* Two source words (two pixels) are consumed per iteration and
	   produce one 32bit output word, hence nWords / 2 iterations.
	   NOTE(review): do/while with --n assumes nWords >= 2. */
	int n = nWords >> 1;
	unsigned int *in = (unsigned int *) inBits;
	unsigned int *out = (unsigned int *) outBits;
	unsigned int *lut = (unsigned int *) lut16x16;
	unsigned int pv, r, g, b, v1,v2;
	do {
		/* first pixel -> v1 (ends up in the lower half-word) */
		pv = *in; in++;
		b = (pv >>= 3) & 0x1F;	/* original bits 3-7 */
		g = (pv >>= 8) & 0x1F;	/* original bits 11-15 */
		r = (pv >>= 8) & 0x1F;	/* original bits 19-23 */
		v1 = (r << 10) + (g << 5) + b;
		v1 = (lut[v1 & 0xFF] & 0xFFFF) + (lut[v1 >> 8] >> 16);
		/* second pixel -> v2 (ends up in the upper half-word) */
		pv = *in; in++;
		b = (pv >>= 3) & 0x1F;
		g = (pv >>= 8) & 0x1F;
		r = (pv >>= 8) & 0x1F;
		v2 = (r << 10) + (g << 5) + b;
		/* no need to mask the upper half-word here: the shift by 16
		   below discards it */
		v2 = (lut[v2 & 0xFF]) + (lut[v2 >> 8] >> 16);
		*out = v1 + (v2 << 16); out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}

/*
	FastCopy32x16:
		Copy the Squeak (32bit) display onto the native (16bit) surface
		given that the formats are identical (e.g., we only have to compress
		the words and not to do any table lookup).

		Every 32bit pixel is reduced to the upper five bits of each 8bit
		channel and packed as red in bits 10-14, green in bits 5-9 and
		blue in bits 0-4.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int FastCopy32x16(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 2
#define SRC_PPW 1
#include "sqWin32CopyBegin.h"
{
	/* Two source words (two pixels) are consumed per iteration and
	   produce one 32bit output word, hence nWords / 2 iterations.
	   NOTE(review): do/while with --n assumes nWords >= 2. */
	int n = nWords >> 1;
	unsigned int *in = (unsigned int *) inBits;
	unsigned int *out = (unsigned int *) outBits;
	unsigned int pv, r, g, b, value;
	do {
		/* first pixel goes into the lower half-word */
		pv = *in; in++;
		b = (pv >>= 3) & 0x1F;	/* original bits 3-7 */
		g = (pv >>= 8) & 0x1F;	/* original bits 11-15 */
		r = (pv >>= 8) & 0x1F;	/* original bits 19-23 */
		value = (r << 10) + (g << 5) + b;
		/* second pixel goes into the upper half-word */
		pv = *in; in++;
		b = (pv >>= 3) & 0x1F;
		g = (pv >>= 8) & 0x1F;
		r = (pv >>= 8) & 0x1F;
		value += ((r << 10) + (g << 5) + b) << 16;
		*out = value; out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}

/***************************************************************************
 ***************************************************************************
						Squeak To 24 bit conversions
 ***************************************************************************
 ***************************************************************************/
/* Three byte helper type used to store one 24bit pixel with a single
   struct assignment.
   NOTE(review): identifiers starting with a double underscore are
   reserved for the implementation; the name is kept as is here. */
typedef struct __rgb24{ char r, g, b; } __rgb24;
/*
	Copy8x24:
		Copy the Squeak (8bit) display onto the native (24bit) surface.

		Every 8bit pixel is looked up in lut8x32 and the first three
		bytes (in memory order) of the 32bit table entry are stored as
		one 24bit pixel.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h, configured by the macros
		defined right before the #include. DO_SETUP builds lut8x32 from
		the display's RGB bit masks if it does not exist yet.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int Copy8x24(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define DO_SETUP if(!lut8x32) Init8x32(	displayDesc.ddpfPixelFormat.dwRBitMask,\
										displayDesc.ddpfPixelFormat.dwGBitMask,\
										displayDesc.ddpfPixelFormat.dwBBitMask,\
										&lut8x32);
#define PIXEL_ALIGN 4
#define SRC_PPW 4
#include "sqWin32CopyBegin.h"
{
	/* Each 32bit source word holds four 8bit pixels and produces four
	   3-byte output pixels.
	   NOTE(review): do/while with --n assumes nWords > 0. */
	int n = nWords;
	unsigned int *in = (unsigned int *) inBits;
	__rgb24 *out = (__rgb24 *) outBits;
	unsigned int *lut = (unsigned int *) lut8x32;
	unsigned int pv;
	do {
		pv = *in; in++;
		/* most significant byte of the source word first */
		*out = *(__rgb24*) (lut + ((pv >> 24) & 0xFF)); out++;
		*out = *(__rgb24*) (lut + ((pv >> 16) & 0xFF)); out++;
		*out = *(__rgb24*) (lut + ((pv >>  8) & 0xFF)); out++;
		*out = *(__rgb24*) (lut + ((pv      ) & 0xFF)); out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}
 
/*
	FastCopy16x24
		Copy the Squeak (16bit) display onto the native (24bit) surface
		given that the formats are identical (e.g., we only have to expand
		the words and not to do any table lookup).

		Every 5bit channel of a 16bit pixel is moved into the upper five
		bits of an 8bit channel (the lower three bits stay zero): red
		ends up in bits 19-23, green in bits 11-15 and blue in bits 3-7.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int FastCopy16x24(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 2
#define SRC_PPW 2
#include "sqWin32CopyBegin.h"
{
	/* One 32bit source word (two pixels) produces six output bytes.
	   NOTE(review): do/while with --n assumes nWords > 0. */
	int n = nWords;
	unsigned int *in = (unsigned int *) inBits;
	unsigned char *out = (unsigned char *) outBits;
	unsigned int pv, v1, v2;
	do {
		pv = *in; in++;
		/* v1: pixel from the lower half-word of the source word */
		v1 = ((pv & 0x7C00) << 9) | ((pv & 0x03E0) << 6) | ((pv & 0x1F) << 3);
		pv >>= 16;
		/* v2: pixel from the upper half-word of the source word */
		v2 = ((pv & 0x7C00) << 9) | ((pv & 0x03E0) << 6) | ((pv & 0x1F) << 3);
		/* the upper half-word pixel is stored first; each pixel is
		   written least significant byte first */
		*out++ = v2; *out++ = v2 >> 8; *out++ = v2 >> 16;
		*out++ = v1; *out++ = v1 >> 8; *out++ = v1 >> 16;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}

/*
	FastCopy32x24
		Copy the Squeak (32bit) display onto the native (24bit) surface
		given that the formats are identical (e.g., we only have to copy
		the words and not to do any table lookup).

		For every 32bit source pixel the first three bytes in memory are
		copied and the fourth byte is skipped. The loop handles four
		pixels per iteration and runs nWords / 4 times.

		The code before and after the loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here; they
		presumably provide inBits, outBits and nWords. The loop counts
		down with dec/jnz, so nWords / 4 is assumed to be non-zero.
		NOTE(review): on x86 the lowest byte of a word comes first in
		memory, and the other 32bit converters in this file read blue
		from the lowest bits, so the bytes copied are presumably blue,
		green, red and the skipped one is the unused/alpha byte.
*/
int FastCopy32x24(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 4
#define SRC_PPW 1
#include "sqWin32CopyBegin.h"
{
	__asm {
		mov edx, nWords
		mov edi, outBits
		mov esi, inBits
		shr edx, 2				/* adjust # of words */
		cld						/* adjust direction */

nextPixelWord:
		movsw					/* copy bytes 0 and 1 of the pixel */
		movsb					/* copy byte 2 of the pixel */
		inc esi					/* skip byte 3 of the pixel */
		movsw					/* copy bytes 0 and 1 of the pixel */
		movsb					/* copy byte 2 of the pixel */
		inc esi					/* skip byte 3 of the pixel */
		movsw					/* copy bytes 0 and 1 of the pixel */
		movsb					/* copy byte 2 of the pixel */
		inc esi					/* skip byte 3 of the pixel */
		movsw					/* copy bytes 0 and 1 of the pixel */
		movsb					/* copy byte 2 of the pixel */
		inc esi					/* skip byte 3 of the pixel */
		dec edx					/* nWords -= 4 */
		jnz nextPixelWord
	}
}
#include "sqWin32CopyEnd.h"
}

/***************************************************************************
 ***************************************************************************
						Squeak To 32 bit conversions
 ***************************************************************************
 ***************************************************************************/


/*
	Copy8x32:
		Copy the Squeak (8bit) display onto the native (32bit) surface.

		Every 8bit pixel is replaced by its 32bit entry in lut8x32.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h, configured by the macros
		defined right before the #include. DO_SETUP builds lut8x32 from
		the display's RGB bit masks if it does not exist yet.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int Copy8x32(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define DO_SETUP if(!lut8x32) Init8x32(	displayDesc.ddpfPixelFormat.dwRBitMask,\
										displayDesc.ddpfPixelFormat.dwGBitMask,\
										displayDesc.ddpfPixelFormat.dwBBitMask,\
										&lut8x32);
#define PIXEL_ALIGN 4
#define SRC_PPW 4
#include "sqWin32CopyBegin.h"
{
	/* Each 32bit source word holds four 8bit pixels and produces four
	   32bit output words.
	   NOTE(review): do/while with --n assumes nWords > 0. */
	int n = nWords;
	unsigned int *in = (unsigned int *) inBits;
	unsigned int *out = (unsigned int *) outBits;
	unsigned int *lut = (unsigned int *) lut8x32;
	unsigned int pv;
	do {
		pv = *in; in++;
		/* most significant byte of the source word first */
		*out = lut[(pv >> 24) & 0xFF]; out++;
		*out = lut[(pv >> 16) & 0xFF]; out++;
		*out = lut[(pv >> 8)  & 0xFF]; out++;
		*out = lut[ pv        & 0xFF]; out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}


/*
	FastCopy16x32
		Copy the Squeak (16bit) display onto the native (32bit) surface
		given that the formats are identical (e.g., we only have to expand
		the words and not to do any table lookup).

		Every 5bit channel of a 16bit pixel is moved into the upper five
		bits of an 8bit channel (the lower three bits stay zero): red
		ends up in bits 19-23, green in bits 11-15 and blue in bits 3-7.
		Bits 24-31 of the output are always zero.

		The code before and after the conversion loop comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int FastCopy16x32(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 2
#define SRC_PPW 2
#include "sqWin32CopyBegin.h"
{
	/* One 32bit source word (two pixels) produces two 32bit output words.
	   NOTE(review): do/while with --n assumes nWords > 0. */
	int n = nWords;
	unsigned int *in = (unsigned int *) inBits;
	unsigned int *out = (unsigned int *) outBits;
	unsigned int pv, v1, v2;
	do {
		pv = *in; in++;
		/* v1: pixel from the lower half-word of the source word */
		v1 = ((pv & 0x7C00) << 9) | ((pv & 0x03E0) << 6) | ((pv & 0x1F) << 3);
		pv >>= 16;
		/* v2: pixel from the upper half-word of the source word */
		v2 = ((pv & 0x7C00) << 9) | ((pv & 0x03E0) << 6) | ((pv & 0x1F) << 3);
		/* the upper half-word pixel is stored first */
		*out = v2; out++;
		*out = v1; out++;
	} while(--n);
}
#include "sqWin32CopyEnd.h"
}


/*
	FastCopy32x32
		Copy the Squeak (32bit) display onto the native (32bit) surface
		given that the formats are identical (e.g., we only have to copy
		the words and not to do any table lookup).

		The code before and after the copy comes from
		sqWin32CopyBegin.h / sqWin32CopyEnd.h.
		NOTE(review): those headers are not visible here. They
		presumably provide inBits, outBits and nWords -- confirm.
*/
int FastCopy32x32(int srcBits, int srcWidth, LPDIRECTDRAWSURFACE lpddSurface, RECT *dxRect)
{
#define PIXEL_ALIGN 4
#define SRC_PPW 1
#include "sqWin32CopyBegin.h"
{
	/* nWords 32bit words = nWords * 4 bytes, copied unchanged */
	memcpy((void*)outBits, (void*) inBits, nWords << 2);
}
#include "sqWin32CopyEnd.h"
}

/***************************************************************************
 ***************************************************************************
					D3D Initialization
 ***************************************************************************
 ***************************************************************************/
/*
	EnumDeviceCallback
	Enumerate all D3D devices and try to find a suitable hardware
	rasterizer. If none is found use a software rasterizer.

	The callback is invoked once per device. A device is accepted if it
	can render at the current display depth, is an RGB device, supports
	floating point TLVertex data, has a z-buffer of at least 16 bit,
	supports z-testing with LESS or LESSEQUAL, gouraud shading,
	perspective corrected textures and linear texture filtering.

	An accepted device is remembered in the static variables below
	(a later accepted device replaces an earlier one). Returns
	D3DENUMRET_CANCEL as soon as an accepted hardware device was found
	and D3DENUMRET_OK (= keep enumerating) in all other cases.

	NOTE(review): the file does not show where fDeviceFound is reset
	before the enumeration starts -- presumably the caller does this.
*/

static BOOL                    fDeviceFound;
static BOOL                    fIsHardwareDevice;
static GUID                    guidDevice;
static char                    szDeviceName[256];
static char                    szDeviceDesc[256];
static D3DDEVICEDESC           d3dDeviceDesc;

static HRESULT WINAPI
EnumDeviceCallback(LPGUID          lpGUID, 
                   LPSTR           lpszDeviceDesc,
                   LPSTR           lpszDeviceName,
                   LPD3DDEVICEDESC lpd3dHWDeviceDesc,
                   LPD3DDEVICEDESC lpd3dSWDeviceDesc,
                   LPVOID          lpUserArg)
{
	LPD3DDEVICEDESC deviceDesc;
	LPD3DPRIMCAPS	triCaps;
	DWORD			depth;
	BOOL			fIsHardware;

	/* If there is no hardware support then the color model is zero */
	fIsHardware = (0UL != lpd3dHWDeviceDesc->dcmColorModel);
	deviceDesc = (fIsHardware ? lpd3dHWDeviceDesc : lpd3dSWDeviceDesc);

	/* Check the triangle capabilities of this device */
	triCaps = &deviceDesc->dpcTriCaps;

#ifdef D3D_INFO
	warnPrintf("\nDriver: %s\n", lpszDeviceName);
	warnPrintf("Hardware accellerated: %s\n", fIsHardware ? "YES" : "NO");
	warnPrintf("Available render depths: ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_1) warnPrintf("1 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_2) warnPrintf("2 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_4) warnPrintf("4 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_8) warnPrintf("8 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_16) warnPrintf("16 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_24) warnPrintf("24 ");
	if(deviceDesc->dwDeviceRenderBitDepth & DDBD_32) warnPrintf("32 ");
	warnPrintf("\n");

	warnPrintf("Color model: %s\n", 
		(deviceDesc->dcmColorModel & D3DCOLOR_RGB) ? "RGB" : "MONO");
	warnPrintf("Floatint point TLVertex: %s\n",
		(deviceDesc->dwDevCaps & D3DDEVCAPS_FLOATTLVERTEX) ? "YES" : "NO");

	warnPrintf("Available Z-buffer depths: ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_1) warnPrintf("1 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_2) warnPrintf("2 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_4) warnPrintf("4 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_8) warnPrintf("8 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_16) warnPrintf("16 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_24) warnPrintf("24 ");
	if(deviceDesc->dwDeviceZBufferBitDepth & DDBD_32) warnPrintf("32 ");
	warnPrintf("\n");

	warnPrintf("Z-buffering available: %s\n",
		(triCaps->dwRasterCaps & D3DPRASTERCAPS_ZTEST) ? "YES" : "NO");

	/* The D3DPCMPCAPS_* bits describe dwZCmpCaps (the same member the
	   acceptance test further down uses), not dwRasterCaps */
	warnPrintf("Z-buffer tests: ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_NEVER) warnPrintf("NEVER ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_LESS) warnPrintf("LESS ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_EQUAL) warnPrintf("EQUAL ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_LESSEQUAL) warnPrintf("LESSEQUAL ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_GREATER) warnPrintf("GREATER ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_NOTEQUAL) warnPrintf("NOTEQUAL ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_GREATEREQUAL) warnPrintf("GREATEREQUAL ");
	if(triCaps->dwZCmpCaps & D3DPCMPCAPS_ALWAYS) warnPrintf("ALWAYS ");
	warnPrintf("\n");

	warnPrintf("Gouraud shading (RGB): %s\n",
		(triCaps->dwShadeCaps & D3DPSHADECAPS_COLORGOURAUDRGB) ? "YES" : "NO");
	warnPrintf("Texture perspective correction: %s\n",
		(triCaps->dwTextureCaps & D3DPTEXTURECAPS_PERSPECTIVE) ? "YES" : "NO");
	warnPrintf("Bilinear texture interpolation: %s\n",
		(triCaps->dwTextureFilterCaps & D3DPTFILTERCAPS_LINEAR) ? "YES" : "NO");
#endif

	/* The device must support the current display depth */
	if(!(deviceDesc->dwDeviceRenderBitDepth & dwDisplayBitDepth))
		return D3DENUMRET_OK;

	/* The device must be RGB (in contrast to MONO) */
	if(!(deviceDesc->dcmColorModel & D3DCOLOR_RGB))
		return D3DENUMRET_OK;

	/* The device must support floating point TLVertex data */
	if(!(deviceDesc->dwDevCaps & D3DDEVCAPS_FLOATTLVERTEX))
		return D3DENUMRET_OK;

	/* The device must have a z-buffer >= 16bit */
	depth = deviceDesc->dwDeviceZBufferBitDepth;
	if( !(depth & DDBD_16) && !(depth & DDBD_24) && !(depth & DDBD_32))
		return D3DENUMRET_OK;

	/* The device must support z-buffering */
	if(!(triCaps->dwRasterCaps & D3DPRASTERCAPS_ZTEST))
		return D3DENUMRET_OK;

	/* The device must support less or less-equal depth comparison */
	if(!(triCaps->dwZCmpCaps & D3DPCMPCAPS_LESS) &&
		!(triCaps->dwZCmpCaps & D3DPCMPCAPS_LESSEQUAL))
			return D3DENUMRET_OK;

	/* The device must support gouraud shaded triangles */
	if(!(triCaps->dwShadeCaps & D3DPSHADECAPS_COLORGOURAUDMONO) &&
		!(triCaps->dwShadeCaps & D3DPSHADECAPS_COLORGOURAUDRGB))
			return D3DENUMRET_OK;

	/* The device must support perspective corrected textures */
	if(!(triCaps->dwTextureCaps & D3DPTEXTURECAPS_PERSPECTIVE))
		return D3DENUMRET_OK;

	/* The device must support texture interpolation */
	if(!(triCaps->dwTextureFilterCaps & D3DPTFILTERCAPS_LINEAR))
		return D3DENUMRET_OK;

	/* This is a device we are interested in - cache the details away */
	fDeviceFound = 1;
	fIsHardwareDevice = fIsHardware;
	CopyMemory(&guidDevice, lpGUID, sizeof(GUID));
	/* The strings come from the driver; never copy more than our
	   fixed size buffers can hold (the result is always terminated) */
	lstrcpynA(szDeviceDesc, lpszDeviceDesc, sizeof(szDeviceDesc));
	lstrcpynA(szDeviceName, lpszDeviceName, sizeof(szDeviceName));
	/* deviceDesc already points to the HW or SW description */
	CopyMemory(&d3dDeviceDesc, deviceDesc, sizeof(D3DDEVICEDESC));

	/* If the device is hardware then just use it */
	if(fIsHardware)
		return D3DENUMRET_CANCEL; /* Done */

	/* Otherwise keep looking... */
	return D3DENUMRET_OK;
}


/* 
 * The texture callback function. We try to find a number of different
 * formats so we'll have a good chance whatever is provided later on.
 *
 * The callback is invoked once per texture format. A format that fits
 * one of the slots below is copied into the slot's ddpfTextureFormat*
 * variable and the matching fTextureFound* flag is set:
 *   - 8 bit palette indexed formats go into the 08 slot.
 *   - 16 bit RGB formats go into the 4x4x4x4, 1x5x5x5 (with alpha) or
 *     0x5x5x5 (without alpha) slot.
 *   - 32 bit RGB formats go into the 8x8x8x8 (with alpha) or 0x8x8x8
 *     (without alpha) slot.
 * A format with exactly the expected bit masks ("perfect") always
 * replaces what is in its slot; any other format ("lousy") is only
 * taken while the slot is still empty.
 *
 * Always returns DDENUMRET_OK, i.e. the enumeration is never stopped.
 *
 * NOTE(review): the file does not show where the fTextureFound* flags
 * are reset before the enumeration starts -- presumably the caller
 * does this.
 */
static DDPIXELFORMAT		ddpfTextureFormat08;
static DDPIXELFORMAT		ddpfTextureFormat0x5x5x5;
static DDPIXELFORMAT		ddpfTextureFormat1x5x5x5; /* unlikely - HW only */
static DDPIXELFORMAT		ddpfTextureFormat4x4x4x4; /* unlikely - HW only */
static DDPIXELFORMAT		ddpfTextureFormat0x8x8x8;
static DDPIXELFORMAT		ddpfTextureFormat8x8x8x8;
static BOOL					fTextureFound08;
static BOOL					fTextureFound0x5x5x5;
static BOOL					fTextureFound1x5x5x5; /* unlikely - HW only */
static BOOL					fTextureFound4x4x4x4; /* unlikely - HW only */
static BOOL					fTextureFound0x8x8x8;
static BOOL					fTextureFound8x8x8x8;

HRESULT CALLBACK FindTextureCallback(DDSURFACEDESC *DeviceFmt, LPVOID lParam)
{
	DDPIXELFORMAT ddpf = DeviceFmt->ddpfPixelFormat;

	/* Check for a 8 bit palette indexed texture */
	if(ddpf.dwFlags & DDPF_PALETTEINDEXED8) {
#ifdef D3D_INFO
		warnPrintf("\nTexture: 8bit palette indexed\n");
#endif
		CopyMemory(&ddpfTextureFormat08, &ddpf, sizeof(DDPIXELFORMAT));
		fTextureFound08 = TRUE;
		return DDENUMRET_OK;
	}
	/* Check for 16 bit textures */
	if((ddpf.dwRGBBitCount == 16) && (ddpf.dwFlags & DDPF_RGB)) {
#ifdef D3D_INFO
		warnPrintf("\nTexture: 16bit RGB\n");
		warnPrintf("Red mask: %x\n", ddpf.dwRBitMask);
		warnPrintf("Green mask: %x\n", ddpf.dwGBitMask);
		warnPrintf("Blue mask: %x\n", ddpf.dwBBitMask);
		warnPrintf("Alpha mask: %x\n", ddpf.dwRGBAlphaBitMask);
#endif
		if((ddpf.dwFlags & DDPF_ALPHAPIXELS) && 
			ddpf.dwRBitMask			== 0x0F00 && 
			ddpf.dwGBitMask			== 0x00F0 && 
			ddpf.dwBBitMask			== 0x000F &&
			ddpf.dwRGBAlphaBitMask	== 0xF000) {
#ifdef D3D_INFO
			warnPrintf("[Note: Perfect 4x4x4x4 texture format]\n");
#endif
			CopyMemory(&ddpfTextureFormat4x4x4x4, &ddpf, sizeof(DDPIXELFORMAT));
			fTextureFound4x4x4x4 = TRUE;
			return DDENUMRET_OK;
		}
		if(ddpf.dwRBitMask == 0x7C00 && 
			ddpf.dwGBitMask == 0x03E0 && 
			ddpf.dwBBitMask == 0x001F) {
			if(ddpf.dwFlags & DDPF_ALPHAPIXELS) {
				if(ddpf.dwRGBAlphaBitMask == 0x8000) {
#ifdef D3D_INFO
					warnPrintf("[Note: Perfect 1x5x5x5 texture format]\n");
#endif
					CopyMemory(&ddpfTextureFormat1x5x5x5, &ddpf, sizeof(DDPIXELFORMAT));
					fTextureFound1x5x5x5 = TRUE;
				}
			} else {
#ifdef D3D_INFO
				warnPrintf("[Note: Perfect 0x5x5x5 texture format]\n");
#endif
				CopyMemory(&ddpfTextureFormat0x5x5x5, &ddpf, sizeof(DDPIXELFORMAT));
				fTextureFound0x5x5x5 = TRUE;
			}
		} else {
			if(ddpf.dwFlags & DDPF_ALPHAPIXELS) {
#ifdef D3D_INFO
				warnPrintf("[Note: Lousy 1x5x5x5 texture format]\n");
#endif
				/* keep whatever we already have in this slot */
				if(fTextureFound1x5x5x5) return DDENUMRET_OK;
				CopyMemory(&ddpfTextureFormat1x5x5x5, &ddpf, sizeof(DDPIXELFORMAT));
				fTextureFound1x5x5x5 = TRUE;
			} else {
#ifdef D3D_INFO
				warnPrintf("[Note: Lousy 0x5x5x5 texture format]\n");
#endif
				/* keep whatever we already have in this slot */
				if(fTextureFound0x5x5x5) return DDENUMRET_OK;
				CopyMemory(&ddpfTextureFormat0x5x5x5, &ddpf, sizeof(DDPIXELFORMAT));
				fTextureFound0x5x5x5 = TRUE;
			}
		}
	}
	/* Check for 32bit textures */
	if( (ddpf.dwRGBBitCount == 32) && (ddpf.dwFlags & DDPF_RGB)) {
#ifdef D3D_INFO
		warnPrintf("\nTexture: 32bit RGB\n");
		warnPrintf("Red mask: %x\n", ddpf.dwRBitMask);
		warnPrintf("Green mask: %x\n", ddpf.dwGBitMask);
		warnPrintf("Blue mask: %x\n", ddpf.dwBBitMask);
		warnPrintf("Alpha mask: %x\n", ddpf.dwRGBAlphaBitMask);
#endif
		if(	ddpf.dwRBitMask == 0x00FF0000 && 
			ddpf.dwGBitMask == 0x0000FF00 && 
			ddpf.dwBBitMask == 0x000000FF) {
			if(ddpf.dwFlags & DDPF_ALPHAPIXELS) {
				if(ddpf.dwRGBAlphaBitMask == 0xFF000000) {
#ifdef D3D_INFO
					warnPrintf("[Note: Perfect 8x8x8x8 texture format]\n");
#endif
					CopyMemory(&ddpfTextureFormat8x8x8x8, &ddpf, sizeof(DDPIXELFORMAT));
					fTextureFound8x8x8x8 = TRUE;
				}
			} else {
#ifdef D3D_INFO
				warnPrintf("[Note: Perfect 0x8x8x8 texture format]\n");
#endif
				CopyMemory(&ddpfTextureFormat0x8x8x8, &ddpf, sizeof(DDPIXELFORMAT));
				fTextureFound0x8x8x8 = TRUE;
			}
		} else {
			if(ddpf.dwFlags & DDPF_ALPHAPIXELS) {
#ifdef D3D_INFO
				warnPrintf("[Note: Lousy 8x8x8x8 texture format]\n");
#endif
				/* keep whatever we already have in this slot */
				if(fTextureFound8x8x8x8) return DDENUMRET_OK;
				CopyMemory(&ddpfTextureFormat8x8x8x8, &ddpf, sizeof(DDPIXELFORMAT));
				/* flag the slot that was just filled (8x8x8x8, with alpha) */
				fTextureFound8x8x8x8 = TRUE;
			} else {
#ifdef D3D_INFO
				warnPrintf("[Note: Lousy 0x8x8x8 texture format]\n");
#endif
				/* keep whatever we already have in this slot; the 16 bit
				   0x5x5x5 slot has nothing to do with this decision */
				if(fTextureFound0x8x8x8) return DDENUMRET_OK;
				CopyMemory(&ddpfTextureFormat0x8x8x8, &ddpf, sizeof(DDPIXELFORMAT));
				fTextureFound0x8x8x8 = TRUE;
			}
		}
	}

	return DDENUMRET_OK;
}

/***************************************************************************
 ***************************************************************************
					Texture handling
 ***************************************************************************
 ***************************************************************************/

/* One slot of the texture cache. A slot with all fields zero / NULL is
   what b3dInitTextureCache() and b3dReleaseTexture() leave behind. */
typedef struct b3dTextureCacheEntry {
	DWORD stamp;	/* set from dwTextureStamp each time the texture is installed */
	DWORD width;	/* used as the surface width when the texture is created */
	DWORD height;	/* used as the surface height when the texture is created */
	DWORD depth;	/* 8, 16 or 32; selects the pixel format if fmt is NULL */
	DDPIXELFORMAT *fmt;	/* points to one of the ddpfTextureFormat* variables, or NULL */
	LPDIRECTDRAWSURFACE	lpddTexture;	/* the texture's surface interface */
	LPDIRECT3DTEXTURE lpd3dTexture;	/* the texture's Direct3D texture interface */
	D3DTEXTUREHANDLE hTexture;	/* the texture's Direct3D handle */
} b3dTextureCacheEntry;

/* Max. # of entries in the texture cache */
#define MAX_TEXTURES 256

/* The texture cache */
static b3dTextureCacheEntry b3dTextureCache[MAX_TEXTURES];

/* # of entries used in texture cache */
static int b3dUsedTextures = 0;


/* LRU time stamp for use of textures */
static DWORD dwTextureStamp = 0;

/* Currently used texture */
static b3dTextureCacheEntry *b3dActiveTexture = NULL;

/* Flag determining whether the texture has changed */
static BOOL fTextureChanged = FALSE;

/*
	b3dInitTextureCache
	Initialize the entire texture cache.
*/
void b3dInitTextureCache(void)
{
	int i;
	b3dTextureCacheEntry *entry;

	for(i=0; i< MAX_TEXTURES; i++) {
		entry = &b3dTextureCache[i];
		entry->stamp = 0;
		entry->width = 0;
		entry->height = 0;
		entry->depth = 0;
		entry->fmt = NULL;
		entry->lpddTexture = NULL;
		entry->lpd3dTexture = NULL;
		entry->hTexture = 0;
	}
	b3dUsedTextures = 0;
	dwTextureStamp = 0;
	b3dActiveTexture = NULL;
	fTextureChanged = 1;
}

/*
	b3dReleaseTexture
	Empty a single texture cache slot: release the interfaces it
	holds (if any) and reset all of its fields.
*/

void b3dReleaseTexture(b3dTextureCacheEntry *entry)
{
	/* The texture interface goes first, then the surface */
	if(entry->lpd3dTexture) {
		entry->lpd3dTexture->lpVtbl->Release(entry->lpd3dTexture);
		entry->lpd3dTexture = NULL;
	}
	if(entry->lpddTexture) {
		entry->lpddTexture->lpVtbl->Release(entry->lpddTexture);
		entry->lpddTexture = NULL;
	}

	/* Reset the slot's description */
	entry->hTexture = 0;
	entry->fmt = NULL;
	entry->depth = 0;
	entry->height = 0;
	entry->width = 0;
	entry->stamp = 0;
}

/*
	b3dReleaseTextureCache
	Release the entire texture cache.
*/
void b3dReleaseTextureCache(void)
{
	int i;
	for(i=0; i< MAX_TEXTURES; i++) {
		b3dReleaseTexture(b3dTextureCache + i);
	}
	b3dUsedTextures = 0;
	dwTextureStamp = 0;
	b3dActiveTexture = NULL;
	fTextureChanged = 1;
}

/*
	b3dInstallTexture
	Install a previously loaded texture as the new active texture.
*/

void b3dInstallTexture(int textureHandle)
{
	b3dTextureCacheEntry *entry;
	if(textureHandle < 1 || textureHandle > MAX_TEXTURES) {
		fTextureChanged = b3dActiveTexture != NULL;
		b3dActiveTexture = NULL;
		return;
	}
	entry = b3dTextureCache + (textureHandle - 1);
	entry->stamp = ++dwTextureStamp;
	if(entry != b3dActiveTexture) fTextureChanged = 1;
	b3dActiveTexture = entry;
	if(b3dActiveTexture->lpddTexture->lpVtbl->IsLost(b3dActiveTexture->lpddTexture)) {
		/* huah... for now just forget about it*/
		b3dActiveTexture = NULL;
		fTextureChanged = 1;
	}
}

/*
	b3dCreateTexture
	Create a texture from the given texture specification.
	This does not copy the contents; it just creates the texture
	surface and gets the appropriate interfaces.

	entry->width, entry->height and entry->depth describe the texture.
	If entry->fmt is NULL a pixel format matching entry->depth is
	selected from the texture formats found for the device.

	Returns DD_OK and fills in entry->lpddTexture, entry->lpd3dTexture
	and entry->hTexture on success. On failure the error code is
	returned (DDERR_UNSUPPORTED if there is no usable pixel format),
	the interface members of the entry are left untouched and every
	object created here has been released again.
*/

HRESULT b3dCreateTexture(b3dTextureCacheEntry *entry)
{
	HRESULT hRes;
	DDSURFACEDESC ddsd;
	LPDIRECTDRAWSURFACE lpddTexture = NULL;
	LPDIRECT3DTEXTURE   lpd3dTexture = NULL;
	D3DTEXTUREHANDLE	hTexture;

	if(!entry->fmt) {
		/* if not yet defined select the texture format we want to use */
		switch(entry->depth) {
			case 8:
				if(fTextureFound08) {
					entry->fmt = &ddpfTextureFormat08;
					break;
				}
				return DDERR_UNSUPPORTED;
			case 16:
				if(fTextureFound1x5x5x5) {
					entry->fmt = &ddpfTextureFormat1x5x5x5;
					break;
				}
				if(fTextureFound0x5x5x5) {
					entry->fmt = &ddpfTextureFormat0x5x5x5;
					break;
				}
				return DDERR_UNSUPPORTED;
			case 32:
				if(fTextureFound4x4x4x4) {
					entry->fmt = &ddpfTextureFormat4x4x4x4;
					break;
				}
				if(fTextureFound8x8x8x8) {
					entry->fmt = &ddpfTextureFormat8x8x8x8;
					break;
				}
				if(fTextureFound0x8x8x8) {
					entry->fmt = &ddpfTextureFormat0x8x8x8;
					break;
				}
				/* fall through */
			default:
				return DDERR_UNSUPPORTED;
		}
	}
	ZeroMemory(&ddsd, sizeof(ddsd));
	ddsd.dwSize = sizeof(ddsd);
	ddsd.dwFlags |= DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT;
	ddsd.dwWidth = entry->width;
	ddsd.dwHeight = entry->height;
	ddsd.ddsCaps.dwCaps = DDSCAPS_TEXTURE /* | DDSCAPS_ALLOCONLOAD */;
	CopyMemory(&ddsd.ddpfPixelFormat, entry->fmt, sizeof(DDPIXELFORMAT));

	if(fIsHardwareDevice) 
		ddsd.ddsCaps.dwCaps |= DDSCAPS_VIDEOMEMORY;
	else
		ddsd.ddsCaps.dwCaps |= DDSCAPS_SYSTEMMEMORY;

	/* Create the texture surface */
	hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddTexture, NULL);
	/* If we've run out of video memory, try creating 
	   the texture without insisting on video memory */
	if(hRes == DDERR_OUTOFVIDEOMEMORY) {
		ddsd.ddsCaps.dwCaps &= ~DDSCAPS_VIDEOMEMORY;
		hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddTexture, NULL);
	}
	if(FAILED(hRes)) return hRes;

	/* Get the D3D texture interface and the texture handle.
	   From here on a failure has to give back what was created so far. */
	hRes = lpddTexture->lpVtbl->QueryInterface(lpddTexture, &IID_IDirect3DTexture, &lpd3dTexture);
	if(FAILED(hRes)) {
		lpddTexture->lpVtbl->Release(lpddTexture);
		return hRes;
	}
	hRes = lpd3dTexture->lpVtbl->GetHandle(lpd3dTexture, lpd3dDevice, &hTexture);
	if(FAILED(hRes)) {
		lpd3dTexture->lpVtbl->Release(lpd3dTexture);
		lpddTexture->lpVtbl->Release(lpddTexture);
		return hRes;
	}

	entry->lpddTexture = lpddTexture;
	entry->lpd3dTexture = lpd3dTexture;
	entry->hTexture = hTexture;
	return DD_OK;
}

/*
	b3dLoadTexture
	Create a texture of the given size in the next free slot of the
	texture cache and copy the given pixels into it.

	width, height: texture size in pixels; both must be positive
	               powers of two
	depth:         bits per pixel of the source; only 32 is accepted
	bits:          the source pixels, 'width' pixels per row

	Returns the 1-based texture handle (to be passed to
	b3dInstallTexture), or 0 if the texture could not be loaded.
*/
int b3dLoadTexture(int width, int height, int depth, unsigned int *bits)
{
	HRESULT hRes;
	RECT dxRect;

	b3dTextureCacheEntry *entry;
	/* Reject empty and negative sizes first: the power of two test
	   below would let zero (and the most negative value) pass. */
	if(width < 1 || height < 1)
		return 0;
	/* check if width and height are powers of two */
	if(width & (width-1)) 
		return 0;
	if(height & (height-1))
		return 0;
	if(depth != 32)
		return 0;
	if(!bits)
		return 0;
	if(!fTextureFound0x8x8x8 && !fTextureFound8x8x8x8)
		return 0;
	if(b3dUsedTextures >= MAX_TEXTURES) 
		return 0;
	entry = b3dTextureCache + b3dUsedTextures;
	entry->width = width;
	entry->height = height;
	entry->depth = depth;
	entry->stamp = 0;
	entry->fmt = fTextureFound0x8x8x8 ? (&ddpfTextureFormat0x8x8x8) : (&ddpfTextureFormat8x8x8x8);
	hRes = b3dCreateTexture(entry);
	if(FAILED(hRes)) {
		/* leave the unused slot clean */
		b3dReleaseTexture(entry);
		return 0;
	}
	dxRect.left = 0;
	dxRect.top = 0;
	dxRect.right = width;
	dxRect.bottom = height;
	hRes = FastCopy32x32((DWORD)bits, width, entry->lpddTexture, &dxRect);
	if(FAILED(hRes)) {
		b3dReleaseTexture(entry);
		return 0;
	}
	/* the handle is the slot index plus one */
	b3dUsedTextures++;
	return b3dUsedTextures;
}

/***************************************************************************
 ***************************************************************************
					Viewport handling
 ***************************************************************************
 ***************************************************************************/
/* Viewport mapping applied by b3dMapObjectVertices():
   screen = coordinate * scale + offset. Set by b3dSetViewport(). */
static double vpOfsX = 0.0;
static double vpOfsY = 0.0;
static double vpScaleX = 1.0;
static double vpScaleY = 1.0;

/* The current viewport rectangle as set by b3dSetViewport():
   d3dRect is used for clearing the z-buffer (b3dClearDepthBuffer),
   vpRect for blts on the device surface (b3dClearViewport, EndScene). */
static D3DRECT d3dRect;
static RECT    vpRect;

/*
	b3dSetViewport
	Set the given viewport.
*/

HRESULT b3dSetViewport(int x, int y, int w, int h)
{
	HRESULT hRes;
	D3DVIEWPORT d3dViewport;

    ZeroMemory(&d3dViewport, sizeof(d3dViewport));
    d3dViewport.dwSize   = sizeof(d3dViewport);
    d3dViewport.dwX      = x;
    d3dViewport.dwY      = y;
    d3dViewport.dwWidth  = w;
    d3dViewport.dwHeight = h;
    d3dViewport.dvScaleX = D3DVAL((float)d3dViewport.dwWidth / 2.0);
    d3dViewport.dvScaleY = D3DVAL((float)d3dViewport.dwHeight / 2.0);
    d3dViewport.dvMaxX   = D3DVAL(1.0);
    d3dViewport.dvMaxY   = D3DVAL(1.0);
    d3dViewport.dvMinZ   = D3DVAL(0.0);
    d3dViewport.dvMaxZ   = D3DVAL(1.0);
    hRes = lpd3dViewport->lpVtbl->SetViewport(lpd3dViewport, &d3dViewport);
    if (FAILED(hRes)) return hRes;
	vpOfsX = x + w * 0.5 - 0.5;
	vpOfsY = y + h * 0.5 - 0.5;
	vpScaleX = w * 0.5;
	vpScaleY = h * -0.5;
	d3dRect.lX1 = x;
	d3dRect.lY1 = y;
	d3dRect.lX2 = x+w;
	d3dRect.lY2 = y+h;
	vpRect.left = x;
	vpRect.top = y;
	vpRect.right = x+w;
	vpRect.bottom = y+h;
	return DD_OK;
}

/*
	b3dMapObjectVertices
	Map the given vertices into the viewport (in place).

	For each vertex rasterPos[3] is replaced by its reciprocal (a
	value of zero is kept as is), x, y and z are multiplied by that
	reciprocal, and x/y are then scaled and offset with the mapping
	stored by b3dSetViewport.
*/
void b3dMapObjectVertices(B3DPrimitiveVertex *vtx, int nVertices)
{
	/* local copies of the current viewport mapping */
	const double sx = vpScaleX, sy = vpScaleY;
	const double ox = vpOfsX,   oy = vpOfsY;
	B3DPrimitiveVertex *v;
	int remaining;

	for(v = vtx, remaining = nVertices; remaining > 0; v++, remaining--) {
		double invW = v->rasterPos[3];
		double px, py, pz;

		/* guard the division; w == 0 stays 0 */
		if(invW != 0.0) invW = 1.0 / invW;
		px = v->rasterPos[0] * invW * sx + ox;
		py = v->rasterPos[1] * invW * sy + oy;
		pz = v->rasterPos[2] * invW;
		v->rasterPos[0] = (float)px;
		v->rasterPos[1] = (float)py;
		v->rasterPos[2] = (float)pz;
		v->rasterPos[3] = (float)invW;
	}
}

/*
	b3dClearDepthBuffer
	Clear the z-buffer inside the current viewport rectangle
	(d3dRect, as set by b3dSetViewport).

	Only the z-buffer is cleared here (D3DCLEAR_ZBUFFER); the color
	contents of the device surface are handled by b3dClearViewport.
	Returns DD_OK or the error reported by the viewport's Clear.
*/

HRESULT b3dClearDepthBuffer(void)
{
	HRESULT hRes;

	/*
	 * NOTE: It's safe to specify the z-buffer clear flag even if we
	 * don't have an attached z-buffer. Direct3D will simply discard
	 * the flag if no z-buffer is being used.
	 */
	hRes = lpd3dViewport->lpVtbl->Clear(lpd3dViewport, 1UL, &d3dRect, D3DCLEAR_ZBUFFER);
	if(FAILED(hRes)) return hRes;
	return DD_OK;
}


/*
	b3dClearViewport
	Fill the current viewport rectangle (vpRect, as set by
	b3dSetViewport) of the device surface with the given pixel value.
	Returns DD_OK or the error reported by Blt.
*/

HRESULT b3dClearViewport(DWORD pixelValue32)
{
	HRESULT hRes;
	/* Start from an all-zero structure so that no field other than
	   the ones set below reaches the driver with a random value. */
	DDBLTFX ddbltfx = {0};

	/* Clear the buffer using a blitter (which should be really fast) */
	ddbltfx.dwSize = sizeof(ddbltfx);
	ddbltfx.dwFillColor = pixelValue32;
	hRes = lpddDevice->lpVtbl->Blt(lpddDevice,&vpRect,NULL,NULL,DDBLT_COLORFILL | DDBLT_WAIT,&ddbltfx);
	if(FAILED(hRes)) return hRes;
	return DD_OK;
}


/***************************************************************************
 ***************************************************************************
					3D primitive handling
 ***************************************************************************
 ***************************************************************************/

/* Flag determining whether we have a scene running
   (set by StartScene, cleared by EndScene) */
static BOOL fSceneStarted = 0;
/* Flag determining whether the scene just started; while set,
   FillExecuteBuffer() emits the initial render states and clears it */
static BOOL fSceneJustStarted = 0;

/* Values for texture filtering and modulation
   (written into the texture render states by FillExecuteBuffer) */
#define B3D_TEXTURE_FILTER_MIN D3DFILTER_LINEAR
#define B3D_TEXTURE_FILTER_MAG D3DFILTER_LINEAR
#define B3D_TEXTURE_BLENDMODE  D3DTBLEND_MODULATE


/*
	StartScene
	Begin rendering 3D stuff if no scene is running yet.

	BeginScene() must be called once, and only once, for each frame
	and device, no matter how many execute buffers make up the frame.
	This function therefore does nothing when a scene is already
	running. When a new scene is started, the "just started" and
	"texture changed" flags are raised.

	Returns DD_OK or the error reported by BeginScene.
*/
HRESULT StartScene(void)
{
	HRESULT hRes;

	if(!fSceneStarted) {
		hRes = lpd3dDevice->lpVtbl->BeginScene(lpd3dDevice);
		if (FAILED(hRes)) return hRes;
		/* the scene is running now and is brand new ... */
		fSceneStarted = 1;
		fSceneJustStarted = 1;
		/* ... so assume that the texture has changed, too */
		fTextureChanged = 1;
	}
	return DD_OK;
}


/*
	EndScene
	End the current scene. Does nothing if no scene is running.

	waitForCompletion: if non-zero, wait until the device surface
	reports that no blt is pending any more. With D3D_DIRECT_BLT
	defined the viewport rectangle is then copied from the device
	surface to the primary surface as well.

	Returns DD_OK on success, otherwise the error code of the call
	that failed. The scene counts as ended as soon as the device's
	EndScene succeeded, even if a later step fails.
*/

HRESULT EndScene(int waitForCompletion)
{
	HRESULT hRes;
#ifdef D3D_DIRECT_BLT
	RECT r;
#endif

	if(!fSceneStarted) return DD_OK; /* scene not started */
	hRes = lpd3dDevice->lpVtbl->EndScene(lpd3dDevice);
	if (FAILED(hRes)) return hRes;
	fSceneStarted = 0;

	if(!waitForCompletion) return DD_OK;

	/* And wait for completion */
	do {
		hRes = lpddDevice->lpVtbl->GetBltStatus(lpddDevice, DDGBS_ISBLTDONE);
	} while(hRes == DDERR_WASSTILLDRAWING);
	if(FAILED(hRes)) return hRes;
#ifdef D3D_DIRECT_BLT
	/* Copy the viewport area straight to the primary surface */
	r = vpRect;
	hRes = lpddPrimary->lpVtbl->Blt(lpddPrimary,&r,lpddDevice,&r, DDBLT_WAIT, NULL);
	/* report the failure; a plain 0 would read as DD_OK */
	if(FAILED(hRes)) return hRes;
	/* Wait until the blt completed */
	do {
		hRes = lpddDevice->lpVtbl->GetBltStatus(lpddDevice, DDGBS_ISBLTDONE);
	} while(hRes == DDERR_WASSTILLDRAWING);
	if(FAILED(hRes)) return hRes;
#endif
	return DD_OK;
}

/*
	b3dFinishScene:
	End the current scene and wait until it has been completed.
	Copy the hardware buffer into Squeak using BitBlt and the provided
	span buffer.

	span:     buffer receiving one row of 32 bit pixels at a time
	spanSize: size of the span buffer in pixels
	srcRect:  currently not used
	dstRect:  the rectangle of the device surface to copy back

	Each row of dstRect is locked on the device surface, converted to
	32 bit pixels with the alpha byte forced to 0xFF, stored in the
	span buffer at the row's x position and handed to
	copyBitsFromtoat(). Nothing is copied if the rectangle is empty
	after clipping to the span buffer.

	With D3D_DIRECT_BLT defined the function returns right after
	ending the scene. Returns DD_OK or the first error encountered.

	NOTE: Copying stuff back from the surface is SLOW! It's probably
		because AGP mem is not backed by the L2 cache (the graphics HW
		renders asynchronously to it) and thus each read is a *real*
		read operation (at least for a single cache line).
*/
HRESULT b3dFinishScene(unsigned int *span, int spanSize, RECT *srcRect, RECT *dstRect)
{
	DDSURFACEDESC ddsd = {0};
	HRESULT hRes;
	RECT inRect;
	unsigned int *outBits;
	DWORD pixelAlign;
	int nPixels;
	int leftX, rightX, y;

	hRes = EndScene(1);
	if(FAILED(hRes)) return hRes;
#ifdef D3D_DIRECT_BLT
	return DD_OK;
#endif

	/* For now constant pixel align */
	pixelAlign = 1;

	/* Adjust the source rectangle to pixel word boundaries;
	   it always covers exactly one row */
	inRect = *dstRect;
	inRect.left &= ~(pixelAlign-1);
	inRect.right = (inRect.right + (pixelAlign-1)) & ~(pixelAlign-1);
	inRect.bottom = inRect.top+1;

	/* Compute left and right x we'll use in the span buffer */
	leftX = inRect.left;
	rightX = inRect.right;
	/* Clip to spanSize */
	if(rightX >= spanSize) rightX = spanSize-1;
	if(leftX >= spanSize) leftX = spanSize-1;
	/* The pixel count is kept signed: an inverted rectangle gives a
	   negative count which must end up here, not in the copy loops. */
	nPixels = rightX - leftX;
	if(nPixels < 1) return DD_OK;

	/* Compute the pointer into the span */
	outBits = span + leftX;

	for(y = dstRect->top; y < dstRect->bottom; y++, inRect.top++, inRect.bottom++) {
		/* Lock the surface */
		ddsd.dwSize = sizeof(ddsd);

		/* <--- WARNING WARNING WARNING ---> */
		/*  No breakpoints after this point  */
		hRes = lpddDevice->lpVtbl->Lock(lpddDevice, &inRect, &ddsd, DDLOCK_WAIT, 0);
		if(FAILED(hRes)) return hRes;
		__try {
#ifndef NO_COPY_BACK
			/* and go copying the bits */
			switch(dwDisplayBitDepth) {
				case DDBD_16: 
					{
						/* expand five bits per component to eight */
						const unsigned short *in = (const unsigned short *) ddsd.lpSurface;
						unsigned int *out = outBits;
						unsigned int pv;
						int n;
						for(n = nPixels; n > 0; n--) {
							pv = *in++;
							pv = ((pv & 0x7C00) << 9) | ((pv & 0x03E0) << 6) | ((pv & 0x1F) << 3);
							*out++ = pv | 0xFF000000;
						}
					}
					break;
				case DDBD_24:
					{
						/* three bytes per pixel plus an opaque alpha byte */
						const unsigned char *in = (const unsigned char *) ddsd.lpSurface;
						unsigned char *out = (unsigned char *) outBits;
						int n;
						for(n = nPixels; n > 0; n--) {
							*out++ = *in++; 
							*out++ = *in++;
							*out++ = *in++;
							*out++ = 0xFF;
						}
					}
					break;
				case DDBD_32:
					{
						const unsigned int *in = (const unsigned int *) ddsd.lpSurface;
						unsigned int *out = outBits;
						int n;
						for(n = nPixels; n > 0; n--) {
							*out++ = *in++ | 0xFF000000;
						}
					}
					break;
			}
#endif
		} __finally {
			/* Unlock the surface */
			hRes = lpddDevice->lpVtbl->Unlock(lpddDevice, ddsd.lpSurface);
		}
		if(FAILED(hRes)) return hRes;
		/* <--- WARNING WARNING WARNING ---> */
		/*  No breakpoints before this point  */

		/* draw the stuff using BitBlt */
		copyBitsFromtoat(leftX,rightX,y);
	}
	return DD_OK;
}

/*
	BuildExecuteBuffer
	Create a new execute buffer that is large enough for the given
	number of vertices and triangles plus the instructions written by
	FillExecuteBuffer.

	lpd3dExecuteBuffer: receives the new execute buffer
	dwVertexCount:      number of vertices the buffer has to hold
	dwTriangleCount:    number of triangles the buffer has to hold

	Returns DD_OK or the error reported by CreateExecuteBuffer.
*/
#ifdef D3D_DEBUG
/* size of the most recently requested execute buffer */
static DWORD lastExecuteBufferSize;
#endif

HRESULT BuildExecuteBuffer(LPDIRECT3DEXECUTEBUFFER *lpd3dExecuteBuffer,
						   DWORD dwVertexCount, DWORD dwTriangleCount)
{
	D3DEXECUTEBUFFERDESC desc;
	DWORD dwFixedPart;
	DWORD dwTotal;
	HRESULT hRes;

	/*
	 * The fixed part of an execute buffer. The buffer consists of
	 *
	 *	1) STATE instruction with three states
	 *	   (FILLMODE, SHADEMODE, DITHERENABLE)
	 *	2) STATE instruction with five states
	 *	   (TEXTUREHANDLE, TEXTUREPERSPECTIVE, TEXTUREFILTERMIN,
	 *	    TEXTUREFILTERMAG, TEXTUREBLENDMODE)
	 *	3) PROCESSVERTICES instruction
	 *	4) TRIANGLE instruction
	 *	5) EXIT instruction
	 *
	 * The triangles are accounted for separately and EXIT carries no
	 * data, which leaves five instruction headers, eight states and
	 * one PROCESSVERTICES record.
	 */
	dwFixedPart = 1 * sizeof(D3DPROCESSVERTICES) +
		8 * sizeof(D3DSTATE) +
		5 * sizeof(D3DINSTRUCTION);

	/*
	 * The variable part: vertices and triangles.
	 *
	 * NOTE: With hardware support the number of vertices and the
	 * overall size of an execute buffer may well be limited. This
	 * should be checked but is ignored for now.
	 */
	dwTotal = dwFixedPart +
		dwTriangleCount * sizeof(D3DTRIANGLE) +
		dwVertexCount * sizeof(D3DTLVERTEX);

#ifdef D3D_DEBUG
	lastExecuteBufferSize = dwTotal;
#endif

	/* Ask the device for a buffer of exactly that size */
	ZeroMemory(&desc, sizeof(desc));
	desc.dwSize       = sizeof(desc);
	desc.dwFlags      = D3DDEB_BUFSIZE;
	desc.dwBufferSize = dwTotal;
	hRes = lpd3dDevice->lpVtbl->CreateExecuteBuffer(lpd3dDevice, &desc,
													lpd3dExecuteBuffer, NULL);
	if (FAILED(hRes)) return hRes;

	return DD_OK;
}

/*
	FillExecuteBuffer
	Fill the D3D execute buffer with the given vertices and with the
	instructions needed to render them.

	lpd3dExecuteBuffer: the buffer to fill; nothing here checks its
	                    size, so it has to be large enough for the
	                    given counts (see BuildExecuteBuffer)
	dwVertexCount:      number of vertices in vtx
	dwVertexPerFace:    3 for indexed triangles, 4 for indexed quads;
	                    0 together with dwFaceCount == 1 denotes a
	                    single non-indexed polygon drawn as a fan
	dwFaceCount:        number of faces in 'faces'
	vtx:                the vertices, already mapped to the viewport
	faces:              1-based vertex indexes, dwVertexPerFace per
	                    face; may be NULL for a non-indexed polygon

	The buffer layout is: vertices, [initial render states],
	[texture render states], PROCESSVERTICES, TRIANGLE, EXIT. The
	initial states are written while fSceneJustStarted is set (the
	flag is cleared here), the texture states while fTextureChanged
	is set (that flag is only read here, not cleared). A triangle
	with an index that is not positive is left out. Zero vertices or
	zero faces result in a valid buffer without the respective data.

	Returns DD_OK or the error reported by Lock or SetExecuteData.
*/
HRESULT FillExecuteBuffer(LPDIRECT3DEXECUTEBUFFER lpd3dExecuteBuffer,
						  DWORD dwVertexCount,
						  DWORD dwVertexPerFace,
						  DWORD dwFaceCount,
						  B3DPrimitiveVertex *vtx,
						  int *faces)
{
    HRESULT              hRes;
    D3DEXECUTEBUFFERDESC d3dExeBufDesc;
    D3DEXECUTEDATA       d3dExecuteData;
    LPD3DTLVERTEX        lpVertex;
    LPD3DINSTRUCTION     lpInstruction;
    LPD3DPROCESSVERTICES lpProcessVertices;
    LPD3DTRIANGLE        lpTriangle;
    LPD3DSTATE           lpState;

	DWORD				 dwInstructionLength;
	USHORT				 *lpTriangleCount;
	USHORT				 dwTriangleCount;
	DWORD				 n;
	int					 nFaces;

    /*
     * Lock the execute buffer.
     */
    ZeroMemory(&d3dExeBufDesc, sizeof(d3dExeBufDesc));
    d3dExeBufDesc.dwSize = sizeof(d3dExeBufDesc);
#ifdef D3D_DEBUG
	/* Debugging aid: build the buffer in scratch memory first and copy
	   it over at the end. NOTE: the scratch block is never unlocked
	   or freed in this function. */
	d3dExeBufDesc.lpData = GlobalLock(GlobalAlloc(GMEM_MOVEABLE | GMEM_ZEROINIT, lastExecuteBufferSize));
#else
    hRes = lpd3dExecuteBuffer->lpVtbl->Lock(lpd3dExecuteBuffer, &d3dExeBufDesc);
    if (FAILED(hRes)) return hRes;
#endif
	/* STEP 1: Fill in all the vertices.
	   The loop is entered only if there is a vertex left, so a count
	   of zero is harmless. */
    lpVertex = (LPD3DTLVERTEX)d3dExeBufDesc.lpData;
	for(n = dwVertexCount; n > 0; n--) {
	    lpVertex->dvSX		 = D3DVAL(vtx->rasterPos[0]); /* X (screen coords) */
		lpVertex->dvSY		 = D3DVAL(vtx->rasterPos[1]); /* Y (screen coords) */
		lpVertex->dvSZ		 = D3DVAL(vtx->rasterPos[2]); /* Z (0.0 -- 1.0) */
		lpVertex->dvRHW		 = D3DVAL(vtx->rasterPos[3]); /* W (1.0 / w) */
		lpVertex->dcColor    = vtx->pixelValue32;		  /* Color of vertex */
		lpVertex->dcSpecular = 0;						  /* Specular color of vertex */
		lpVertex->dvTU		 = D3DVAL(vtx->texCoord[0]);  /* Texture coordinate S */
		lpVertex->dvTV		 = D3DVAL(vtx->texCoord[1]);  /* Texture coordinate T */
		lpVertex++;
		vtx++;
	}
	/* lpVertex now marks the end of the vertices, which is where
	   the instructions start */

	/* STEP 2: Fill in the initial render state */
#ifdef D3D_DEBUG
	if(1) {
#else
	if(fSceneJustStarted) {
#endif
	    lpInstruction = (LPD3DINSTRUCTION)lpVertex;
		lpInstruction->bOpcode = D3DOP_STATERENDER;
		lpInstruction->bSize = sizeof(D3DSTATE);
		lpInstruction->wCount = 3U;
		lpInstruction++;
		lpState = (LPD3DSTATE)lpInstruction;
		lpState->drstRenderStateType = D3DRENDERSTATE_FILLMODE;
		lpState->dwArg[0] = D3DFILL_SOLID;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_SHADEMODE;
		lpState->dwArg[0] = D3DSHADE_GOURAUD;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_DITHERENABLE;
		lpState->dwArg[0] = FALSE;
		lpState++;
		fSceneJustStarted = 0;
	} else {
		lpState = (LPD3DSTATE) lpVertex;
	}

	/* STEP 3: Fill in the texture state */
#ifdef D3D_DEBUG
	if(1) {
#else
	if(fTextureChanged) {
#endif
		lpInstruction = (LPD3DINSTRUCTION)lpState;
		lpInstruction->bOpcode = D3DOP_STATERENDER;
		lpInstruction->bSize = sizeof(D3DSTATE);
		lpInstruction->wCount = 5U;
		lpInstruction++;

		lpState = (LPD3DSTATE)lpInstruction;
		lpState->drstRenderStateType = D3DRENDERSTATE_TEXTUREHANDLE;
		lpState->dwArg[0] = b3dActiveTexture ? b3dActiveTexture->hTexture : 0;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_TEXTUREPERSPECTIVE;
		lpState->dwArg[0] = TRUE;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_TEXTUREMIN;
		lpState->dwArg[0] = B3D_TEXTURE_FILTER_MIN;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_TEXTUREMAG;
		lpState->dwArg[0] = B3D_TEXTURE_FILTER_MAG;
		lpState++;
		lpState->drstRenderStateType = D3DRENDERSTATE_TEXTUREMAPBLEND;
		lpState->dwArg[0] = B3D_TEXTURE_BLENDMODE;
		lpState++;
	}

	/* STEP 4: Fill in the instructions for processing the mesh */

    /*
     * The process vertices instruction tells the driver what to
     * do with the vertices in the buffer. Since we have pre-transformed
	 * and lit vertices we simply copy those vertices.
     */
    lpInstruction = (LPD3DINSTRUCTION)lpState;
    lpInstruction->bOpcode = D3DOP_PROCESSVERTICES;
    lpInstruction->bSize   = sizeof(D3DPROCESSVERTICES);
    lpInstruction->wCount  = 1U;
    lpInstruction++;
    lpProcessVertices = (LPD3DPROCESSVERTICES)lpInstruction;
    lpProcessVertices->dwFlags    = D3DPROCESSVERTICES_COPY;
    lpProcessVertices->wStart     = 0U;           /* First source vertex */
    lpProcessVertices->wDest      = 0U;
    lpProcessVertices->dwCount    = dwVertexCount; /* Number of vertices  */
    lpProcessVertices->dwReserved = 0UL;
    lpProcessVertices++;

    /*
     * STEP 5: Draw the mesh
     */
    lpInstruction = (LPD3DINSTRUCTION)lpProcessVertices;
    lpInstruction->bOpcode = D3DOP_TRIANGLE;
    lpInstruction->bSize   = sizeof(D3DTRIANGLE);
	/* cache away the pointer to the # of tris -- this will be set at the end */
	lpTriangleCount = &lpInstruction->wCount;
    lpInstruction->wCount  = 0U;
    lpInstruction++;

	/* Fill in the triangles */
    lpTriangle = (LPD3DTRIANGLE)lpInstruction;
	dwTriangleCount = 0;
	if(dwVertexPerFace == 0 && dwFaceCount == 1) {
		/* This is a non-indexed polygon: a fan around vertex 0 */
		for(n = 2; n < dwVertexCount; n++) {
			lpTriangle->wV1 = 0;
			lpTriangle->wV2 = n-1;
			lpTriangle->wV3 = n;
			lpTriangle->wFlags = D3DTRIFLAG_EDGEENABLETRIANGLE;
			lpTriangle++;
			dwTriangleCount++;
		}
	} else if(faces != NULL) {
		/* This is an indexed polygon set. The indexes are 1-based;
		   a triangle using an index below one is skipped. */
		if(dwVertexPerFace == 3) {
			for(nFaces = (int) dwFaceCount; nFaces > 0; nFaces--) {
				int idx0 = *faces++;
				int idx1 = *faces++;
				int idx2 = *faces++;
				if(idx0 > 0 && idx1 > 0 && idx2 > 0) {
					lpTriangle->wV1 = idx0 - 1;
					lpTriangle->wV2 = idx1 - 1;
					lpTriangle->wV3 = idx2 - 1;
					lpTriangle->wFlags = D3DTRIFLAG_EDGEENABLETRIANGLE;
					lpTriangle++;
					dwTriangleCount++;
				}
			}
		} else if(dwVertexPerFace == 4) {
			for(nFaces = (int) dwFaceCount; nFaces > 0; nFaces--) {
				int idx0 = *faces++;
				int idx1 = *faces++;
				int idx2 = *faces++;
				int idx3 = *faces++;
				/* first half of the quad: 0-1-2 */
				if(idx0 > 0 && idx1 > 0 && idx2 > 0) {
					lpTriangle->wV1 = idx0 - 1;
					lpTriangle->wV2 = idx1 - 1;
					lpTriangle->wV3 = idx2 - 1;
					lpTriangle->wFlags = D3DTRIFLAG_EDGEENABLETRIANGLE;
					lpTriangle++;
					dwTriangleCount++;
				}
				/* second half of the quad: 2-3-0 */
				if(idx2 > 0 && idx3 > 0 && idx0 > 0) {
					lpTriangle->wV1 = idx2 - 1;
					lpTriangle->wV2 = idx3 - 1;
					lpTriangle->wV3 = idx0 - 1;
					lpTriangle->wFlags = D3DTRIFLAG_EDGEENABLETRIANGLE;
					lpTriangle++;
					dwTriangleCount++;
				}
			}
		}
	}
	*lpTriangleCount = dwTriangleCount;

    /* STEP 6: Stop execution of the buffer. */
    lpInstruction = (LPD3DINSTRUCTION)lpTriangle;
    lpInstruction->bOpcode = D3DOP_EXIT;
    lpInstruction->bSize   = 0UL;
    lpInstruction->wCount  = 0U;
	lpInstruction++;

	/*
	 * Compute the length of the instructions.
	 */
	dwInstructionLength = (LPSTR)lpInstruction - (LPSTR)lpVertex;

#ifdef D3D_DEBUG
	if( (LPSTR) lpInstruction > (LPSTR)d3dExeBufDesc.lpData + lastExecuteBufferSize) {
		/* So I can put a breakpoint here */
		lastExecuteBufferSize = lastExecuteBufferSize;
	}

	{
		void *lpData = d3dExeBufDesc.lpData;
	    hRes = lpd3dExecuteBuffer->lpVtbl->Lock(lpd3dExecuteBuffer, &d3dExeBufDesc);
		if (FAILED(hRes)) return hRes;
		memcpy(d3dExeBufDesc.lpData, lpData, lastExecuteBufferSize);
	}
#endif
    /*
     * Unlock the execute buffer.
     */
    lpd3dExecuteBuffer->lpVtbl->Unlock(lpd3dExecuteBuffer);

    /*
     * Set the execute data so Direct3D knows how many vertices are in the
     * buffer and where the instructions start.
     */
    ZeroMemory(&d3dExecuteData, sizeof(d3dExecuteData));
    d3dExecuteData.dwSize = sizeof(d3dExecuteData);
    d3dExecuteData.dwVertexCount       = dwVertexCount;
    d3dExecuteData.dwInstructionOffset = dwVertexCount * sizeof(D3DTLVERTEX);
    d3dExecuteData.dwInstructionLength = dwInstructionLength;
    hRes = lpd3dExecuteBuffer->lpVtbl->SetExecuteData(lpd3dExecuteBuffer, &d3dExecuteData);
    if (FAILED(hRes)) return hRes;

    return DD_OK;
}

/*
	b3dDrawIndexedTriangles
	Draw an indexed triangle mesh.

	textureIndex: handle of the texture to use (see b3dInstallTexture)
	vtxPointer:   the vertices; they are mapped into the viewport in place
	nVertices:    number of vertices
	facePtr:      the faces, three 1-based vertex indexes each
	nFaces:       number of faces

	Returns DD_OK if the mesh was submitted or if there was nothing to
	draw. On any failure the scene is ended, all DirectX objects are
	released, fDirectXEnabled is cleared and the error code is returned.
*/
HRESULT b3dDrawIndexedTriangles(int textureIndex, 
								B3DPrimitiveVertex *vtxPointer, int nVertices,
								B3DInputFace *facePtr, int nFaces)
{
	HRESULT hRes;
	LPDIRECT3DEXECUTEBUFFER lpd3dExecuteBuffer = NULL;

	/* Setup the texture */
	b3dInstallTexture(textureIndex);
	/* Map the vertices into the viewport */
	b3dMapObjectVertices(vtxPointer, nVertices);
	/* An empty mesh needs no execute buffer at all */
	if(nVertices < 1 || nFaces < 1) return DD_OK;
	/* Build the execute buffer */
	hRes = BuildExecuteBuffer(&lpd3dExecuteBuffer, nVertices, nFaces);
	if(FAILED(hRes)) goto errorCleanup;
	/* Fill the execute buffer */
	hRes = FillExecuteBuffer(lpd3dExecuteBuffer, nVertices, 3, nFaces, vtxPointer, facePtr);
	if(FAILED(hRes)) goto errorCleanup;
	/* Make sure the scene has started */
	hRes = StartScene();
	if(FAILED(hRes)) goto errorCleanup;
	/* Submit the execute buffer */
	hRes = lpd3dDevice->lpVtbl->Execute(lpd3dDevice,lpd3dExecuteBuffer, lpd3dViewport, D3DEXECUTE_UNCLIPPED);
	if(FAILED(hRes)) goto errorCleanup;
#ifdef D3D_DEBUG
	hRes = EndScene(1);
	if(FAILED(hRes)) goto errorCleanup;
#endif
	/* Free the execute buffer. Release() answers the remaining
	   reference count, not an HRESULT, so there is nothing to check. */
	lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	return DD_OK;
errorCleanup:
	/* Give back the execute buffer before the device goes away */
	if(lpd3dExecuteBuffer) lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	EndScene(0);
	sqDirectXRelease();
	fDirectXEnabled = 0;
	return hRes;
}


/*
	b3dDrawIndexedQuads
	Draw an indexed quad mesh. Each quad is rendered as two triangles.

	textureIndex: handle of the texture to use (see b3dInstallTexture)
	vtxPointer:   the vertices; they are mapped into the viewport in place
	nVertices:    number of vertices
	facePtr:      the faces, four 1-based vertex indexes each
	nFaces:       number of faces

	Returns DD_OK if the mesh was submitted or if there was nothing to
	draw. On any failure the scene is ended, all DirectX objects are
	released, fDirectXEnabled is cleared and the error code is returned.
*/
HRESULT b3dDrawIndexedQuads(int textureIndex, 
							B3DPrimitiveVertex *vtxPointer, int nVertices,
							B3DInputFace *facePtr, int nFaces)
{
	HRESULT hRes;
	LPDIRECT3DEXECUTEBUFFER lpd3dExecuteBuffer = NULL;

	/* Setup the texture */
	b3dInstallTexture(textureIndex);
	/* Map the vertices into the viewport */
	b3dMapObjectVertices(vtxPointer, nVertices);
	/* An empty mesh needs no execute buffer at all */
	if(nVertices < 1 || nFaces < 1) return DD_OK;
	/* Build the execute buffer (two triangles per quad) */
	hRes = BuildExecuteBuffer(&lpd3dExecuteBuffer, nVertices, nFaces*2);
	if(FAILED(hRes)) goto errorCleanup;
	/* Fill the execute buffer */
	hRes = FillExecuteBuffer(lpd3dExecuteBuffer, nVertices, 4, nFaces, vtxPointer, facePtr);
	if(FAILED(hRes)) goto errorCleanup;
	/* Make sure the scene has started */
	hRes = StartScene();
	if(FAILED(hRes)) goto errorCleanup;
	/* Submit the execute buffer */
	hRes = lpd3dDevice->lpVtbl->Execute(lpd3dDevice,lpd3dExecuteBuffer, lpd3dViewport, D3DEXECUTE_UNCLIPPED);
	if(FAILED(hRes)) goto errorCleanup;
#ifdef D3D_DEBUG
	hRes = EndScene(1);
	if(FAILED(hRes)) goto errorCleanup;
#endif
	/* Free the execute buffer. Release() answers the remaining
	   reference count, not an HRESULT, so there is nothing to check. */
	lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	return DD_OK;
errorCleanup:
	/* Give back the execute buffer before the device goes away */
	if(lpd3dExecuteBuffer) lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	EndScene(0);
	sqDirectXRelease();
	fDirectXEnabled = 0;
	return hRes;
}

/*
	b3dDrawPolygon
	Draw a single polygon as a triangle fan around its first vertex.

	textureIndex: handle of the texture to use (see b3dInstallTexture)
	vtxPointer:   the vertices; they are mapped into the viewport in place
	nVertices:    number of vertices; fewer than three draw nothing

	Returns DD_OK if the polygon was submitted or if there was nothing
	to draw. On any failure the scene is ended, all DirectX objects are
	released, fDirectXEnabled is cleared and the error code is returned.
*/
HRESULT b3dDrawPolygon(int textureIndex, 
						B3DPrimitiveVertex *vtxPointer, int nVertices)
{
	HRESULT hRes;
	LPDIRECT3DEXECUTEBUFFER lpd3dExecuteBuffer = NULL;

	/* Setup the texture */
	b3dInstallTexture(textureIndex);
	/* Map the vertices into the viewport */
	b3dMapObjectVertices(vtxPointer, nVertices);
	/* A polygon needs three vertices; with fewer the triangle count
	   below (nVertices-2) would not even be positive */
	if(nVertices < 3) return DD_OK;
	/* Build the execute buffer */
	hRes = BuildExecuteBuffer(&lpd3dExecuteBuffer, nVertices, nVertices-2);
	if(FAILED(hRes)) goto errorCleanup;
	/* Fill the execute buffer */
	hRes = FillExecuteBuffer(lpd3dExecuteBuffer, nVertices, 0, 1, vtxPointer, NULL);
	if(FAILED(hRes)) goto errorCleanup;
	/* Make sure the scene has started */
	hRes = StartScene();
	if(FAILED(hRes)) goto errorCleanup;
	/* Submit the execute buffer */
	hRes = lpd3dDevice->lpVtbl->Execute(lpd3dDevice,lpd3dExecuteBuffer, lpd3dViewport, D3DEXECUTE_UNCLIPPED);
	if(FAILED(hRes)) goto errorCleanup;
#ifdef D3D_DEBUG
	hRes = EndScene(1);
	if(FAILED(hRes)) goto errorCleanup;
#endif
	/* Free the execute buffer. Release() answers the remaining
	   reference count, not an HRESULT, so there is nothing to check. */
	lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	return DD_OK;
errorCleanup:
	/* Give back the execute buffer before the device goes away */
	if(lpd3dExecuteBuffer) lpd3dExecuteBuffer->lpVtbl->Release(lpd3dExecuteBuffer);
	EndScene(0);
	sqDirectXRelease();
	fDirectXEnabled = 0;
	return hRes;
}


/***************************************************************************
 ***************************************************************************
					DirectDraw Initialization
 ***************************************************************************
 ***************************************************************************/
/*
	sqDirectXCreateOffscreenSurface:
		Create the offscreen rendering surface for Squeak together
		with everything that depends on it: the z-buffer (if the
		device reports a z-buffer depth handled here), the Direct3D
		device and a viewport covering the whole surface.

		dwWidth, dwHeight: the size of the surface in pixels; the
		surface itself is created four pixels wider.

		Objects left over from an earlier call are released first.
		Returns DD_OK on success, otherwise the error code of the
		call that failed. Objects created before the failure are
		kept; they are released by the next call of this function
		or by sqDirectXRelease.
*/
int sqDirectXCreateOffscreenSurface(DWORD dwWidth, DWORD dwHeight)
{
    DDSURFACEDESC   ddsd;
    HRESULT         hRes;
    DWORD           dwZBufferBitDepth;
	D3DVIEWPORT		d3dViewport;

	/* Clean up the old offscreen surfaces (if any) */
#define RELEASE(lp) if(lp) { lp->lpVtbl->Release(lp); lp = NULL; }
	RELEASE(lpd3dViewport);
	RELEASE(lpd3dDevice);
	RELEASE(lpddZBuffer);
	RELEASE(lpddDevice);
#undef RELEASE

    /*
     * Create the device surface. The pixel format will be identical
     * to the primary so we don't have to explicitly specify it. We do
     * need to explicitly specify the size, memory type and capabilities
     * of the surface.
     */
    ZeroMemory(&ddsd, sizeof(ddsd));
    ddsd.dwSize         = sizeof(ddsd);
    ddsd.dwFlags        = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT;
    ddsd.dwWidth        = dwWidth + 4; /* four extra pixels at right */
    ddsd.dwHeight       = dwHeight;
	/* Try creating the surface in video memory before anything else */
    ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN | DDSCAPS_VIDEOMEMORY;
	/************************ BEGIN D3D INIT ************************/
	ddsd.ddsCaps.dwCaps |= DDSCAPS_3DDEVICE;
	/************************ END D3D INIT ************************/
    hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddDevice, NULL);
    if (FAILED(hRes)) {
		/* The creation in video memory failed.
		   Try again but this time using system memory. */
		ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN | DDSCAPS_SYSTEMMEMORY;
		/************************ BEGIN D3D INIT ************************/
		ddsd.ddsCaps.dwCaps |= DDSCAPS_3DDEVICE;
		/************************ END D3D INIT ************************/
		hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddDevice, NULL);
		if(FAILED(hRes)) return hRes;
	}

	/************************ BEGIN D3D INIT ************************/
    /*
     * We now determine whether we need a z-buffer or not and if so
     * its bit depth. A depth of zero means "no z-buffer"; this also
     * covers a device that reports only depths not handled here, so
     * that the depth is never used without having been set.
     */
	dwZBufferBitDepth = 0;
	if(d3dDeviceDesc.dwDeviceZBufferBitDepth & DDBD_16)
		dwZBufferBitDepth = 16;
	else if(d3dDeviceDesc.dwDeviceZBufferBitDepth & DDBD_32)
		dwZBufferBitDepth = 32;
	else if(d3dDeviceDesc.dwDeviceZBufferBitDepth & DDBD_8)
		dwZBufferBitDepth = 8;

    if (dwZBufferBitDepth)
    {
        /*
         * Create the z-buffer.
         */
        ZeroMemory(&ddsd, sizeof(ddsd));
        ddsd.dwSize            = sizeof(ddsd);
        ddsd.dwFlags           = DDSD_CAPS   |
                                 DDSD_WIDTH  |
                                 DDSD_HEIGHT |
                                 DDSD_ZBUFFERBITDEPTH;
		if(fIsHardwareDevice)
	        ddsd.ddsCaps.dwCaps    = DDSCAPS_ZBUFFER | DDSCAPS_VIDEOMEMORY;
		else
	        ddsd.ddsCaps.dwCaps    = DDSCAPS_ZBUFFER | DDSCAPS_SYSTEMMEMORY;
        ddsd.dwWidth           = dwWidth + 4; /* dimensions must match */
        ddsd.dwHeight          = dwHeight;
        ddsd.dwZBufferBitDepth = dwZBufferBitDepth;
		hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddZBuffer, NULL);
        if (FAILED(hRes)) {
			/* Try again in system memory */
	        ddsd.ddsCaps.dwCaps = DDSCAPS_ZBUFFER | DDSCAPS_SYSTEMMEMORY;
			hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddZBuffer, NULL);
			if (FAILED(hRes)) return hRes;
		}

        /*
         * Attach it to the rendering target.
         */
        hRes = lpddDevice->lpVtbl->AddAttachedSurface(lpddDevice, lpddZBuffer);
        if (FAILED(hRes)) return hRes;
    }
    /*
     * Now all the elements are in place (device surface in correct
     * memory type, attached z-buffer of correct depth and memory
     * type, and palette if necessary) we can actually query for the
     * Direct3D device we chose earlier.
     */
    hRes = lpddDevice->lpVtbl->QueryInterface(lpddDevice, &guidDevice, &lpd3dDevice);
    if (FAILED(hRes)) return hRes;

	/*
	 * Check the texture formats so we know what we may use later on.
	 */
	fTextureFound08 = 
		fTextureFound0x5x5x5 = 
		fTextureFound1x5x5x5 =
		fTextureFound4x4x4x4 =
		fTextureFound0x8x8x8 =
		fTextureFound8x8x8x8 = 0;
	hRes = lpd3dDevice->lpVtbl->EnumTextureFormats(lpd3dDevice, FindTextureCallback, NULL);
    if (FAILED(hRes)) return hRes;

	/*
	 * Add the viewport for the device
	 */
    hRes = lpd3d->lpVtbl->CreateViewport(lpd3d, &lpd3dViewport, NULL);
    if (FAILED(hRes)) return hRes;
    hRes = lpd3dDevice->lpVtbl->AddViewport(lpd3dDevice, lpd3dViewport);
    if (FAILED(hRes)) return hRes;

	/*
	 * Set the viewport to cover the surface completely.
	 * The z range is the same one b3dSetViewport uses.
	 */
    ZeroMemory(&d3dViewport, sizeof(d3dViewport));
    d3dViewport.dwSize   = sizeof(d3dViewport);
    d3dViewport.dwX      = 0UL;
    d3dViewport.dwY      = 0UL;
    d3dViewport.dwWidth  = dwWidth;
    d3dViewport.dwHeight = dwHeight;
    d3dViewport.dvScaleX = D3DVAL((float)d3dViewport.dwWidth / 2.0);
    d3dViewport.dvScaleY = D3DVAL((float)d3dViewport.dwHeight / 2.0);
    d3dViewport.dvMaxX   = D3DVAL(1.0);
    d3dViewport.dvMaxY   = D3DVAL(1.0);
    d3dViewport.dvMinZ   = D3DVAL(0.0);
    d3dViewport.dvMaxZ   = D3DVAL(1.0);
    hRes = lpd3dViewport->lpVtbl->SetViewport(lpd3dViewport, &d3dViewport);
    if (FAILED(hRes)) return hRes;

	/************************ END D3D INIT ************************/

	dxWidth = dwWidth;
	dxHeight = dwHeight;
    return DD_OK;
}

/*
	sqDirectXRelease:
		Cleanup DirectX stuff: release all cached textures, then
		every Direct3D and DirectDraw object held by this file.
		Each pointer is set back to NULL, so calling this more
		than once is harmless.
*/
void sqDirectXRelease(void)
{
/* Release the interface if there is one and forget about it */
#define DX_RELEASE(obj) \
	do { \
		if(obj) { \
			obj->lpVtbl->Release(obj); \
			obj = NULL; \
		} \
	} while(0)

	/* the textures go first */
	b3dReleaseTextureCache();

	/* Direct3D: viewport, device, interface */
	DX_RELEASE(lpd3dViewport);
	DX_RELEASE(lpd3dDevice);
	DX_RELEASE(lpd3d);

	/* DirectDraw: clipper, surfaces, interface */
	DX_RELEASE(lpddClipper);
	DX_RELEASE(lpddZBuffer);
	DX_RELEASE(lpddDevice);
	DX_RELEASE(lpddPrimary);
	DX_RELEASE(lpdd);

#undef DX_RELEASE
}

/*
	sqDirectXInitialize:
		One-time initialization of DirectX.
		If anything fails here we will never try again.
*/

HRESULT sqDirectXInitialize(void)
{
	static int ComInit = 0;
	HRESULT hRes;
    DDSURFACEDESC   ddsd;

	b3dInitTextureCache();
	/* Initialize the COM interface */
	if(!ComInit) {
		hRes = CoInitialize(NULL);
		if(FAILED(hRes)) goto errorCleanup;
		ComInit = 1;
	}
	/* Create a new DirectDraw interface */
	hRes = CoCreateInstance(&CLSID_DirectDraw, NULL, CLSCTX_ALL, &IID_IDirectDraw, &lpdd);
	if(FAILED(hRes)) goto errorCleanup;
	
	/* Initialize DDraw */
	hRes = lpdd->lpVtbl->Initialize(lpdd, NULL);
	if(FAILED(hRes)) goto errorCleanup;

	/* Get the capabilities of the hardware */
	hwCaps.dwSize = sizeof(hwCaps);
	swCaps.dwSize = sizeof(swCaps);
	hRes = lpdd->lpVtbl->GetCaps(lpdd, &hwCaps, &swCaps);
	if(FAILED(hRes)) goto errorCleanup;
	
	/* Get the current display mode */
	ZeroMemory(&displayDesc, sizeof(displayDesc));
	displayDesc.dwSize = sizeof(displayDesc);
	hRes = lpdd->lpVtbl->GetDisplayMode(lpdd, &displayDesc);
	if(FAILED(hRes)) goto errorCleanup;
	
	/* Set the display depth bit */
	switch(displayDesc.ddpfPixelFormat.dwRGBBitCount) {
		case 16: dwDisplayBitDepth = DDBD_16; break;
		case 24: dwDisplayBitDepth = DDBD_24; break;
		case 32: dwDisplayBitDepth = DDBD_32; break;
		default: goto errorCleanup; /* we only deal with 16-32 bit */
	};

	/* Set the cooperation level */
	hRes = lpdd->lpVtbl->SetCooperativeLevel(lpdd, stWindow,
			DDSCL_NORMAL /* | DDSCL_ALLOWREBOOT | DDSCL_EXCLUSIVE | DDSCL_FULLSCREEN */); 
	if(FAILED(hRes)) goto errorCleanup;

	/************************ BEGIN D3D INIT **************************/

	/* Get the D3D interface */
	hRes = lpdd->lpVtbl->QueryInterface(lpdd, &IID_IDirect3D, &lpd3d);
	if(FAILED(hRes)) goto errorCleanup;

	/* Choose a rendering device */
    fDeviceFound = 0;
    hRes = lpd3d->lpVtbl->EnumDevices(lpd3d, EnumDeviceCallback, &fDeviceFound);
	if(FAILED(hRes)) return 0;
	if(!fDeviceFound) return 0;

	/************************ END D3D INIT **************************/

	/* Create the primary surface */
    ZeroMemory(&ddsd, sizeof(ddsd));
    ddsd.dwSize         = sizeof(ddsd);
    ddsd.dwFlags        = DDSD_CAPS;
    ddsd.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
    hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddPrimary, NULL);
    if (FAILED(hRes)) goto errorCleanup;

#if 0
	/* Create the 32bit surface */
    ZeroMemory(&ddsd, sizeof(ddsd));
    ddsd.dwSize         = sizeof(ddsd);
    ddsd.dwFlags        = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT;
    ddsd.dwWidth        = 1;
    ddsd.dwHeight       = 1;
	ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN | DDSCAPS_SYSTEMMEMORY;
	ddsd.ddpfPixelFormat.dwSize = sizeof(DDPIXELFORMAT);
	ddsd.ddpfPixelFormat.dwFlags = DDPF_RGB;
	ddsd.ddpfPixelFormat.dwRGBBitCount = 32;
	ddsd.ddpfPixelFormat.dwRBitMask = 0x00FF0000;
	ddsd.ddpfPixelFormat.dwGBitMask = 0x0000FF00;
	ddsd.ddpfPixelFormat.dwBBitMask = 0x000000FF;
    hRes = lpdd->lpVtbl->CreateSurface(lpdd, &ddsd, &lpddSqueak32, NULL);
	if(FAILED(hRes)) lpddSqueak32 = NULL;
#endif
    /*
     * Create the clipper. We bind the application's window to the
     * clipper and attach it to the primary. This ensures then when we
     * blit from the rendering surface to the primary we don't write
     * outside the visible region of the window.
     */
    hRes = lpdd->lpVtbl->CreateClipper(lpdd, 0UL, &lpddClipper, NULL);
    if (FAILED(hRes)) goto errorCleanup;
    hRes = lpddClipper->lpVtbl->SetHWnd(lpddClipper, 0UL, stWindow);
    if (FAILED(hRes)) goto errorCleanup;
    hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, lpddClipper);
    if (FAILED(hRes)) goto errorCleanup;
	fClipperAttached = 1;
	/* successful initialization */
	return DD_OK;
errorCleanup:
	sqDirectXRelease();
	return hRes;
}

/***************************************************************************
 ***************************************************************************
					Squeak Display function
 ***************************************************************************
 ***************************************************************************/

/* 
	ioDirectXShowDisplayBits:
		Perform the ioShowDisplay operation using DirectX.
		Return true if successful, false to run GDI code.

		dispBitsIndex is handed unchanged to the pixel conversion
		routines as the source; width/height/depth describe the
		Squeak display; affectedL/R/T/B is the region to update.
		Only Squeak depths of 8, 16 and 32 are handled here.
*/
int ioDirectXShowDisplayBits(int dispBitsIndex, int width, int height, int depth,
		  int affectedL, int affectedR, int affectedT, int affectedB)
{
	HRESULT hRes;
	RECT dxRect;
	RECT dstRect;

	if(depth < 8) return 0; /* Never even try with < 8 bit */

	if(!fDirectXEnabled) return 0; /* DX has been disabled */

	/*	Adjust the affected region so we only draw stuff that's visible. */
	if (affectedR > width) affectedR = width;
	if (affectedB > height) affectedB = height;
	if (affectedL > width) affectedL = width;
	if (affectedT > height) affectedT = height;

	/* Don't draw empty areas */
	if(affectedL == affectedR || affectedT == affectedB) return 1;

	/* Check if DirectX has been initialized.
	   If not, perform startup initialization
	   and initialize fDirectXEnabled with the result */
	if(!lpdd) {
		sqDirectXInitialize();
		fDirectXEnabled = lpdd != 0;
		if(!fDirectXEnabled) return 0;
	}

	/* Check if Squeak's display has changed.
	   If so, create a new offscreen surface for rendering */
	if(!lpddDevice || width > (int)dxWidth || height > (int)dxHeight) {
		sqDirectXCreateOffscreenSurface(width, height);
		/* If the initialization failed, get out and never try again */
		if(!lpddDevice) {
			sqDirectXRelease();
			fDirectXEnabled = 0;
			return 0;
		}
		/* Make sure that we fill the entire newly created surface.
		   All four edges must be reset, including the top one. */
		affectedL = 0;
		affectedT = 0;
		affectedR = width;
		affectedB = height;
	}

	/* Check if any surface has been lost.
	   We don't actually have to do this every
	   time we come into this method but for the
	   moment it's a simple way to ensure that
	   this is not a problem. */

	/* Check the primary surface */
	D3DRESTORE(lpddPrimary);
	/* Check if the render surface is still there */
	D3DRESTORE(lpddDevice);
	/* Check if the z buffer is still there */
	D3DRESTORE(lpddZBuffer);

	/* Setup dxRect for copying */
	dxRect.left = affectedL;
	dxRect.right = affectedR;
	dxRect.top = affectedT;
	dxRect.bottom = affectedB;

	/* Okay, now we're all set. Convert the Squeak bits into the
	   offscreen surface, picking the routine by source depth and
	   host display depth. */
	switch(depth) {
		case 8:	
			switch(dwDisplayBitDepth) {
				case DDBD_16: hRes = Copy8x16(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_24: hRes = Copy8x24(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_32: hRes = Copy8x32(dispBitsIndex, width, lpddDevice, &dxRect); break;
				default: return 0;
			} break;
		case 16: 
			switch(dwDisplayBitDepth) {
				case DDBD_16: hRes = FastCopy16x16(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_24: hRes = FastCopy16x24(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_32: hRes = FastCopy16x32(dispBitsIndex, width, lpddDevice, &dxRect); break;
				default: return 0;
			} break;
		case 32:
			switch(dwDisplayBitDepth) {
				case DDBD_16: hRes = FastCopy32x16(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_24: hRes = FastCopy32x24(dispBitsIndex, width, lpddDevice, &dxRect); break;
				case DDBD_32: hRes = FastCopy32x32(dispBitsIndex, width, lpddDevice, &dxRect); break;
				default: return 0;
			} break;
		default: return 0;
	}
	if(FAILED(hRes)) return 0;

	/* Check the clip status.

	   NOTE: Using a clipper is usually a simple way to ensure 
	   that we don't draw over any occluded areas. It is, however,
	   also quite a bit slower since we cannot use BltFast
	   if a clipper is attached to the primary surface.
	   (BltFast is on my machine about a factor of two faster).
	   Thus, we're trying to do a smart check to see if clipping
	   is actually required. Later on we might do this check
	   only occasionally, in particular if we're running in
	   full screen mode.

	   NOTE: We're checking the clipper status *immediately* before
	   doing the actual blt (and not before the conversion). This is
	   because the conversion may take some milliseconds in which
	   a change in the window order could occur.
	*/
	if(fDirectXSmartClipper) {
		static char clb[1024];
		static LPRGNDATA rgnData = (LPRGNDATA) clb;
		DWORD dwSize;

		/* dwSize is passed by address and may be rewritten by the
		   call, so it must describe the real buffer size on every
		   query rather than whatever a previous call left behind. */
		dwSize = sizeof(clb);

		/* Query the current clip list */
		hRes = lpddClipper->lpVtbl->GetClipList(lpddClipper, NULL, rgnData, &dwSize);
		if(hRes == DD_OK) {
			/* Check if the clip list has no entry (e.g., entire region invisible) */
			if(rgnData->rdh.nCount == 0)
				return 1;
			/* Check if the clip list has one entry.
			   If so, detach the clipper so we can use BltFast */
			if(rgnData->rdh.nCount > 1) {
				/* More than one entry -- attach the clipper */
				if(!fClipperAttached) {
					hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, lpddClipper);
					/* Clipping is required but unavailable: let GDI draw
					   rather than blt over occluded areas. */
					if(FAILED(hRes)) return 0;
					fClipperAttached = 1;
				}
			} else {
				/* One entry. Detach the clipper. */
				if(fClipperAttached) {
					hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, NULL);
					if(!FAILED(hRes)) fClipperAttached = 0;
				}
			}
		} else if(hRes == DDERR_REGIONTOOSMALL && !fClipperAttached) {
			/* We have no clipper but the region is too small
			   (meaning there's lots of stuff to clip). Attach it. */
			hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, lpddClipper);
			/* rgnData was not filled in on this path, so we must not
			   continue without a clipper. */
			if(FAILED(hRes)) return 0;
			fClipperAttached = 1;
		} else return 0;
		/* NOTE(review): DDERR_REGIONTOOSMALL with the clipper already
		   attached also ends up in the 'return 0' above (GDI fallback).
		   That is kept as-is; confirm whether a clipped Blt was meant. */

		/* After detaching the clipper, set the affected region so
		   we don't accidentally blt outside the window.
		   (Only reachable after a successful query, since every
		   other path leaves the clipper attached or returns.) */
		if(rgnData->rdh.nCount == 1 && !fClipperAttached) {
			int dx, dy;
			/* Compute the inset of clip rect into stWindowRect */
			dx = rgnData->rdh.rcBound.left - stWindowRect.left;
			dy = rgnData->rdh.rcBound.top - stWindowRect.top;
			if(affectedL < dx) affectedL = dx;
			if(affectedT < dy) affectedT = dy;
			/* Compute the distance of the clip rect from stWindowRect origin */
			dx = rgnData->rdh.rcBound.right - stWindowRect.left;
			dy = rgnData->rdh.rcBound.bottom - stWindowRect.top;
			if(affectedR > dx) affectedR = dx;
			if(affectedB > dy) affectedB = dy;
		}
	}


	/* Finally, copy the stuff out */
	dxRect.left = affectedL;
	dxRect.right = affectedR;
	dxRect.top = affectedT;
	dxRect.bottom = affectedB;

	/* Destination is the same region offset by the window origin */
	dstRect.left   = stWindowRect.left + affectedL;
	dstRect.top    = stWindowRect.top  + affectedT;
	dstRect.right  = stWindowRect.left + affectedR;
	dstRect.bottom = stWindowRect.top  + affectedB;

	if(!fClipperAttached) {
		/* No clipper attached. We can use the BltFast method
		   which is usually quite a bit faster than anything else. */
		hRes = lpddPrimary->lpVtbl->BltFast(lpddPrimary, dstRect.left, dstRect.top, lpddDevice, &dxRect, DDBLTFAST_WAIT | DDBLTFAST_NOCOLORKEY);
	}
	if(fClipperAttached || FAILED(hRes)) {
		/* If we have a clipper attached or BltFast went
		   wrong do it the normal way. */
		hRes = lpddPrimary->lpVtbl->Blt(lpddPrimary,&dstRect,lpddDevice, &dxRect, DDBLT_WAIT, NULL);
	}
	if(FAILED(hRes)) return 0;

#if 0
	/* Wait until the blt completed */
	do {
		hRes = lpddDevice->lpVtbl->GetBltStatus(lpddDevice, DDGBS_ISBLTDONE);
	} while(hRes == DDERR_WASSTILLDRAWING);
	if(FAILED(hRes)) return hRes;
#endif
	return 1;
}


/***************************************************************************
 ***************************************************************************
					Squeak primitives
 ***************************************************************************
 ***************************************************************************/

/* Forward declarations of the functions the primitives below call to
   inspect and manipulate the Squeak stack and objects. None of them
   is defined in this part of the file (presumably they are provided
   by the Squeak interpreter -- verify against sq.h). */

/* Primitive state and stack access */
int methodArgumentCount();
int primitiveFail();
int stackValue(int);
int stackIntegerValue(int);
int stackObjectValue(int);
int booleanValueOf(int);
int failed();
int pop(int);
int push(int);
int pushBool(int);
int integerObjectOf(int);
/* Object format and contents */
int isPointers(int);
int isWords(int);
int slotSizeOf(int);
void *firstIndexableField(int);
int positive32BitValueOf(int);
int fetchPointerofObject(int, int);
/* Class tests */
int fetchClassOf(int);
int classBitmap(void);
int classPoint(void);
int byteSizeOf(int);


/*
	stackPrimitiveVertex:
		Fetch the object at stack position 'index' and return a pointer
		to its first indexable field, provided the object is a words
		object with exactly 16 slots. Returns null in every other case.
*/
void* stackPrimitiveVertex(int index) {
	int vtxOop = stackObjectValue(index);

	if ((vtxOop != null) && isWords(vtxOop) && (slotSizeOf(vtxOop) == 16)) {
		return firstIndexableField(vtxOop);
	}
	return null;
}

/*
	stackPrimitiveVertexArrayofSize:
		Fetch the object at stack position 'index' and return a pointer
		to its first indexable field if it can hold at least 'nItems'
		vertices. A vertex occupies 16 words (see stackPrimitiveVertex),
		so the object must be a words object whose slot count is a
		multiple of 16 and at least nItems * 16.
		Returns null if the object is unsuitable or nItems is negative.
*/
void* stackPrimitiveVertexArrayofSize(int index, int nItems) {
	int oopSize;
	int oop;

	oop = stackObjectValue(index);
	if (oop == null) {
		return null;
	}
	if (nItems < 0) {
		return null;
	}
	if (isWords(oop)) {
		oopSize = slotSizeOf(oop);
		/* The size test used to read ((oopSize >= nItems) * 16), which is
		   merely "oopSize >= nItems". Compare vertex counts instead; using
		   a division also avoids overflowing nItems * 16. */
		if (((oopSize % 16) == 0) && ((oopSize / 16) >= nItems)) {
			return firstIndexableField(oop);
		}
	}
	return null;
}

/*
	stackPrimitiveIndexArrayofSizevalidateforVertexSize:
		Fetch the object at stack position 'stackIndex' and return a
		pointer to its first indexable field, provided it is a words
		object with at least 'nItems' slots. If 'aBool' is non-zero the
		first nItems entries are additionally required to lie in the
		range 0..maxIndex (inclusive). Returns null on any violation.
*/
void* stackPrimitiveIndexArrayofSizevalidateforVertexSize(int stackIndex, int nItems, int aBool, int maxIndex) {
	int idxOop;
	int *indices;
	int k;

	idxOop = stackObjectValue(stackIndex);
	if ((idxOop == null) || !isWords(idxOop) || (slotSizeOf(idxOop) < nItems)) {
		return null;
	}
	indices = (int *) firstIndexableField(idxOop);
	if (!aBool) {
		/* No range validation requested */
		return indices;
	}
	for (k = 0; k < nItems; k++) {
		if ((indices[k] < 0) || (indices[k] > maxIndex)) {
			return null;
		}
	}
	return indices;
}

/*
	loadRectFrominto:
		Read the object at stack position 'stackIndex', which must be a
		pointer object with at least two slots whose first two slots are
		instances of class Point, and store the integer coordinates of
		those points into 'rect' (first point -> left/top, second point
		-> right/bottom).
		On a malformed object the primitive is failed and 'rect' is left
		untouched. The return value is 0 on success; callers should
		consult failed() to detect errors.
*/
int loadRectFrominto(int stackIndex, sqRect *rect) {
	int rectOop;
	int firstPoint;
	int secondPoint;
	int left;
	int top;
	int right;
	int bottom;

	rectOop = stackObjectValue(stackIndex);
	if (failed()) {
		return null;
	}
	if (!isPointers(rectOop) || (slotSizeOf(rectOop) < 2)) {
		return primitiveFail();
	}

	firstPoint = fetchPointerofObject(0, rectOop);
	secondPoint = fetchPointerofObject(1, rectOop);
	if ((fetchClassOf(firstPoint) != classPoint())
		|| (fetchClassOf(secondPoint) != classPoint())) {
		return primitiveFail();
	}

	left = fetchIntegerofObject(0, firstPoint);
	top = fetchIntegerofObject(1, firstPoint);
	right = fetchIntegerofObject(0, secondPoint);
	bottom = fetchIntegerofObject(1, secondPoint);
	if (failed()) {
		return null;
	}

	rect->left = left;
	rect->top = top;
	rect->right = right;
	rect->bottom = bottom;
	return 0;
}

/* Primitive: clear the depth buffer via b3dClearDepthBuffer().
   Fails if DirectX is not enabled. */
EXPORT(int) primB3dClearDepthBuffer(void) {
	if (fDirectXEnabled) {
		b3dClearDepthBuffer();
		return 1;
	}
	return primitiveFail();
}

/* Primitive: clear the viewport via b3dClearViewport(), passing the
   positive 32 bit value found on top of the stack. Pops that argument
   on success; fails if DirectX is not enabled. */
EXPORT(int) primB3dClearViewport(void) {
	int clearValue;

	if (!fDirectXEnabled) {
		return primitiveFail();
	}
	clearValue = positive32BitValueOf(stackValue(0));
	if (!failed()) {
		b3dClearViewport(clearValue);
		return pop(1);
	}
	return null;
}

/* Primitive: finish the current scene via b3dFinishScene().
   Expects exactly four arguments:
     stack 0 - destination rectangle (see loadRectFrominto)
     stack 1 - source rectangle
     stack 2 - a Bitmap used as span buffer
     stack 3 - object handed to loadBitBltFrom()
   Fails if DirectX is disabled or any argument is unsuitable.
   On success the four arguments are popped, like the other
   primitives in this file do with theirs. */
EXPORT(int) primB3dFinish(void) {
	unsigned int *span;
	int oop;
	sqRect dstRect;
	sqRect srcRect;
	int spanSize;

	if (methodArgumentCount() != 4) {
		return primitiveFail();
	}
	if (!(fDirectXEnabled)) {
		return primitiveFail();
	}
	loadRectFrominto(0, &dstRect);
	loadRectFrominto(1, &srcRect);
	oop = stackObjectValue(2);
	/* Any of the three loads above may have failed the primitive; in
	   that case the rectangles are uninitialized and must not be used. */
	if (failed()) {
		return null;
	}
	if (fetchClassOf(oop) != classBitmap()) {
		return primitiveFail();
	}
	spanSize = slotSizeOf(oop);
	span = firstIndexableField(oop);
	oop = stackObjectValue(3);
	if (failed()) {
		return null;
	}
	if (!(loadBitBltFrom(oop))) {
		return primitiveFail();
	}
	b3dFinishScene(span, spanSize, (RECT*)&srcRect, (RECT*)&dstRect);
	/* The function previously fell off its end without returning a value */
	return pop(4);
}

/* Primitive: report that the accelerator is available.
   Fails if DirectX is not enabled; otherwise replaces the top of
   the stack with the boolean true. */
EXPORT(int) primB3dInitialize(void) {
	if (!fDirectXEnabled) {
		return primitiveFail();
	}
	pop(1);
	return pushBool(1);
}

/* Primitive: load a texture through b3dLoadTexture().
   The object on top of the stack must be a pointer object with at
   least 8 slots laid out as:
     0 bits (a Bitmap), 1 width, 2 height, 3 depth (must be 32),
     5 and 6 booleans, 7 an integer in 0..1
   and the bitmap must hold exactly width * height 32 bit pixels.
   On success two stack entries are popped and the integer handle
   returned by b3dLoadTexture() is pushed. Every validation error
   fails the primitive. */
EXPORT(int) primB3dLoadTexture(void) {
	unsigned int *bitsPtr;
	int formHeight;
	int form;
	int formWidth;
	int formDepth;
	int formBits;
	int formBitsSize;
	int texEnvMode;
	int handle;

	form = stackObjectValue(0);
	if (failed()) {
		return 0;
	}
	/* The checks below used to 'return 0' without failing the primitive,
	   so a malformed argument looked like a successful call. */
	if (!(isPointers(form))) {
		return primitiveFail();
	}
	if ((slotSizeOf(form)) < 8) {
		return primitiveFail();
	}
	formBits = fetchPointerofObject(0, form);
	formWidth = fetchIntegerofObject(1, form);
	formHeight = fetchIntegerofObject(2, form);
	formDepth = fetchIntegerofObject(3, form);
	/* Slots 5 and 6 are only validated as booleans; their values are
	   not passed on to b3dLoadTexture(). */
	(void) booleanValueOf(fetchPointerofObject(5, form));
	(void) booleanValueOf(fetchPointerofObject(6, form));
	texEnvMode = fetchIntegerofObject(7, form);
	if (failed()) {
		return 0;
	}
	if ((formWidth < 1) || ((formHeight < 1) || (formDepth != 32))) {
		return primitiveFail();
	}
	if (!((fetchClassOf(formBits)) == (classBitmap()))) {
		return primitiveFail();
	}
	/* Require byteSize == width * height * 4, written with divisions so
	   that huge dimensions cannot overflow the multiplication.
	   formWidth >= 1 is guaranteed by the check above. */
	formBitsSize = byteSizeOf(formBits);
	if (((formBitsSize % 4) != 0)
		|| (((formBitsSize / 4) % formWidth) != 0)
		|| (((formBitsSize / 4) / formWidth) != formHeight)) {
		return primitiveFail();
	}
	if ((texEnvMode < 0) || (texEnvMode > 1)) {
		return primitiveFail();
	}
	bitsPtr = firstIndexableField(formBits);
	handle = b3dLoadTexture(formWidth, formHeight, formDepth, (unsigned int*) bitsPtr);
	if (handle < 0) {
		return primitiveFail();
	}
	pop(2);
	return pushInteger(handle);
}

/* Primitive: draw indexed quads via b3dDrawIndexedQuads().
   Stack layout (top first): index count, index array, vertex count,
   vertex array, texture handle. The index array is range-checked
   against the vertex count. Pops all five entries on success; fails
   if DirectX is not enabled. */
EXPORT(int) primB3dProcessIndexedQuads(void) {
	int nIndexes;
	int nVertices;
	int texture;
	void *vertices;
	void *indexes;

	if (!fDirectXEnabled) {
		return primitiveFail();
	}

	nIndexes = stackIntegerValue(0);
	nVertices = stackIntegerValue(2);
	texture = stackIntegerValue(4);
	if (failed()) {
		return null;
	}

	vertices = stackPrimitiveVertexArrayofSize(3, nVertices);
	indexes = stackPrimitiveIndexArrayofSizevalidateforVertexSize(1, nIndexes, 1, nVertices);
	if ((vertices != null) && (indexes != null) && !failed()) {
		/* Four indexes per quad */
		b3dDrawIndexedQuads(texture, vertices, nVertices, indexes, nIndexes / 4);
		return pop(5);
	}
	return null;
}

/* Primitive: draw indexed triangles via b3dDrawIndexedTriangles().
   Stack layout (top first): index count, index array, vertex count,
   vertex array, texture handle. The index array is range-checked
   against the vertex count. Pops all five entries on success; fails
   if DirectX is not enabled. */
EXPORT(int) primB3dProcessIndexedTriangles(void) {
	int nIndexes;
	int nVertices;
	int texture;
	void *vertices;
	void *indexes;

	if (!fDirectXEnabled) {
		return primitiveFail();
	}

	nIndexes = stackIntegerValue(0);
	nVertices = stackIntegerValue(2);
	texture = stackIntegerValue(4);
	if (failed()) {
		return null;
	}

	vertices = stackPrimitiveVertexArrayofSize(3, nVertices);
	indexes = stackPrimitiveIndexArrayofSizevalidateforVertexSize(1, nIndexes, 1, nVertices);
	if ((vertices != null) && (indexes != null) && !failed()) {
		/* Three indexes per triangle */
		b3dDrawIndexedTriangles(texture, vertices, nVertices, indexes, nIndexes / 3);
		return pop(5);
	}
	return null;
}

/* Primitive: draw a polygon via b3dDrawPolygon().
   Stack layout (top first): vertex count, vertex array, texture
   handle. Pops those three entries on success; fails if DirectX is
   not enabled. */
EXPORT(int) primB3dProcessPolygon(void) {
    int vtxCount;
    void *vtxArray;
    int texHandle;

	if (!(fDirectXEnabled)) {
		return primitiveFail();
	}
	vtxCount = stackIntegerValue(0);
	texHandle = stackIntegerValue(2);
	if (failed()) {
		return null;
	}
	vtxArray = stackPrimitiveVertexArrayofSize(1, vtxCount);
	if ((vtxArray == null) || (failed())) {
		return null;
	}
	b3dDrawPolygon(texHandle, vtxArray, vtxCount);
	/* Only stack slots 0..2 are used by this primitive. It used to pop 5
	   (as the indexed primitives do for their five arguments), which
	   would remove two entries too many.
	   NOTE(review): confirm against the Smalltalk-side argument count. */
	return pop(3);
}

/* Primitive: set the viewport via b3dSetViewport().
   The object on top of the stack must be a pointer object with at
   least two slots, both instances of class Point. The first point
   gives x/y, the difference to the second point gives width/height.
   Pops the argument on success; fails if DirectX is not enabled or
   the argument is malformed. */
EXPORT(int) primB3dSetViewport(void) {
	int rectOop;
	int firstPoint;
	int secondPoint;
	int startX;
	int startY;
	int endX;
	int endY;

	if (!fDirectXEnabled) {
		return primitiveFail();
	}
	rectOop = stackObjectValue(0);
	if (failed()) {
		return null;
	}
	if (!isPointers(rectOop) || (slotSizeOf(rectOop) < 2)) {
		return primitiveFail();
	}

	firstPoint = fetchPointerofObject(0, rectOop);
	secondPoint = fetchPointerofObject(1, rectOop);
	if ((fetchClassOf(firstPoint) != classPoint())
		|| (fetchClassOf(secondPoint) != classPoint())) {
		return primitiveFail();
	}

	startX = fetchIntegerofObject(0, firstPoint);
	startY = fetchIntegerofObject(1, firstPoint);
	endX = fetchIntegerofObject(0, secondPoint);
	endY = fetchIntegerofObject(1, secondPoint);
	if (failed()) {
		return null;
	}

	b3dSetViewport(startX, startY, endX - startX, endY - startY);
	return pop(1);
}

/***************************************************************************
 ***************************************************************************
					Squeak support primitives
 ***************************************************************************
 ***************************************************************************/

#define SQUEAK_SUPPORT_PRIMS

#ifdef SQUEAK_SUPPORT_PRIMS

/* Primitive: query or set the DirectX enable flag.
   With no argument the current value of fDirectXEnabled is answered.
   With one (boolean) argument the flag is set first and the new
   value is answered. More than one argument fails the primitive. */
__declspec(dllexport) int win32DirectXEnable(void)
{
	int argCount = methodArgumentCount();

	if(argCount > 1) return primitiveFail();

	if(argCount == 1) {
		int flagOop, newState;

		flagOop = stackObjectValue(0);
		if(failed()) return 0;
		newState = booleanValueOf(flagOop);
		if(failed()) return 0;
		fDirectXEnabled = newState;
		/* drop the argument */
		pop(1);
	}

	/* drop the remaining stack entry and answer the flag */
	pop(1);
	pushBool(fDirectXEnabled);
	return 1;
}

/* Primitive: answer the host display depth recorded in
   dwDisplayBitDepth as 16, 24 or 32, or 0 if it holds none of the
   three DDBD_ values. Takes no arguments. */
__declspec(dllexport) int win32DirectXDisplayDepth(void)
{
	int bits;

	if(methodArgumentCount() != 0)
		return primitiveFail();

	if(dwDisplayBitDepth == DDBD_16)      bits = 16;
	else if(dwDisplayBitDepth == DDBD_24) bits = 24;
	else if(dwDisplayBitDepth == DDBD_32) bits = 32;
	else                                  bits = 0;

	pop(1);
	push(integerObjectOf(bits));
	return 1;
}

/* Primitive: switch the "smart clipper" check used by
   ioDirectXShowDisplayBits on or off.
   Takes exactly one boolean argument, stores it in
   fDirectXSmartClipper and answers the new value.
   Fails if DirectX is not enabled or the argument count is wrong. */
__declspec(dllexport) int win32DirectXSmartClipper(void)
{
	int oop, smart;

	if(!fDirectXEnabled || (methodArgumentCount() != 1)) 
		return primitiveFail();
	oop = stackObjectValue(0);
	if(failed()) return 0;
	smart = booleanValueOf(oop);
	if(failed()) return 0;
	fDirectXSmartClipper = smart;
	/* Remove the argument and the entry below it before pushing the
	   result, exactly as win32DirectXEnable does for its one-argument
	   case. Popping only one entry left the stack unbalanced. */
	pop(2);
	pushBool(fDirectXSmartClipper);
	return 1;
}

/* Primitive: attach the clipper to the primary surface (argument
   true) or detach it (argument false) and answer whether a clipper
   is attached afterwards.
   Takes exactly one boolean argument. Fails if DirectX is not
   enabled, the argument count is wrong, or the DirectDraw objects
   have not been created yet. */
__declspec(dllexport) int win32DirectXUseClipper(void)
{
	HRESULT hRes;
	int oop, use;

	if(!fDirectXEnabled || (methodArgumentCount() != 1)) 
		return primitiveFail();
	oop = stackObjectValue(0);
	if(failed()) return 0;
	use = booleanValueOf(oop);
	if(failed()) return 0;
	/* DirectX is initialized lazily on the first display update, so the
	   enable flag can be set while the surfaces are still NULL. */
	if(!lpddPrimary || (use && !lpddClipper))
		return primitiveFail();
	if(use) {
		hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, lpddClipper);
		if(!FAILED(hRes)) fClipperAttached = 1;
	} else {
		hRes = lpddPrimary->lpVtbl->SetClipper(lpddPrimary, NULL);
		if(!FAILED(hRes)) fClipperAttached = 0;
	}
	/* Remove the argument and the entry below it before pushing the
	   result, exactly as win32DirectXEnable does for its one-argument
	   case. Popping only one entry left the stack unbalanced. */
	pop(2);
	pushBool(fClipperAttached);
	return 1;
}

#endif

#endif
