// compile.cc -- dynamic compiler
//
// Author: ian.piumarta@inria.fr
//
// Last edited: 2/26/01 by marcus@ira.uka.de

// BUGS:
//   
//   The code generation loop should be factored into two functions:
//   method translator and block translator (the latter of which can
//   call itself recursively).
//   
//   Code generation should be done in one pass.
//   
//   Everything should use native stack frames.  (This is by far the
//   most serious bug -- it's not far off being cured though... ;-)


// A note about method/native memoizers, specialisation, and the method
// cache.
// 
// The {method,native}Memoizers might smell a bit like a secondary method
// cache -- they aren't.  They differ from the method cache in several
// significant respects.
// 
// 1. The method cache associates a { selector X lookupClass } pair with a
// CompiledMethod.  Complications due to superclass sends are entirely
// relevant.
// 
// 2. The memoizers associate a { receiver X CompiledMethod } pair with a
// NativeMethod; the selector has nothing whatsoever to do with this
// association.  Since the memoizers are only consulted after method cache
// lookup, complications due to superclass sends are utterly irrelevant to
// the memoizers.  Their role is rather to associate an object's
// implementation of a given method (the result of a previous method cache
// lookup) with a particular NATIVE implementation of that method.  This
// might depend on the receiver's nature (its class, indexability, etc.)
// and/or the CompiledMethod (its primitive index, etc.), but it has nothing
// whatsoever to do with the way the object chooses its response for a given
// selector.
// 
// 3. Several message handling limitations in the Interpreter are no longer
// relevant to the memoizers.  One example: the Interpreter can only use the
// at cache for non-superclass sends of #at:[put:] (which EXCLUDES
// #basicAt:[put:]) since the response is based on the SELECTOR and the
// receiver.  In the memoizers, the response is based on the destination
// CompiledMethod and the receiver, NOT on the selector or the nature
// (normal/superclass) of the send.  Since we know with certainty that the
// destination method (be it #at:[put:] or #basicAt:[put:] or whatever) is
// the intended behaviour of the message send, we can use the at cache (or
// any other caching mechanism) with impunity -- since implementing the
// correct behaviour, whatever the means, cannot break the program.  (We
// might even define a message #frob: with primitive index 60, and the
// memoizers will return a NativeMethod suitably specialised that runs the
// equivalent of Object>>at: with NO loss of efficiency compared to the
// primitive methods already bound to #at:.)
// 
// Hence:
// 
// 4. The receiverClass() of a NativeMethod is NOT intended in any way shape
// or form to be related with its selector() to effect (or in any way
// affect) the choice of method to be run.  This choice has already been
// made in the method cache lookup that led to the translation of the
// NativeMethod and its subsequent inclusion in the memoizers.  The
// receiverClass() is there simply to record the class associated with any
// specialisation that might have been performed in the NativeMethod (the
// species of inline cache that it uses, for example).  The lkupClass that
// was used in the method cache lookup is therefore also completely
// irrelevant to the compilation and memoization mechanisms.
// 
// 5. Any optimisations that we do must not affect the program's execution
// in any way that might alter the behaviour of the method cache itself,
// which DOES depend on several things that native code is expected to
// provide via relink glue: the selector() of a NativeMethod and the inline
// cache "supered" flag, for example.


#include "compile.h"
#include "NativeMethod.h"	// subsumes: Memoizer, PcMap, Cache, Object

#include "SamOp.h"
#include "specials.h"

#include "archdep.h"
#include "optimise.h"
#include "generate.h"
#include "genArray.h"

#include "xmalloc.h"

#include <string.h>

#undef DEBUG

///
/// CODE GENERATION OPTIONS
///


// initial largest method size (measured in SAM operations, well over
// twice the largest method size in the 2.6 image).
// In gen_compile() this dimensions the local jumps[] array, which holds
// the SAM addresses of jump insns collected during bytecode translation.
// NOTE(review): the visible translation loop appends to jumps[] without
// comparing nJumps against this bound -- confirm that a method can never
// contain more than CODE_MAX jump bytecodes.
#define	CODE_MAX	2048

// initial maximum number of blocks that can appear in a method
// (largest number of blocks per method in 2.6 is 33).
// In gen_compile() this dimensions the local blocks[] array of block
// prologue entry points.
#define	BLOCK_MAX	64

// rewrite LdLit with SmallInteger literal as LdInt (apart from being
// faster, helps the optimiser produce much better code for conditionals)
#define	OPTIMISE_LITERALS

// eliminate jump chains and rewrite range-limited inverted
// conditional jumps
#undef	OPTIMISE_JUMPS

// delete unused jumps during jump optimisation
#define	DELETE_JUMPS

// delete unreachable code after unconditional jumps/returns
// (note: undefining this [as of 2.8] breaks the translator
// because of bad bytecode generated by the Compiler)
#define	DELETE_UNREACHABLE

// translate a different NativeMethod for each receiver of a given
// CompiledMethod (this allows short-circuiting of sends to self, and
// even replacing them entirely with inline code corresponding to
// quick responses).  NOTE: this should be a command-line option!
#undef	SPECIALISE_METHODS

// Translate a different NativeMethod for each cache type (tagged,
// compact, non-compact) of a given CompiledMethod.  This allows
// shorter prologues, since only one of the three possibilities needs
// to be implemented.
#undef	SPECIALISE_ICACHE

// Translate a different NativeMethod for a given CompiledMethod when
// the receiver is a Float.  This allows shorter and faster prologues,
// since only one of the three possibilities needs to be implemented
// and it can be specialised on Float's signature.
#undef	SPECIALISE_FLOATS

// put forward branch destinations in maps (almost never useful)
#undef	COMPLETE_MAP


///
/// DEBUGGING OPTIONS
///


// verify that backwards conditional jump destinations have the same
// stack depth as the insn following the jump
#define	VERIFY_JUMPS

// print the name of the method before translation
#undef	ASM_LIST_METHOD

// generate listings during translation:
//   - raw SAM code generated from CompiledMethod bytecodes
//   - optimised SAM code after jump optimisations
//   - first and second codegen passes
#undef	ASM_LISTING

// dump the contents of the stack after each SAMcode in listing
#undef	ASM_STACK

// generate assembly listings during final code generation
// (works only if lib{iberty,opcode,bfd} are available)
#undef	DSM_LISTING

// print warnings for detectable anomalies (e.g. unreachable code)
#define ASM_WARNINGS

// print warnings if a method returns with extraneous items on the stack
#define	RETURN_WARNINGS
#define RETURN_FUZZ 0	// the amount we can tolerate: none at all since
			// "balanced" stacks were assumed in the Compiler

// disable all debugging options if compiling for "production" VM
#ifdef NDEBUG
# undef	VERIFY_JUMPS
# undef	ASM_WARNINGS
# undef	RETURN_WARNINGS
#endif

// Globally visible native method / native instruction pointer.
// NOTE(review): neither is read or written in the visible part of this
// file; presumably they describe the native code currently being run --
// confirm against their users.
NativeMethod *nativeMethod= 0;
void	     *nativeIP= 0;

// Globally visible running totals, all starting at zero.
// NOTE(review): not updated in the visible part of this file; the meaning
// of the h/n/v prefixes is not established here -- confirm in the code
// generator before relying on them.
unsigned int gen_hTotal= 0;
unsigned int gen_nTotal= 0;
unsigned int gen_vTotal= 0;

// Profiling ("spy") hooks defined elsewhere.  gen_compile() tests withSpy
// and, when it is non-zero, calls spyTranslation() with the receiver class
// and selector just before translating a new method.
// NOTE(review): spyTranslated() is not called in the visible part of this
// file; presumably it marks the end of a translation -- confirm.
extern int withSpy;
extern void spyTranslation(Class *cls, oop selector);
extern void spyTranslated(void);

// The SAM buffer is shared by every translation: gen_compile() (re)allocates
// it with xmalloc() when it is too small, and compile_release() frees it.
static SamOp *sam= 0;		// buffer for SAM code
static size_t samSize= 0;	// max size (SAM insns)


// Print the method currently being looked up / translated in the form
//
//	receiverClass(methodClass)>>selector
//
// using the globals receiverClass, methodClass and messageSelector.  The
// punctuation is written with printf (i.e. to stdout); the class names and
// the selector are written by printNameOfClasscount() and printStringOf().
// NOTE(review): the meaning of the second argument (5) passed to
// printNameOfClasscount() is not visible here -- presumably a length or
// count limit; confirm against its definition.
//
// CAUTION: this expands to several statements and already ends with a
// semicolon, so it must not be used as the unbraced body of an
// if/else/loop -- only the first statement would be controlled.

#define printMethodName() \
  printNameOfClasscount(receiverClass, 5); \
  printf("("); \
  printNameOfClasscount(methodClass, 5); \
  printf(")>>"); \
  printStringOf(messageSelector);


// warningIf(cond, msg, spc) -- report a translation anomaly.
//
//   cond	condition under which the warning is issued
//   msg	C string describing the anomaly (printed with %s)
//   spc	index into sam[] of the offending SAM insn; the vPC recorded in
//		that insn is what gets printed (spc itself is NOT a vPC)
//
// When ASM_WARNINGS is defined and cond is true, two lines are produced:
// "WARNING: " on stderr followed by the method name and a newline emitted
// via printMethodName()/printf, then "WARNING: <msg> at vPC=<n>" on stderr.
// stdout is flushed before and after the method name so that the stdout
// and stderr output is emitted in that order.
//
// CAUTION: the enabled form expands to a bare `if (...) { ... }', so using
// it as the unbraced body of an if that has an else would capture that
// else.
#ifdef ASM_WARNINGS
# define warningIf(cond, msg, spc) \
    if (cond) { \
      fflush(stdout); \
      fprintf(stderr, "WARNING: "); printMethodName(); printf("\n"); \
      fflush(stdout); \
      fprintf(stderr, "WARNING: %s at vPC=%d\n", msg, sam[spc].vPC); \
    }
#else
// Warnings disabled: expands to nothing, so none of the arguments are
// evaluated.  (The third parameter is spelled `vpc' here but plays the same
// role as `spc' above.)
# define warningIf(cond, msg, vpc)
#endif


#if 0

// Debugging aid: format `bits' as a NUL-terminated string of '0'/'1'
// characters, most significant digit first.  Only the low six bits are
// rendered, since the probe mask starts at (1<<5).
//
// The result lives in one of four static buffers handed out in rotation,
// so up to four results can be in use at once (e.g. as arguments of a
// single printf) before the fifth call overwrites the first.
//
static char *binary(unsigned long bits)
{
  static char ring[4][sizeof(unsigned long) * 8 + 1];
  static size_t next= 0;

  char *const out= ring[next++ & 3];	// rotate through the four buffers
  char *cursor= out;
  unsigned long probe= (1<<5);

  while (probe != 0)
    {
      *cursor++= (bits & probe) ? '1' : '0';
      probe>>= 1;
    }
  *cursor= '\0';

  return out;
}

#endif


// Give back the SAM code buffer, if one has been allocated, and record
// that no buffer is held.  With samSize back at zero, the size check in
// gen_compile() allocates a fresh buffer on the next translation.
// Calling this when no buffer is held does nothing.
void compile_release(void)
{
  if (sam == 0)
    return;		// nothing allocated

  xfree(sam);
  samSize= 0;
  sam= 0;
}


// called when filling a method cache line, with:
//	messageSelector	= the method's selector
//	receiverClass	= the class of the receiver
//	lkupClass	= the class in which lookup started (!= rcvrClass for super)
//	newMethod	= the corresponding CompiledMethod
//	methodClass	= the class in which the newMethod is defined
//	primitiveIndex	= the primitive index (or 0) of newMethod
//
// find a native method for the given newMethod/lkupClass combination,
// and store the result in nativeMethod.
// 
// Note: this method is called in a GC-friendly environment.

void gen_compile(void)
{
  assert(receiverClass != ClassPseudoContext);

  const bool inContext= ((receiverClass == ClassMethodContext)
			 || (receiverClass == ClassBlockContext));

  // first look for a memoized response
  MemoIndex methodIndex= methodMemoizer->indexOf(newMethod);	// may cause GC!

  // determine the specialisation behaviour
  Class *specClass= 0;
  unsigned specCache= 0;

  // ALWAYS specialise for sends ...
  if (inContext)				// ... to Contexts
    {
      specClass= receiverClass;
      specCache= ixCacheType;
    }
# if defined(INLINED_AT) || defined(CACHED_AT)
  else if ((primitiveIndex != 0) && (receiverClass->hasIndexableInstances()))
    {
      switch (primitiveIndex)
	{
	case 60: case 61:			// ... of Object>>at:[put:]
	case 62:				// ... of Object>>size
	case 63: case 64:			// ... of String>>at:[put:]
	  specClass= receiverClass;
	  specCache= ixCacheType;
	  break;
	}
    }
# endif // INLINED_AT || CACHED_AT
# ifdef SPECIALISE_FLOATS
  else if (receiverClass == ClassFloat)
    {
      specClass= receiverClass;
      specCache= ixCacheType;
    }
# endif
  if ((specCache == 0) && (primitiveIndex != 0))
    {
      switch (primitiveIndex)
	{
	case 70:				// ... of Behavior>>new
	case 71:				// ... of Behavior>>new:
	  specClass= receiverClass;
	  specCache= ixCacheType;
	  break;
	}
    }
# ifdef SPECIALISE_ICACHE
  if (specCache == 0)
    {
#     ifdef SPECIALISE_METHODS
      specClass= receiverClass;
      specCache= ixCacheType; //NativeMethod::cacheTypeForClass(receiverClass);
#     else
      specCache= NativeMethod::cacheTypeForClass(receiverClass);
#     endif
    }
# endif

# ifdef DEBUG
  printf("specClass = ");  if (specClass) specClass->print(); else printf("<nil>");
  printf("\n");
  printf("specCache = %d\n", specCache);
# endif

  NativeMethod *pMeth= 0;
  NativeMethod *nMeth= 0;
  
  pMeth= nativeMemoizer->atOrNil(methodIndex);	// probed method

# ifdef DEBUG
  printf("memoizer probe => "); if (pMeth) pMeth->print(); else printf("<nil>");
  printf("\n");
# endif

  if (pMeth != 0)
    nMeth= pMeth->specialised(specClass, specCache);
  
# ifdef DEBUG
  printf("specialise => "); if (nMeth) nMeth->print(); else printf("<nil>");
  printf("\n");
# endif

  // we might have a Context method for non-Context non-specialised receiver class

  if (specClass == 0)	// guaranteed non-Context receiver class
    {
      while (nMeth != 0)
	{
	  Class *mrClass= nMeth->receiverClass();
	  assert(mrClass != ClassPseudoContext);
	  if ((mrClass == ClassMethodContext) || (mrClass == ClassBlockContext))
	    {
	      nMeth= nMeth->next;
	      if (nMeth != 0)
		nMeth= nMeth->specialised(specClass, specCache);
	      printf("non context => "); if (nMeth) nMeth->print(); else printf("<nil>");
	      printf("\n");
	    }
	  else
	    break;	// method suitable for receiverClass
	}
    }

  if (nMeth != 0)
    {
      newNativeMethod= nMeth;
#     ifdef ASM_LISTING
      printf("memoized %s ", methodIndex.printString());
      printMethodName();
      printf(" -> ");
      printNameOfClasscount(newNativeMethod->methodClass(), 5);
      printf(">>");
      printStringOf(newNativeMethod->selector());
      printf("\n");
      fflush(stdout);
#     endif
      return;
    }

#if defined(ASM_LISTING) || defined(ASM_LIST_METHOD)
  printf("translating %s #%d ", methodIndex.printString(), specCache);
  printMethodName();
  printf(" [%d,%d]\n", newMethod->initialPC(), newMethod->endPC());
#endif

  if (withSpy)
    {
      spyTranslation(receiverClass, messageSelector);
      extern unsigned genCount;
      ++genCount;
    }

  // no memoized response: create a new native method

  size_t startPC= newMethod->initialPC();
  size_t endPC= newMethod->endPC();
  size_t byteSize= endPC - startPC + 1
    + 1  // method entry
    + 1; // cannot return off end
  size_t codeSize= gen_codeSize(byteSize);

  nMeth= new(codeSize) NativeMethod(byteSize);

  MemoIndex selectorIndex= selectorMemoizer->indexOf(messageSelector);
  MemoIndex receiverClassIndex= classMemoizer->indexOf(receiverClass);
  MemoIndex methodClassIndex= classMemoizer->indexOf(methodClass);

# ifdef ASM_LISTING
  printf("receiver class %s, method class %s, selector %s\n",
	 receiverClassIndex.printString(),
	 methodClassIndex.printString(),
	 selectorIndex.printString());
# endif

  nMeth->next= pMeth;	// enter into specialisation chain

  nMeth->cacheType= specCache;

  nMeth->methodIndex= methodIndex;
  nMeth->selectorIndex= selectorIndex;
  nMeth->methodClassIndex= methodClassIndex;
  nMeth->receiverClassIndex= receiverClassIndex;
  nMeth->argumentCount= newMethod->argCount();
  nMeth->temporaryCount= newMethod->tempCount()
    - nMeth->argumentCount;		// fais gaffe : args NON compris !
  nMeth->primitiveIndex= newMethod->primitiveIndex();

  if (samSize < endPC)	// conservative guess
    {
      samSize= endPC * 4;
      // this should never happen: print a message indicating that
      // CODE_MAX should be increased in the source
      //printf("growing SamOp buffer to %d entries\n", samSize);
      if (sam != 0) xfree((void *)sam);
      sam= (SamOp *)xmalloc(samSize * sizeof(SamOp));
      if (sam == 0) fatal("out of memory");
    }

  size_t jumps[CODE_MAX];	// SAM addresses of jump insn destinations
  size_t nJumps= 0;

  size_t blocks[BLOCK_MAX];	// SAM addresses of block prologue entry points
  size_t nBlocks= 0;

  size_t vPC= 0;
  size_t samPC= 0;

  const bool receiverTag= ((specCache == 1) || (methodClass == ClassSmallInteger));

  /**************** PASS 1: CompiledMethod -> SAM ****************/


# define fetchByte()	newMethod->stByteAt(vPC++)

# define genOp(OP, ARG, FLAGS) {	\
    sam[samPC].opcode= OP;		\
    sam[samPC].arg=    ARG;		\
    sam[samPC].flags=  FLAGS;		\
    ++samPC;				\
  }

# define gen(OP, ARG)		genOp(OP, ARG, 0)
# define genEnd(OP, ARG)	genOp(OP, ARG, SamOp::bendFlag)

  memset((void *)sam, 0, byteSize * sizeof(SamOp));

  //gen(opDeleted, 0);

  for (vPC= startPC; vPC <= endPC;)
    {
      assert(samPC < byteSize);
      sam[samPC].vPC= vPC;

      byte op= fetchByte();

      switch (op)
	{
	  // pushReceiverVariableBytecode
	case 0: case 1: case 2:  case 3:  case 4:  case 5:  case 6:  case 7: 
	case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
	  gen(opLdInst, op & 15);
	  break;
	  // pushTemporaryVariableBytecode
	case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
	case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31:
	  gen(opLdTemp, op & 15);
	  break;
	  // pushLiteralConstantBytecode
	case 32: case 33: case 34: case 35: case 36: case 37: case 38: case 39:
	case 40: case 41: case 42: case 43: case 44: case 45: case 46: case 47:
	case 48: case 49: case 50: case 51: case 52: case 53: case 54: case 55:
	case 56: case 57: case 58: case 59: case 60: case 61: case 62: case 63:
	  gen(opLdLit, op & 31);
	  break;
	  // pushLiteralVariableBytecode
	case 64: case 65: case 66: case 67: case 68: case 69: case 70: case 71:
	case 72: case 73: case 74: case 75: case 76: case 77: case 78: case 79:
	case 80: case 81: case 82: case 83: case 84: case 85: case 86: case 87:
	case 88: case 89: case 90: case 91: case 92: case 93: case 94: case 95:
	  gen(opLdLitInd, op & 31);
	  break;
	  // storeAndPopReceiverVariableBytecode
	case 96: case 97: case 98: case 99: case 100: case 101: case 102: case 103:
	  gen(opPopInst, op & 7);
	  break;
	  // storeAndPopTemporaryVariableBytecode
	case 104: case 105: case 106: case 107: case 108: case 109: case 110: case 111:
	  gen(opPopTemp, op &7);
	  break;
	  // miscellaneous bytecodes
	case 112: gen(opLdSelf,  0); break;
	case 113: gen(opLdTrue,  0); break;
	case 114: gen(opLdFalse, 0); break;
	case 115: gen(opLdNil,   0); break;
	  // load integer bytecodes
	case 116: gen(opLdInt, -1); break;
	case 117: gen(opLdInt,  0); break;
	case 118: gen(opLdInt,  1); break;
	case 119: gen(opLdInt,  2); break;
	  // return bytecodes
	case 120: genEnd(opRetSelf,   0); break;
	case 121: genEnd(opRetTrue,   0); break;
	case 122: genEnd(opRetFalse,  0); break;
	case 123: genEnd(opRetNil,    0); break;
	case 124: genEnd(opRetMethod, 0); break;
	case 125: genEnd(opRetBlock,  0); break;
	  // unused bytecodes
	case 126: fatal("unknown bytecode, vPC = %d", vPC); break;
	case 127: fatal("unknown bytecode, vPC = %d", vPC); break;
	  // extended push
	case 128: {
	  const int descriptor= fetchByte();
	  const int variableType= (descriptor >> 6) & 0x03;
	  const int variableIndex=  descriptor & 0x3f;
	  switch (variableType)
	    {
	    case 0:  gen(opLdInst,   variableIndex); break;
	    case 1:  gen(opLdTemp,   variableIndex); break;
	    case 2:  gen(opLdLit,    variableIndex); break;
	    case 3:  gen(opLdLitInd, variableIndex); break;
	    default: fatal("this cannot happen");    break;
	    }
	  break;
	}
	  // extended store
	case 129: {
	  const int descriptor= fetchByte();
	  const int variableType= (descriptor >> 6) & 0x03;
	  const int variableIndex= descriptor & 0x3f;
	  switch (variableType)
	    {
	    case 0:  gen(opStInst,   variableIndex); break;
	    case 1:  gen(opStTemp,   variableIndex); break;
	    case 2:  fatal("illegal store");	     break;
	    case 3:  gen(opStLitInd, variableIndex); break;
	    default: fatal("this cannot happen");    break;
	    }
	  break;
	}
	  // extended pop
	case 130: {
	  const int descriptor= fetchByte();
	  const int variableType= (descriptor >> 6) & 0x03;
	  const int variableIndex= descriptor & 0x3f;
	  switch (variableType)
	    {
	    case 0:  gen(opPopInst,   variableIndex); break;
	    case 1:  gen(opPopTemp,   variableIndex); break;
	    case 2:  fatal("illegal store");	      break;
	    case 3:  gen(opPopLitInd, variableIndex); break;
	    default: fatal("this cannot happen");     break;
	    }
	  break;
	}
	  // single extended send
	case 131: {
	  const int descriptor= fetchByte();
	  const int selIndex= (descriptor & 0x1f);
	  const int nArgs= descriptor >> 5;
	  gen(opSend, (selIndex << 8) | nArgs);
	  break;
	}
	  // double extended do all
	case 132: {
	  const int byte2= fetchByte();
	  const int byte3= fetchByte();
	  const int opType= byte2 >> 5;
	  switch (opType)
	    {
	    case 0: {
	      const int selIndex= byte3;
	      const int nArgs= byte2 & 0x1f;
	      gen(opSend, (selIndex << 8) | nArgs);
	      break;
	    }
	    case 1: {
	      const int selIndex= byte3;
	      const int nArgs= byte2 & 0x1f;
	      gen(opSuper, (selIndex << 8) | nArgs);
	      break;
	    }
	    case 2: gen(opLdInst,   byte3);	  break;
	    case 3: gen(opLdLit,    byte3);	  break;
	    case 4: gen(opLdLitInd, byte3);	  break;
	    case 5: gen(opStInst,   byte3);	  break;
	    case 6: gen(opPopInst,  byte3);	  break;
	    case 7: gen(opStLitInd, byte3);	  break;
	    default: fatal("this cannot happen"); break;
	    }
	  break;
	}
	  // single extended super
	case 133: {
	  const int descriptor= fetchByte();
	  const int selIndex= (descriptor & 0x1f);
	  const int nArgs= descriptor >> 5;
	  gen(opSuper, (selIndex << 8) | nArgs);
	  break;
	}
	  // second extended send
	case 134: {
	  const int descriptor= fetchByte();
	  const int selIndex= (descriptor & 0x3f);
	  const int nArgs= descriptor >> 6;
	  gen(opSend, (selIndex << 8) | nArgs);
	  break;
	}
	  // stack bytecodes
	case 135: gen(opPop, 0); break;
	case 136: gen(opDup, 0); break;
	case 137: gen(opLdThisContext, 0); break;
	  // experimentalBytecode
	case 138: case 139: case 140: case 141: case 142: case 143:
	  fatal("experimental bytecode, vPC = %d", vPC);
	  break;
	  // shortUnconditionalJump
	case 144: case 145: case 146: case 147: case 148: case 149: case 150: case 151: {
	  jumps[nJumps++]= samPC;
	  const unsigned dest= vPC + (op - 143);
	  assert(dest > vPC);
	  assert(dest <= endPC);
	  genEnd(opJmp, dest);
	  break;
	}
	  // shortConditionalJump
	case 152: case 153: case 154: case 155: case 156: case 157: case 158: case 159: {
	  jumps[nJumps++]= samPC;
	  const unsigned dest= vPC + (op - 151);
	  assert(dest > vPC);
	  assert(dest <= endPC);
	  gen(opJmpF, dest);
	  break;
	}
	  // longUnconditionalJump
	case 160: case 161: case 162: case 163: case 164: case 165: case 166: case 167: {
	  jumps[nJumps++]= samPC;
	  const unsigned offset= fetchByte();
	  const unsigned dest= vPC + ((op - 164) * 256) + offset;
	  assert(dest >= startPC);
	  assert(dest <= endPC);
	  genEnd(opJmp, dest);
	  break;
	}
	  // longJumpIfTrue
	case 168: case 169: case 170: case 171: {
	  jumps[nJumps++]= samPC;
	  const unsigned offset= fetchByte();
	  const unsigned dest= vPC + ((op - 168) * 256) + offset;
	  assert(dest >= vPC);
	  assert(dest <= endPC);
	  gen(opJmpT, dest);
	  break;
	}
	  // longJumpIfFalse
	case 172: case 173: case 174: case 175: {
	  jumps[nJumps++]= samPC;
	  const int offset= fetchByte();
	  const unsigned dest= vPC + ((op - 172) * 256) + offset;
	  assert(dest >= startPC);
	  assert(dest <= endPC);
	  gen(opJmpF, dest);
	  break;
	}
	  // arithmetic and special send bytecodes	// special index...
	case 176: gen(opAdd,            0); break;	//  0
	case 177: gen(opSubtract,       0); break;	//  1
	case 178: gen(opLessThan,       0); break;	//  2
	case 179: gen(opGreaterThan,    0); break;	//  3
	case 180: gen(opLessOrEqual,    0); break;	//  4
	case 181: gen(opGreaterOrEqual, 0); break;	//  5
	case 182: gen(opEqual,          0); break;	//  6
	case 183: gen(opNotEqual,       0); break;	//  7
	case 184: gen(opMultiply,       0); break;	//  8
	case 185: gen(opDivide,         0); break;	//  9
	case 186: gen(opMod,            0); break;	// 10
	case 187: gen(opMakePoint,      0); break;	// 11
	case 188: gen(opBitShift,       0); break;	// 12
	case 189: gen(opDiv,            0); break;	// 13
	case 190: gen(opBitAnd,         0); break;	// 14
	case 191: gen(opBitOr,          0); break;	// 15
	case 192: gen(opAt,             0); break;	// 16
	case 193: gen(opAtPut,          0); break;	// 17
	case 194: gen(opSize,           0); break;	// 18
	case 195: gen(opNext,           0); break;	// 19
	case 196: gen(opNextPut,        0); break;	// 20
	case 197: gen(opAtEnd,          0); break;	// 21
	case 198: gen(opEquivalent,     0); break;	// 22
	case 199: gen(opClass,          0); break;	// 23
	case 200: gen(opBlockCopy,      0); break;	// 24
	case 201: gen(opValue,          0); break;	// 25
	case 202: gen(opValueWithArg,   0); break;	// 26
	case 203: gen(opDo,             0); break;	// 27
	case 204: gen(opNew,            0); break;	// 28
	case 205: gen(opNewWithArg,     0); break;	// 29
	case 206: gen(opPointX,         0); break;	// 31
	case 207: gen(opPointY,         0); break;	// 32
	  // sendLiteralSelectorBytecode
	case 208: case 209: case 210: case 211: case 212: case 213: case 214: case 215:
	case 216: case 217: case 218: case 219: case 220: case 221: case 222: case 223:
	  gen(opSend, ((op & 15) << 8) | 0);
	  break;
	case 224: case 225: case 226: case 227: case 228: case 229: case 230: case 231:
	case 232: case 233: case 234: case 235: case 236: case 237: case 238: case 239:
	  gen(opSend, ((op & 15) << 8) | 1);
	  break;
	case 240: case 241: case 242: case 243: case 244: case 245: case 246: case 247:
	case 248: case 249: case 250: case 251: case 252: case 253: case 254: case 255:
	  gen(opSend, ((op & 15) << 8) | 2);
	  break;
	default:  fatal("unimplemented bytecode");
	} // switch (bytecode)
#     ifdef ASM_LISTING
      printf("%3d: %s\n", samPC - 1, sam[samPC-1].printString());
#     endif
    } // for (startPC <= vPC <= endPC)

  size_t samEnd= samPC;

  // start-of-method marker:
  //   ckptFlag for initial method PC
  sam[0].beCkpt();

  // end-of-method marker:
  //   ckptFlag for run-off vPCs, e.g: #cannotReturn
  //   destFlag to squash "unreachable code" diagnostic
  sam[samPC].vPC= vPC;
  genOp(opDeleted, 0, SamOp::ckptFlag);
  sam[samEnd].join();	// avoid unreachable code warnings from final Ret

  // samEnd points to the ungenerated insn following the final opDeleted

# undef gen
# undef fetchByte

# ifdef ASM_LISTING
  printf("---------------- RAW SAM:\n");
  {
    for (size_t i= 0; i < samEnd; ++i)
      printf("%3d: %s\n", i, sam[i].printString());
  }
# endif


  /**************** PASS 1b: resolve and mark jump destinations ****************/


  {
    for (size_t i= 0; i < nJumps; ++i)
      {
	samPC= jumps[i];
	register half vDest= (half)(sam[samPC].arg);
#     ifdef ASM_LISTING
	printf("resolve: %d -> %d\n", samPC, vDest);
#     endif
	// find sam insn corresponding to vDest
	// NOTE: indices must be ints, since low can descend to -1
	register int low= 0;
	register int high= samEnd - 1;
	register int idx;
	while ((idx= (high + low) >> 1), (low <= high))
	  if (sam[idx].vPC < vDest)
	    low= idx + 1;
	  else
	    high= idx - 1;
	if ((low >= (int)samEnd) || (sam[low].vPC != vDest))
	  fatal("destination %d not found in method", vPC);
	sam[samPC].arg= low;
	sam[low].join();			// mark as destination
#if   0 // this is perfectly normal...
	if (sam[low].joinCount > 1) {
	  printf("WARNING: joinCount == %d at samPC %d\n", sam[low].joinCount, low);
	}
#     endif
      }
  }


  /**************** PASS 1c: jump optimisation ****************/


# ifdef OPTIMISE_JUMPS

  {
    bool stable;
    do
      {
	stable= true;
	for (size_t i= 0; i < nJumps; ++i)
	  {
	    samPC= jumps[i];

	    //	JmpF l			   JmpT d
	    //	Jmp  d		=>	   <deleted>
	    // l: ...			l: ...

	    if ((sam[samPC].opcode      == opJmpF)
		&& (sam[samPC].arg      == (int)samPC+2)
		&& (sam[samPC+1].opcode == opJmp))
	      {
		assert(sam[samPC+2].isDest());
		size_t finalDest= sam[samPC+1].arg;
		assert(finalDest != samPC);		// we'd have an infinite loop
#	        ifdef ASM_LISTING
		printf("OPT: %3d: JmpF Jmp -> JmpT\n", samPC);
#	        endif
		sam[samPC].opcode= opJmpT;		// JmpF l    :=  JmpT l
		sam[samPC].arg= finalDest;		// JmpT l    :=  JmpT dest
		sam[finalDest].join();
		sam[samPC+2].unjoin();
		if (!sam[samPC+1].isDest())
		  {
		    sam[samPC+1].beDeleted();		// Jmp dest  :=  <deleted>
		    sam[finalDest].unjoin();
		  }
		stable= false;
	      }

	    //	JmpT l			   JmpF d
	    //	Jmp  d		=>	   <deleted>
	    // l: ...			l: ...

	    if ((sam[samPC].opcode      == opJmpT)
		&& (sam[samPC].arg      == (int)samPC+2)
		&& (sam[samPC+1].opcode == opJmp))
	      {
		// (these are VERY rare)
		assert(sam[samPC+2].isDest());
		size_t finalDest= sam[samPC+1].arg;
		assert(finalDest != samPC);		// we'd have an infinite loop
#	        ifdef ASM_LISTING
		printf("OPT: %3d: JmpT Jmp -> JmpF\n", samPC);
#	        endif
		sam[samPC].opcode= opJmpF;		// JmpF l    :=  JmpT l
		sam[samPC].arg= finalDest;		// JmpT l    :=  JmpT dest
		sam[finalDest].join();
		sam[samPC+2].unjoin();
		if (!sam[samPC+1].isDest())
		  {
		    sam[samPC+1].beDeleted();		// Jmp dest  :=  <deleted>
		    sam[finalDest].unjoin();
		  }
		stable= false;
	      }

	    // Jmp* L1 ... L1: Jmp  L2 => Jmp* L2

	    {
	      size_t firstDest= sam[samPC].arg;
	      if (sam[firstDest].opcode    == opJmp)
		{
		  size_t secondDest= sam[firstDest].arg;

		  assert(firstDest != secondDest);	// we'd have an infinite loop
		  assert(sam[firstDest].isDest());
		  assert(sam[secondDest].isDest());
#		  ifdef ASM_LISTING
		  printf("OPT: %3d: Jmp %d -> Jmp* %d  :=  Jmp* %d\n",
			 samPC, firstDest, secondDest, secondDest);
#	          endif
		  sam[samPC].arg= secondDest;		// propagate jump destination
		  sam[secondDest].join();		// new path into destination
		  sam[firstDest].unjoin();		// original path deleted
		  stable= false;
		}
	    }

	    // delete unreachable jumps: not marked as destinations and
	    // preceded by either a jump, return, or deleted opcode
#	    ifdef DELETE_JUMPS
	    if ((sam[samPC].opcode != opDeleted)
		&& (!sam[samPC].isDest())
		&& (samPC > 0)
		&& ((sam[samPC - 1].isControl())
		    || sam[samPC - 1].opcode == opDeleted))
	      {
		size_t dest= sam[samPC].arg;
#	        ifdef ASM_LISTING
		printf("     %3d: Jmp* %d := DELETED\n",
		       samPC, dest);
#	        endif
		sam[samPC].beDeleted();
		sam[dest].unjoin();
		stable= false;
	      }
#	    endif // DELETE_JUMPS
	  } // for (samPC in jumps)
      } // do { ... jump optimisations ... }
    while (!stable);
  }

# ifdef ASM_LISTING
  printf("---------------- AFTER JUMP OPTIMISATION:\n");
  {
    for (size_t i= 0; i < samEnd; ++i)
      printf("%3d: %s\n", i, sam[i].printString());
  }
# endif

# endif // OPTIMISE_JUMPS

  /**************** PASS 2+3: type analysis and code generation ****************/


  // Note #1: this loop is compatible with both the old gen_<arch>.hg
  // generators (fixed native code for each opcode) and with the new
  // optimising back end (flow and type analyses in the first pass,
  // intelligent code in the second).  It should stay that way!  Since
  // type analysis tracks control flow through jumps and across other
  // basic block boundaries (e.g. EVERY message send and special
  // arithmetic bytecode), we generate code that could break (however
  // rarely) if the stack changes "under our feet".  That means we
  // MUST be able to revert any Context to the unoptimised version of
  // its method "on demand" whenever the stack is modified from
  // Smalltalk (e.g. the Debugger, or [more perniciously] sending
  // #fromBraceStack: to a Collection).  This means we must be able to
  // rerun codegen on a method a second time, using the old-style
  // fixed code generator -- which is immune to a non-deterministic
  // stack.  A possible (and preferable) alternative would be to
  // modify the optimiser to make it optionally discard type
  // information at every basic block boundary (which would keep a
  // significant fraction of the important optimisations, and also
  // yield code that is safe in the presence of an unpredictable
  // stack).
  //
  // (Note that there is also a problem with self-modifying methods
  // [of which there is one example in the exception system -- thanks
  // a bundle, Craig!], but that's a slightly different problem.)

  // Note #2: this MUST be done in two passes since the type analysis
  // in the optimiser will affect the code that is generated for
  // almost every opcode -- and in particular that for conditional
  // jumps, which means there's no simple way of creating fixup lists
  // or chains or whatever to fill in the destination addresses later
  // on.  (Some conditional jumps will even be elided entirely, when
  // the optimiser can figure out a constant Boolean receiver at
  // translation time.)
  //
  // (But now that I've thought about it for a while, it should be
  // possible to do the codegen in one pass -- provided that (a) we
  // pass samPC destinations to the optimiser instead of nPC
  // destinations, (b) that the optimiser itself is willing to manage
  // the fixup lists, and (c) we can somehow communicate the final
  // corresponding nPC values to the optimiser.  Getting the interface
  // and the optimiser's structures just right would be tricky, but
  // nowhere near impossible.)
  //
  // But what the hey: last time I measured it, this thing was
  // translating an average of 1 million bytecodes (60,000 methods)
  // per second on a rusty old 200MHz PPC 603, with the aggressive
  // optimiser enabled.  (The optimiser is probably faster than the
  // old gen_<arch>.hg generators anyway, since it writes out fewer
  // than half the number instructions with less than twice as much
  // overhead).  Could we care less about two passes?  Unlikely.


  // We need a flag to tell us whether to specialise for receivers that
  // might be PseudoContexts.  (Note that regular contexts can change
  // into PseudoContexts, and vice-versa, at any moment and without
  // warning -- so we need to be prepared to deal with this if the
  // receiver is *anything* related to Contexts, pseudo or otherwise.)
  // (This only affects LdInst, StInst, PopInst, and inlined primitive
  // response to #at: and #at:put:.)

  bool compilationErrors= false;

# define noteError()	(compilationErrors= true)

  for (int pass= 1; pass < 3; ++pass)
  {

#   define argWord()	(sam[samPC].arg)
#   define argLo()	(argWord() & 0xff)
#   define argHi()	(argWord() >> 8)

#   define jumpDest(sPC) \
      ((pass == 1) \
	? ((sam[sPC].arg <= (int)samPC) ? asm_pc - 32 : asm_pc + 32) \
	: sam[sam[sPC].arg].nPC)

#   define nextDest(sPC) \
      ((pass == 1) ? asm_pc + 32 : sam[sPC].nPC)

    asm_pass= 0;
    asm_pc= nMeth->start;

    size_t stackDepth= 0;
#  ifndef NDEBUG
    size_t stackLimit= newMethod->frameSize();
#  endif
    size_t blockLimit= 0;	// samPC of insn after outermost block

#   define inBlock()	(samPC < blockLimit)

#   define pushStack()	(++stackDepth)
#   define popStack(N)	(stackDepth-= (N))

#   ifdef ASM_LISTING
    printf("---------------- CODEGEN\nprologue: %p\n", asm_pc);
#   endif

    genReset();

    // PROLOGUE: inline cache, quick/primitive response, activation sequence

    //#warning: FIX THIS
    // Note: we almost certainly only need to generate the prologue
    // once, so it should be moved out of this loop.  Unless, of course,
    // there's any danger that in the future we're going to feed
    // information back from the main body into the prologue; e.g: which
    // subset of the VM state registers actually need initialising.
    // Come to think of it, this information could easily be gathered
    // during the initial SAM generation, so: MOVE THIS DAMN PROLOGUE
    // OUT OF THE LOOP!

#   ifdef DSM_LISTING
    char *dsm_org= (char *)asm_pc;
#   endif

    genPrologue(nMeth, inContext);

#   ifdef DSM_LISTING
    extern int disassemble(void *addr, FILE *stream= stdout);
    if (pass == 2)
      {
	while (dsm_org < (char *)asm_pc)
	  {
	    printf("     %08x\t", (unsigned)dsm_org);
	    dsm_org+= disassemble((void *)dsm_org, stdout);
	    printf("\n");
	  }
      }
#   endif

    // BODY: the stuff that runs activated

    for (samPC= 0; samPC <= samEnd; ++samPC)
    {
#     ifdef ASM_LISTING
      if ((blockLimit == samPC) && (samPC > 0))
	printf("[BLOCK END]\n");
#     endif

      if (sam[samPC].isDest())
	{
	  // give optimiser chance to clean up basic block exit before
	  // assigning nPC of first insn of next basic block
	  genBlockExit();
	}

#     ifdef ASM_LISTING
      printf("%3d: [%d] [%d] %p %s\n", samPC,
	     stackDepth, sam[samPC].stackDepth, asm_pc, sam[samPC].printString());
#     ifdef ASM_STACK
      if (pass == 2)
	opt_dumpStack();
#     endif
#     endif

      assert(stackDepth < stackLimit);

      if (sam[samPC].isComb())
	{
	  stackDepth= sam[samPC].stackDepth;
	  genRecombine(stackDepth, ((samPC > 0) && (sam[samPC-1].isControl())));
	}

      if (pass == 1)
	{
	  sam[samPC].nPC= asm_pc;
#	  ifdef VERIFY_JUMPS
	  // don't stomp on recombination information
	  if (sam[samPC].isDest() && !sam[samPC].isComb())
	    sam[samPC].stackDepth= stackDepth;
#	  endif
	  sam[samPC].nPC= asm_pc;
	}
      else // pass == 2
	{
	  if (sam[samPC].nPC != asm_pc)
	    {
	      fatal("phase error at sPC %d: %p %s",
		    samPC, asm_pc, sam[samPC].printString());
	    }
	  if (sam[samPC].isCkpt())
	    nMeth->notePC(sam[samPC].vPC, sam[samPC].nPC);

#	  ifdef VERIFY_JUMPS
	  // don't stomp on recombination information
	  if (sam[samPC].isDest() && !sam[samPC].isComb())
	    assert(sam[samPC].stackDepth == stackDepth);
#	  endif
	}

      switch (sam[samPC].opcode)
	{
	case opPop:
	  genPop();
	  popStack(1);
	  break;
	    
	case opDup:
	  genDup();
	  pushStack();
	  break;

	case opLdSelf:
	  genLdSelf(inBlock(), receiverTag);
	  pushStack();
	  break;

	case opLdTrue:
	  genLdTrue();
	  pushStack();
	  break;

	case opLdFalse:
	  genLdFalse();
	  pushStack();
	  break;

	case opLdNil:
	  genLdNil();
	  pushStack();
	  break;

	doLdInt:
	case opLdInt:
	  genLdInt(argWord());
	  pushStack();
	  break;

	case opLdThisContext:
	  if ((pass == 1)
	      &&((sam[samPC+1].opcode == opLdInt) || (sam[samPC+1].opcode == opLdLit))
	      && (sam[samPC+2].opcode == opBlockCopy)
	      && (sam[samPC+3].opcode == opJmp) )
	    {
	      // is that a Sear's LdThisContext?
	      int nArgs= sam[samPC+1].arg;
	      const int resume= sam[samPC+3].arg;

	      if (sam[samPC+1].opcode == opLdLit)
		{
		  oop literal= newMethod->literalAt(nArgs);
		  assert(literal->isInteger());
		  nArgs= literal->integerValue();
		}

	      size_t startpc= sam[samPC+4].vPC;

	      sam[samPC+0].opcode= opLambda;		// LdThisCtx  <-  Lambda
	      sam[samPC+0].arg= (startpc << 8) | nArgs;
	      sam[samPC+1].beDeleted();			// LdInt      <-  <deleted>
	      sam[samPC+1].arg= 0;
	      sam[samPC+2].opcode= opJmp;		// BlockCopy  <-  Jmp
	      sam[samPC+2].arg= resume;
	      sam[samPC+2].beBend();
	      sam[samPC+3].opcode= opBlockActivate;	// Jmp        <-  BlockActivate
	      sam[samPC+3].arg= (resume << 8) | nArgs;

	      // block entry needs checkpoint and recombination
	      sam[samPC+3].noBend().beCkpt().join().recombine(nArgs);

	      // block body needs checkpoint
	      sam[samPC+4].beCkpt()/*.noComb()*/;

	      // resume needs recombination
	      sam[resume].recombine(stackDepth + 1);	// + 1 coz we push closure

	      goto doLambda;	// just to be consistent between passes
	    }
	  else
	    {
	      // Or a REAL LdThisContext??
	      genLdThisContext(inBlock());
	    }
	  pushStack();
	  break;

	case opLdLit:
#	  ifdef OPTIMISE_LITERALS
	  if (pass == 1)
	    {
	      oop literal= newMethod->literalAt(argWord());
	      if (literal->isInteger())
		{
		  PRINTF(("LdLit %p -> LdInt %d\n", literal, literal->integerValue()));
		  sam[samPC].opcode= opLdInt;
		  sam[samPC].arg= literal->integerValue();
		  goto doLdInt;
		}
	    }
#	  endif // OPTIMISE_LITERALS
	  genLdLit(argWord(), inBlock());
	  pushStack();
	  break;

	case opLdInst:
	  assert(argHi() == 0);
	  genLdInst(argWord(), inBlock(), inContext);
	  pushStack();
	  if (inContext && (pass == 1))
	    sam[samPC+1].beCkpt();
	  break;

	case opStInst:
	  assert(argHi() == 0);
	  genStInst(argWord(), inBlock(), inContext);
	  if (inContext && (pass == 1))
	    sam[samPC+1].beCkpt();
	  break;

	case opPopInst:
	  assert(argHi() == 0);
	  genPopInst(argWord(), inBlock(), inContext);
	  popStack(1);
	  if (inContext && (pass == 1))
	    sam[samPC+1].beCkpt();
	  break;

	case opLdTemp:
	  assert(argHi() == 0);
#ifdef NEW_FRAMES
	  if (argWord() < newMethod->argCount())
	    genLdArg(argWord(), inBlock());
	  else
#endif
	    genLdTemp(argWord(), inBlock());
	  pushStack();
	  break;

	case opStTemp:
	  assert(argHi() == 0);
#ifdef NEW_FRAMES
	  if (argWord() < newMethod->argCount())
	    genStArg(argWord(), inBlock());
	  else
#endif
	    genStTemp(argWord(), inBlock());
	  // no stack effect
	  break;

	case opPopTemp:
	  assert(argHi() == 0);
#ifdef NEW_FRAMES
	  if (argWord() < newMethod->argCount())
	    genStArg(argWord(), inBlock());
	  else
#endif
	    genStTemp(argWord(), inBlock());
	  genPop();
	  popStack(1);
	  break;

	case opLdLitInd:
	  assert(argHi() == 0);
	  genLdLitInd(argWord(), inBlock());
	  pushStack();
	  break;

	case opStLitInd:
	  assert(argHi() == 0);
	  genStLitInd(argWord(), inBlock());
	  // no stack effect
	  break;

	case opPopLitInd:
	  assert(argHi() == 0);
	  genStLitInd(argWord(), inBlock());
	  genPop();
	  popStack(1);
	  break;

	case opJmp:
	  {
	    const size_t dest= argWord();
	    assert(dest < samEnd);
	    assert(dest != samPC);	// Zzzzzzz...

#	   ifdef DELETE_UNREACHABLE
	    if (pass == 1)
	      {
		warningIf(!sam[samPC+1].isDest(), "unreachable code", samPC+1);
		if (!sam[samPC+1].isDest())
		  {
		    size_t spc= samPC+1;
		    // note: samEnd is artificial dest
		    while (!sam[spc].isDest())
		      sam[spc++].beDeleted();
		    assert((sam[spc].isComb()) || (spc == samEnd));
		  }
	      }
#	   else
	    // must recombine after control flow change
	    assert(sam[samPC+1].isComb());
#	   endif

	    if (   (pass == 1)
		   && (sam[samPC-1].isBend())
		   && (!sam[samPC].isDest()) )
	      {
		// we're an orphaned jump: ignore ourselves!
		assert(sam[dest].isDest());	// sanity check
		sam[dest].unjoin();
		sam[samPC].beDeleted();
		break;	// get out fast before causing too much damage
	      }

	    if (dest < samPC) // backward jump
	      {
		// if jump verification is on, check that the current
		// stack depth == backward destination stack depth
#		ifdef VERIFY_JUMPS
		assert(sam[dest].stackDepth == stackDepth);
#		endif
		// destination needs interrupt checkpoint
		if (pass == 1) sam[dest].beCkpt();
	      }
	    else // forward jump
	      {
#		ifdef COMPLETE_MAP
		if (pass == 1) sam[dest].beCkpt();
#		endif
	      }

	    if (sam[dest-1].isBend())
	      {
		// we are probably the only flow of control into that insn
		if (pass ==1 )
		  {
		    sam[dest].recombine(stackDepth);
		  }
		else
		  {
		    assert(sam[dest].stackDepth == stackDepth);
		  }
	      }

	    // must be careful about backward jumps with interrupt checks!
	    insn *nDest= ((pass == 1)
			  ? ((dest < samPC) ? (asm_pc - 8) : (asm_pc + 8))
			  : sam[dest].nPC);
	    if (pass == 2)
	      assert(sam[dest].nPC != asm_pc);	// Zzzzzzz...
	    genJmp(nDest);
	  }
	  break;

	case opJmpF:
	  {
	    const size_t dest= argWord();
	    assert(dest < samEnd);
	    assert(dest != samPC);	// Zzzzzzz...

	    // following insn needs checkpoint (non-bool rcvr)
	    if (pass == 1)
	      sam[samPC+1].beCkpt();

	    if (dest < samPC) // backward jump
	      {
		// if jump verification is on, check that the current
		// stack depth == backward destination stack depth
#		ifdef VERIFY_JUMPS
		assert(sam[dest].stackDepth == stackDepth);
#		endif
		// destination needs interrupt checkpoint
		if (pass == 1) sam[dest].beCkpt();
	      }
	    else // forward jump
	      {
		if (pass == 1)
		  {
		    // forward destination needs recombination
		    sam[dest].recombine(stackDepth - 1);	// we pop Boolean
#		    ifdef COMPLETE_MAP
		    if (pass == 1) sam[dest].beCkpt();
#		    endif
		  }
		else // pass == 2
		  {
		    assert(sam[dest].stackDepth == stackDepth - 1);
		  }
	      }
	    if (!sam[samPC].isSqsh())
	      {
		genJmpF(jumpDest(samPC));
	      }
	    popStack(1);
	  }
	  break;

	case opJmpT:
	  {
	    const size_t dest= argWord();
	    assert(dest < samEnd);
	    assert(dest != samPC);	// Zzzzzzz...

	    // following insn needs checkpoint (non-bool rcvr)
	    if (pass == 1)
	      sam[samPC+1].beCkpt();

	    if (dest < samPC) // backward jump
	      {
		// if jump verification is on, check that the current
		// stack depth == backward destination stack depth
#		ifdef VERIFY_JUMPS
		assert(sam[dest].stackDepth == stackDepth);
#		endif
		// destination needs interrupt checkpoint
		if (pass == 1) sam[dest].beCkpt();
	      }
	    else // forward jump
	      {
		if (pass == 1)
		  {
		    // forward destination needs recombination
		    sam[dest].recombine(stackDepth - 1);	// we pop Boolean
#		    ifdef COMPLETE_MAP
		    if (pass == 1) sam[dest].beCkpt();
#		    endif
		  }
		else // pass == 2
		  {
		    assert(sam[dest].stackDepth == stackDepth - 1);
		  }
	      }
	    if (!sam[samPC].isSqsh())
	      {
		genJmpT(jumpDest(samPC));
	      }
	    popStack(1);
	  }
	  break;

	case opSuper:
	  genSuper(argHi(), argLo());
	  popStack(argLo()+1);			// receiver+args
	  pushStack();				// answer
	  if (pass == 1) sam[samPC+1].beCkpt();	// savedIP
	  break;

	case opSend:
	  genSend(argHi(), argLo());
	  popStack(argLo()+1);			// rcvr+args
	  pushStack();				// answer
	  if (pass == 1) sam[samPC+1].beCkpt();	// savedIP
	  break;

	case opRetSelf:
	  genLdSelf(inBlock(), receiverTag);
	  pushStack();
	  goto doRetMethod;
	  break;
	  
	case opRetTrue:
	  genLdTrue();
	  pushStack();
	  goto doRetMethod;
	  break;

	case opRetFalse:
	  genLdFalse();
	  pushStack();
	  goto doRetMethod;
	  break;

	case opRetNil:
	  genLdNil();
	  pushStack();
	  goto doRetMethod;
	  break;

	doRetMethod:
	case opRetMethod:
	  if (inBlock()) {
	    genRemoteRetTop();
	  } else {
	    genLocalRetTop();
	  }
	finishRet:
	  popStack(1);
	  if (pass == 1)
	    {
#	     ifdef DELETE_UNREACHABLE
	      if (!sam[samPC+1].isDest())
		{
		  warningIf(!sam[samPC+1].isDest(), "unreachable code", samPC+1);
		  size_t spc= samPC+1;
		  // note: samEnd is artificial dest
		  while ((spc < samEnd) && !sam[spc].isDest())
		    {
		      sam[spc++].noComb().noCkpt().beDeleted();
		    }
		  assert((sam[spc].isComb()) || (spc == samEnd));
		}
#	     else
	      assert((sam[samPC+1].isComb()) || (samPC+1 == samEnd));
#	     endif
          sam[samPC+1].beCkpt();	// #cannotReturn
	    }
	  // must recombine after control flow change
	  break;

	  // local return from block: never executed in method
	case opRetBlock:
	  assert(inBlock());
	  genLocalRetTop();
	  goto finishRet;
	  break;

	case opAdd:
	  genAdd(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();	// tag trap
	  break;

	case opSubtract:
	  genSubtract(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();	// tag trap
	  break;

	case opMultiply:
	  genMultiply(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();	// ditto
	  break;

	case opDivide:
	  genDivide(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();	// you got the idea...
	  break;

	case opDiv:
	  genDiv(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opMod:
	  genMod(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

#	define RELATION(NAME)						\
	  {								\
	    bool squash= false;						\
	    if (sam[samPC+1].isCondJump())				\
	      squash= gen##NAME(pass, sam[samPC+1].jumpType(),		\
				jumpDest(samPC+1), nextDest(samPC+2),	\
				!sam[samPC+1].isDest());		\
	    else							\
	      gen##NAME(pass);						\
	    popStack(2);						\
	    pushStack();						\
	    if (pass == 1)						\
	      {								\
		if (squash)						\
		  {							\
		    PRINTF((#NAME" squash at %p\n", sam[samPC].nPC));	\
		    assert(!sam[samPC].isComb());			\
		    assert(!sam[samPC].isDest());			\
		    sam[samPC+1].noCkpt().squash();			\
		  }							\
		else							\
		  sam[samPC+1].beCkpt();				\
	      }								\
	  }

	case opLessThan:	RELATION(LessThan);		break;
	case opGreaterThan:	RELATION(GreaterThan);		break;
	case opLessOrEqual:	RELATION(LessOrEqual);		break;
	case opGreaterOrEqual:	RELATION(GreaterOrEqual);	break;
	case opEqual:		RELATION(Equal);		break;
	case opNotEqual:	RELATION(NotEqual);		break;

#	undef RELATION

	case opEquivalent:
	  {
	    bool squash= false;
	    if (sam[samPC+1].isCondJump())
	      {
		squash= genEquivalent(sam[samPC+1].jumpType(),
				      jumpDest(samPC+1), nextDest(samPC+2),
				      !sam[samPC+1].isDest());
	      }
	    else
	      {
		genEquivalent();	// no checkpoint: never fails
	      }
	    popStack(2);
	    pushStack();
#	    ifdef COMPLETE_MAP
	    if (pass == 1) sam[samPC+1].beCkpt();
#	    endif
	    if (squash)
	      {
		PRINTF(("eqv squash at %p\n", sam[samPC].nPC));
		if (pass == 1)
		  {
		    if (sam[samPC+1].isComb() || sam[samPC+1].isDest())
		      noteError();
		    assert(!sam[samPC+1].isComb());
		    assert(!sam[samPC+1].isDest());
		    sam[samPC+1].noCkpt().squash();
		  }
	      }
	  }
	  break;

	case opBitShift:
	  genBitShift();
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opBitAnd:
	  genBitAnd(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opBitOr:
	  genBitOr(pass);
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opAt:
	  genAt();
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opAtPut:
	  genAtPut();
	  popStack(3);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opSize:
	  genSize();
	  popStack(1);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opNext:
	  genNext();
	  popStack(1);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opNextPut:
	  genNextPut();
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opAtEnd:
	  genAtEnd();
	  popStack(1);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opDo:
	  genDo();
	  popStack(2);
	  pushStack();
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opClass:
	  genClass();	// no checkpoint: never fails
	  popStack(1);
	  pushStack();
#	  ifdef COMPLETE_MAP
	  if (pass == 1) sam[samPC+1].beCkpt();
#	  endif
	  break;

	case opBlockCopy:
	  // if we got here it's because we didn't reduce to Lambda in
	  // LdThisCtx... sigh... there's not much we can do except
	  // just send the damn message
	  genSpecial(SelectorBlockCopyIndex, 1);
	  popStack(2);
	  pushStack();
	  // NOTE: we just ignored a whole bunch of recombination info
	  // that normally would have been used by the optimiser;
	  // still, anyone who goes sending #blockCopy: explicitly
	  // deserves all that's coming to them.
	  break;

	case opValue:
	  genValue();
	  popStack(1);	// closure
	  pushStack();	// value
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opValueWithArg:
	  genValueWithArg();
	  popStack(2);	// closure+arg
	  pushStack();	// value
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opNew:
	  genNew();
	  popStack(1);	// Behaviour
	  pushStack();	// instance
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opNewWithArg:
	  genNewWithArg();
	  popStack(2);	// Behaviour+size
	  pushStack();	// instance
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opMakePoint:
	  genMakePoint();
	  popStack(2);	// abscissa+ordinate
	  pushStack();	// Point
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opPointX:
	  genPointX();
	  popStack(1);	// Point
	  pushStack();	// abscissa
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	case opPointY:
	  genPointY();
	  popStack(1);	// Point
	  pushStack();	// ordinate
	  if (pass == 1) sam[samPC+1].beCkpt();
	  break;

	  // optimised

	case opLambda:
	doLambda:
	  {
	    const size_t startpc= argHi();
	    const size_t nArgs= argLo();
	    genLambda(startpc, nArgs, inBlock());
	    pushStack();
	  }
	  break;

	case opBlockActivate:
	  {
	    const size_t nArgs= argLo();
	    const size_t resume= argHi();

	    if (pass == 1)
	      blocks[nBlocks++]= samPC;
	    genBlockActivate(nMeth, nArgs);
	    if (resume > blockLimit)
	      blockLimit= resume;
#ifdef	    ASM_LISTING
	    printf("[BLOCK TO %d]\n", resume);
#endif
	  }
	  break;

	  // pseudo-ops

	case opDeleted:
	  assert(!sam[samPC].isComb());
	  assert(!sam[samPC].isBend());
	  // last <deleted> marks checkpoint and pseudo-dest
	  assert((samPC == samEnd) || !sam[samPC].isDest());
	  //assert((samPC == samEnd) || !sam[samPC].isCkpt()); FIXME? MD
	  break;

	default:
	  fatal("untranslatable samcode: %d", sam[samPC].opcode);
	  break;

	} // switch(bytecode)

     if (samPC > 0)
	assert(sam[samPC].nPC >= sam[samPC-1].nPC);

#     ifdef DSM_LISTING
      if (pass == 2)
	{
	  while (dsm_org < (char *)asm_pc)
	    {
	      printf("     %08x\t", (unsigned)dsm_org);
	      dsm_org+= disassemble((void *)dsm_org, stdout);
	      printf("\n");
	    }
	}
#     endif

    } // for(0 <= samPC < samEnd)

    genFinalise(pass);

    if (pass == 1)
      {
	if ((size_t)((char *)asm_pc - ((char *)nMeth->start)) > codeSize)
	  fatal("native code overflow: %d > %d",
		(size_t)((char *)asm_pc - ((char *)nMeth->start)), codeSize);
	if ((size_t)nMeth->map->size > byteSize)
	  fatal("pc map overflow: %d > %d", nMeth->map->size, byteSize);
      }
    else // pass == 2
      {
	if (stackDepth > 0)
	  {
#	ifdef RETURN_WARNINGS
	    if (stackDepth > RETURN_FUZZ)
	      {
		printf("WARNING: in ");
		nMeth->print();
		printf(" [%p %p]", nMeth->start, asm_pc);
		printf("\nmethod returns with %d item%s remaining on the stack\n",
		       stackDepth, ((stackDepth == 1) ? "" : "s"));
	      }
#	endif
	  }
	else
	  {
	    // ensure that the optimiser agrees that the stack is empty
	    genRecombine(0, false);
	  }

	nMeth->finalise(asm_pc);
	gen_flush(nMeth->start, asm_pc);

	if (nBlocks > 0)
	  {
	    nMeth->blockMap= new(nBlocks) PcMap;
	    // Note: the samPCs in blocks[] correspond to the block
	    // prologues, which replace the "Jmp" in the original
	    // code.  The correct vPC to store in the map is therefore
	    // two greater than that found in the opcode.
	    for (size_t i= 0; i < nBlocks; ++i)
	      {
		nMeth->blockMap->notePC(sam[blocks[i]].vPC + 2, sam[blocks[i]].nPC);
	      }
	    nMeth->blockMap->finalise();
	  }

#	ifdef ASM_LISTING
	printf("NativeMethod %p: code size: %d bytes, map size: %d entries\n",
	       nMeth, (int)asm_pc - (int)nMeth->start,
	       nMeth->map->size);
#	endif

	gen_hTotal+= (int)nMeth->checkedEntry - (int)nMeth->start;
	gen_nTotal+= (int)asm_pc - (int)nMeth->checkedEntry;
	gen_vTotal+= endPC - startPC + 1;


#     	ifdef DSM_LISTING
	char *addr= (char *)nMeth->start;
	while (addr < (char *)asm_pc)
	  {
	    printf("     %08x\t", (unsigned)addr);
	    addr+= disassemble((void *)addr, stdout);
	    printf("\n");
	  }
#     	endif // DSM_LISTING
      }
  } // for (1 <= pass <= 2)

  if (compilationErrors)
    {
      fprintf(stderr, "exiting due to compilation errors\n");
      exit(1);
    }

  assert(nMeth->includesVPC(endPC+1));

  nMeth->end= asm_pc;

  nativeMemoizer->atPut(methodIndex, nMeth);
  newNativeMethod= nMeth;

  if (withSpy) spyTranslated();
}
