#include "jit.h"

#ifdef USE_X86_JIT

#include <stdlib.h>
#include "slatevm.h"

/* Call the bytes stored in the executeCode buffer as a function taking
   (interp, jitMethod, codePointer).  How that buffer gets filled is not
   visible in this part of the file. */
#define JIT_EXECUTE(interp, jitMethod, codePointer) \
        (((void (*) (struct Interpreter *, JitMethod *, unsigned int)) executeCode) \
                (interp, jitMethod, codePointer))

/* Call the bytes stored in the invalidateCode buffer as a function taking
   (interp, jitMethod).  jitFlush uses this on the currently running method
   before it discards the cache. */
#define JIT_INVALIDATE(interp, jitMethod) \
        (((void (*) (struct Interpreter *, JitMethod *)) invalidateCode) \
                (interp, jitMethod))
 
/* One entry of the JIT method cache (see cachedMethods). */
typedef struct {
        /* Method this entry belongs to; NULL marks an unused slot. */
        struct CompiledMethod * method;
        /* One entry per bytecode offset plus one extra: the address of the
           native code generated for that bytecode, or 0xFFFFFFFF while none
           has been generated. */
        unsigned int * codeMap;
        /* Start of the native code region currently being filled. */
        unsigned char * nativeCode;
        /* Bytes already emitted into the region at nativeCode. */
        unsigned int nativeCodePosition;
        /* Capacity of the region at nativeCode, in bytes. */
        unsigned int nativeCodeSize;
} JitMethod;

/* Set by jitFlush, cleared by jitRun; code generation checks it to notice
   that the cache was discarded underneath it. */
volatile static int cacheFlushed = 0;
/* Method and interpreter that generatePic and jitFlush operate on. */
static JitMethod * runningMethod = NULL;
static struct Interpreter * runningInterpreter = NULL;
/* Single arena holding both code maps and generated native code. */
static unsigned char nativeCodeCache [JIT_CACHE_SIZE];
/* Offset of the first unused byte in nativeCodeCache. */
static unsigned int nativeCodePosition = 0;
/* Table of cached methods, probed by findMethod. */
static JitMethod cachedMethods [JIT_CACHE_METHODS];
/* Buffers invoked as functions through JIT_EXECUTE / JIT_INVALIDATE. */
static unsigned char executeCode [256];
static unsigned char invalidateCode [256];

/*
 * Throw away the whole JIT cache.
 *
 * Does nothing when the cache is already marked as flushed.  Otherwise the
 * currently running method (if any) is invalidated first, then the flushed
 * flag is raised, the arena allocation offset is rewound to zero and every
 * slot of the method table is cleared.
 */
void
jitFlush (void)
{
        if (! cacheFlushed)
        {
          /* Invalidate before touching any state the running code may use. */
          if (runningMethod != NULL)
            JIT_INVALIDATE (runningInterpreter, runningMethod);

          cacheFlushed = 1;
          nativeCodePosition = 0;
          memset (cachedMethods, 0, sizeof (cachedMethods));
        }
}

/*
 * Locate the cache slot for `method` by linear probing in cachedMethods.
 *
 * Probing starts at the slot selected by the low bits of the method's
 * address, runs to the end of the table and then wraps around to the slots
 * before the starting one.  The first slot that either already holds
 * `method` or is still empty (method == NULL) is returned, so callers must
 * compare the returned slot's `method` field with the one they asked for to
 * tell a hit from a free slot.
 *
 * Returns NULL when every slot is occupied by some other method.
 *
 * The mask assumes JIT_CACHE_METHODS is a power of two (defined outside
 * this file section -- TODO confirm).
 */
static JitMethod *
findMethod (struct CompiledMethod * method)
{
        unsigned int hash = (unsigned) method & (JIT_CACHE_METHODS - 1);
        JitMethod * jitMethod;

        for (jitMethod = & cachedMethods [hash];
             jitMethod < & cachedMethods [JIT_CACHE_METHODS];
             ++ jitMethod)
          if (jitMethod -> method == method || jitMethod -> method == NULL)
            return jitMethod;

        /* Wrap around.  The acceptance test must be the same as above: the
           previous `&&` could only succeed for a NULL method, so entries
           that had wrapped were never found and free slots before `hash`
           were never handed out. */
        for (jitMethod = cachedMethods;
             jitMethod < & cachedMethods [hash];
             ++ jitMethod)
          if (jitMethod -> method == method || jitMethod -> method == NULL)
            return jitMethod;

        return NULL;
}

/*
 * Initialise the cache slot `jitMethod` for `method`, carving its storage
 * out of nativeCodeCache at the current allocation offset:
 *
 *   - a code map with one entry per bytecode plus one, every entry preset
 *     to 0xFFFFFFFF;
 *   - directly after it, a native code region of JIT_SPACE_PER_BC bytes per
 *     code map entry.
 *
 * No check against the arena size is made here.
 */
static void
newMethod (struct CompiledMethod * method, JitMethod * jitMethod)
{
        unsigned int entries = PSObject_payloadSize ((struct Object *) method -> code);
        unsigned char * mapStart = nativeCodeCache + nativeCodePosition;

        jitMethod -> method = method;

        /* Code map first ... */
        jitMethod -> codeMap = (unsigned int *) mapStart;
        nativeCodePosition += (entries + 1) * sizeof (unsigned int);
        memset (jitMethod -> codeMap, 0xFF, (entries + 1) * sizeof (unsigned int));

        /* ... then the region that will receive the generated code. */
        jitMethod -> nativeCode = nativeCodeCache + nativeCodePosition;
        nativeCodePosition += (entries + 1) * JIT_SPACE_PER_BC;
        jitMethod -> nativeCodeSize = (entries + 1) * JIT_SPACE_PER_BC;
        jitMethod -> nativeCodePosition = 0;
}

/*
 * Translate the native address `ip` back to a bytecode offset.
 *
 * Among the code map entries that are strictly below `ip`, the largest one
 * is selected.  Its index is returned provided the following map entry is
 * set (not 0xFFFFFFFF) and `ip` does not lie beyond it; in every other case
 * the result is 0xFFFFFFFF.
 */
static unsigned int
reverseMapCodePointer (JitMethod * jitMethod, unsigned int ip)
{
        unsigned int * const mapStart = jitMethod -> codeMap;
        unsigned int * const mapEnd = mapStart + PSObject_payloadSize ((struct Object *) jitMethod -> method -> code);
        unsigned int * best = NULL;
        unsigned int * entry;

        for (entry = mapStart; entry < mapEnd; ++ entry)
        {
          if (* entry >= ip)
            continue;
          if (best == NULL || * entry > * best)
            best = entry;
        }

        if (best == NULL || best [1] == 0xFFFFFFFF || ip > best [1])
          return 0xFFFFFFFF;

        return best - mapStart;
}

/*
 * Return the offset of the bytecode that follows the one at `codePointer`.
 *
 * Extended opcodes (low nibble == BCExtended): the jump and the two
 * conditional branches (0x0F, 0x1F, 0x2F) carry two operand bytes; all
 * other extended opcodes have none.
 *
 * Regular opcodes: a high nibble of 0xF is followed by a variable-length
 * immediate whose bytes continue while they are >= 0x80, and the
 * free-variable load/store opcodes (0x03, 0x04) carry one further
 * immediate of the same form.
 */
static unsigned int
nextCodePointer (JitMethod * jitMethod, unsigned int codePointer)
{
        unsigned char * bytes = jitMethod -> method -> code -> elements;
        unsigned char opcode = bytes [codePointer];
        unsigned char kind = opcode & 0x0F;

        if (kind == BCExtended)
        {
          /* BCJumpTo, BCBranchIfTrue, BCBranchIfFalse */
          if (opcode == 0x0F || opcode == 0x1F || opcode == 0x2F)
            codePointer += 2;
          return codePointer + 1;
        }

        if ((opcode >> 4) == 0xF)
        {
          do
            ++ codePointer;
          while (bytes [codePointer] >= 0x80);
        }

        /* BCLoadFreeVariable, BCStoreFreeVariable */
        if (kind == 0x03 || kind == 0x04)
        {
          do
            ++ codePointer;
          while (bytes [codePointer] >= 0x80);
        }

        return codePointer + 1;
}
                     
/* x86 general-purpose register numbers as used in ModRM/SIB fields. */
#define X86_EAX 0
#define X86_ECX 1
#define X86_EDX 2
#define X86_EBX 3
#define X86_ESP 4
#define X86_EBP 5
#define X86_ESI 6
#define X86_EDI 7

/*
 * Instruction emitters.  `where` must be an lvalue of type unsigned char *;
 * each macro stores the instruction bytes at `where` and advances it.
 * Arguments are pasted unparenthesised, so pass simple expressions only.
 *
 * Suffix letters name the operands in destination, source order:
 *   R  register            I  32-bit immediate
 *   M  absolute address    BD base register + 32-bit displacement
 */

/* Store m as a 32-bit value at `where` and advance `where` by four. */
#define X86_IMM32(where, m) (*(unsigned int *)where = (unsigned int) (m), where += 4)
#define X86_MEM32(where, m) X86_IMM32(where, m)

#define X86_NOP(where) (*where++ = 0x90)

/* MOV family.  The BD forms use ModRM mod=10 with base register m and emit
   no SIB byte, so m must not be X86_ESP. */
#define X86_MOVRI(where, r, i) (*where++ = 0xB8 + r, X86_IMM32(where, i))
#define X86_MOVMI(where, m, i) (*where++ = 0xC7, *where++ = 0x05, X86_MEM32(where, m), X86_IMM32(where, i))
#define X86_MOVRM(where, r, m) (*where++ = 0x8B, *where++ = (r << 3) | 0x05, X86_MEM32(where, m))
#define X86_MOVMR(where, m, r) (*where++ = 0x89, *where++ = (r << 3) | 0x05, X86_MEM32(where, m))
#define X86_MOVRBD(where, r, m, i) (*where++ = 0x8B, *where++ = (r << 3) | m | 0x80, X86_IMM32(where, i))
#define X86_MOVBDR(where, m, i, r) (*where++ = 0x89, *where++ = (r << 3) | m | 0x80, X86_IMM32(where, i))
#define X86_MOVRR(where, r, s) (*where++ = 0x8B, *where++ = (r << 3) | s | 0xC0)

/* ADD r, s  and  ADD r, imm32. */
#define X86_ADDRR(where, r, s) (*where++ = 0x03, *where++ = (r << 3) | s | 0xC0)
#define X86_ADDRI(where, r, i) (*where++ = 0x81, *where++ = r | 0xC0, X86_IMM32(where, i))

/* TEST / CMP.
   NOTE(review): X86_CMPRR uses opcode 0x39 (CMP r/m32, r32) with r in the
   ModRM reg field, so the flags reflect s - r, whereas MOVRR/ADDRR treat r
   as the first operand.  Irrelevant for equality tests; confirm before
   using it with ordered conditions. */
#define X86_TESTRR(where, r, s)  (*where++ = 0x85, *where++ = (r << 3) | s | 0xC0)
#define X86_TESTRI(where, r, i)  (*where++ = 0xF7, *where++ = r | 0xC0, X86_IMM32(where, i))
#define X86_CMPRR(where, r, s) (*where++ = 0x39, *where++ = (r << 3) | s | 0xC0)
#define X86_CMPRI(where, r, i) (*where++ = 0x81, *where++ = (7 << 3) | r | 0xC0, X86_IMM32(where, i))

/* Fill in the 32-bit relative displacement stored at `where` so that the
   jump or call owning it transfers to `to`.  The displacement is relative
   to the byte following the field (where + 4). */
#define X86_PATCH(where, to) (*(unsigned int *) (where) = (unsigned int) (to) - (unsigned int) (where) - 4)

/*
 * Jump emitters.  `where` must be an lvalue of type unsigned char *.
 *
 * Every X86_Jxx / X86_JMP macro emits the opcode, reserves four bytes for
 * the rel32 displacement WITHOUT writing them, advances `where` past the
 * instruction and evaluates to the address of the displacement field, which
 * the caller is expected to hand to X86_PATCH.
 */

/* Near conditional jump: 0x0F, second opcode byte cc, rel32. */
#define X86_JCC(where, cc)  (*where++ = 0x0F, *where++ = (cc), where += 4, where - 4)

#define X86_JO(where)    X86_JCC(where, 0x80)
#define X86_JNO(where)   X86_JCC(where, 0x81)

#define X86_JB(where)    X86_JCC(where, 0x82)
#define X86_JC(where)    X86_JCC(where, 0x82)
#define X86_JNAE(where)  X86_JCC(where, 0x82)

#define X86_JAE(where)   X86_JCC(where, 0x83)
#define X86_JNB(where)   X86_JCC(where, 0x83)
#define X86_JNC(where)   X86_JCC(where, 0x83)

#define X86_JE(where)    X86_JCC(where, 0x84)
#define X86_JZ(where)    X86_JCC(where, 0x84)

#define X86_JNE(where)   X86_JCC(where, 0x85)
#define X86_JNZ(where)   X86_JCC(where, 0x85)

/* Below-or-equal is 0x86; these two previously emitted 0x88, which is JS. */
#define X86_JBE(where)   X86_JCC(where, 0x86)
#define X86_JNA(where)   X86_JCC(where, 0x86)

#define X86_JA(where)    X86_JCC(where, 0x87)
#define X86_JNBE(where)  X86_JCC(where, 0x87)

#define X86_JS(where)    X86_JCC(where, 0x88)
#define X86_JNS(where)   X86_JCC(where, 0x89)

#define X86_JP(where)    X86_JCC(where, 0x8A)
#define X86_JPE(where)   X86_JCC(where, 0x8A)

#define X86_JNP(where)   X86_JCC(where, 0x8B)
#define X86_JPO(where)   X86_JCC(where, 0x8B)

#define X86_JL(where)    X86_JCC(where, 0x8C)
#define X86_JNGE(where)  X86_JCC(where, 0x8C)

#define X86_JGE(where)   X86_JCC(where, 0x8D)
#define X86_JNL(where)   X86_JCC(where, 0x8D)

#define X86_JLE(where)   X86_JCC(where, 0x8E)
#define X86_JNG(where)   X86_JCC(where, 0x8E)

#define X86_JG(where)    X86_JCC(where, 0x8F)
#define X86_JNLE(where)  X86_JCC(where, 0x8F)

/* Unconditional near jump (0xE9 rel32); same contract as the Jcc macros. */
#define X86_JMP(where) (*where++ = 0xE9, where += 4, where - 4)
/* Indirect jump through register r (0xFF /4). */
#define X86_JMPR(where, r) (*where++ = 0xFF, *where++ = 0xE0 | r)

/* Call / return / stack emitters.  `where` must be an lvalue of type
   unsigned char *; each macro advances it past the emitted instruction. */

/* CALL rel32 to `label`.  The displacement is computed when the immediate
   is stored, i.e. relative to the end of the instruction.  Evaluates to the
   address of the displacement field. */
#define X86_CALL(where, label) (*where++ = 0xE8, X86_IMM32(where, (unsigned int) label - (unsigned int) where - 4), where - 4)
/* Indirect CALL through register r (0xFF /2). */
#define X86_CALLR(where, r) (*where++ = 0xFF, *where++ = r | 0xD0)
#define X86_LEAVE(where) (*where++ = 0xC9)
#define X86_RET(where) (*where++ = 0xC3)
/* PUSH imm32 and PUSH register. */
#define X86_PUSHI(where, i) (*where++ = 0x68, X86_IMM32(where, i))
#define X86_PUSHR(where, r) (*where++ = 0x50 + r)

/*
 * LEA / MOV emitters for addressing modes that need a SIB byte.
 *
 * `where` must be an lvalue of type unsigned char *; it is advanced past
 * the emitted bytes.  Arguments are pasted unparenthesised, exactly like
 * the other encoder macros in this file, so pass simple expressions only.
 *
 * Suffixes (destination, source order):
 *   BS   [base + index * scale]
 *   BSD  [base + index * scale + disp32]
 *   SD   [index * scale + disp32]
 * A trailing / leading R is the register operand.
 */

/* Two-bit SIB scale field: 8 -> 3, 4 -> 2, 2 -> 1, anything else -> 0. */
#define X86_SCALE_BITS(s) \
        (s == 8 ? 3 : \
          (s == 4 ? 2 : \
            (s == 2 ? 1 : 0)))

/* SIB byte: scale in bits 7-6, index register in bits 5-3, base in bits 2-0. */
#define X86_SIB(b, i, s) \
        (b | (i << 3) | (X86_SCALE_BITS(s) << 6))

/* opcode, ModRM (mod=00, rm=100), SIB */
#define X86_OP_BS(where, op, r, b, i, s) \
        (*where++ = op, \
         *where++ = (r << 3) | 0x4, \
         *where++ = X86_SIB(b, i, s))

/* opcode, ModRM (mod=10, rm=100), SIB, disp32 */
#define X86_OP_BSD(where, op, r, b, i, s, d) \
        (*where++ = op, \
         *where++ = (r << 3) | 0x84, \
         *where++ = X86_SIB(b, i, s), \
         X86_IMM32(where, d))

/* opcode, ModRM (mod=00, rm=100), SIB with base field 101 (no base), disp32 */
#define X86_OP_SD(where, op, r, i, s, d) \
        (*where++ = op, \
         *where++ = (r << 3) | 0x4, \
         *where++ = X86_SIB(0x5, i, s), \
         X86_IMM32(where, d))

/* LEA r, [...]  (opcode 0x8D) */
#define X86_LEABS(where, r, b, i, s) \
        X86_OP_BS(where, 0x8D, r, b, i, s)

#define X86_LEABSD(where, r, b, i, s, d) \
        X86_OP_BSD(where, 0x8D, r, b, i, s, d)

#define X86_LEASD(where, r, i, s, d) \
        X86_OP_SD(where, 0x8D, r, i, s, d)

/* MOV r, [...]  (opcode 0x8B) */
#define X86_MOVRBS(where, r, b, i, s) \
        X86_OP_BS(where, 0x8B, r, b, i, s)

#define X86_MOVRBSD(where, r, b, i, s, d) \
        X86_OP_BSD(where, 0x8B, r, b, i, s, d)

#define X86_MOVRSD(where, r, i, s, d) \
        X86_OP_SD(where, 0x8B, r, i, s, d)

/* MOV [...], r  (opcode 0x89) */
#define X86_MOVBSR(where, b, i, s, r) \
        X86_OP_BS(where, 0x89, r, b, i, s)

#define X86_MOVBSDR(where, b, i, s, d, r) \
        X86_OP_BSD(where, 0x89, r, b, i, s, d)

#define X86_MOVSDR(where, i, s, d, r) \
        X86_OP_SD(where, 0x89, r, i, s, d)

/* For use inside generatePic only (relies on its local `nativeCode` and its
   `failGeneration` label).  If fewer than `space` bytes remain between the
   emission pointer and the end of nativeCodeCache, flush the whole JIT
   cache and abandon stub generation.  Expands to a bare `if` statement, so
   do not use it as the body of an unbraced if/else. */
#define ENSURE_PIC_SPACE(space) \
  if (nativeCode + (space) > nativeCodeCache + sizeof (nativeCodeCache)) \
  { \
    jitFlush (); \
    goto failGeneration; \
  }

/*
 * Inline-cache miss handler.  jitInterpret emits a call to this function at
 * every send site, passing the selector, the argument vector, the arity,
 * the optionals array and `patch`, the address of the 32-bit displacement
 * of the send site's jump.
 *
 * The function dispatches the send, and when possible emits a new stub at
 * the current end of nativeCodeCache.  The stub first guards on the
 * arguments selected by the selector's cache mask (any failed guard jumps
 * to whatever `patch` pointed at before), then performs the send directly:
 * a slot access, a primitive call or a compiled-method activation.  On
 * success `patch` is redirected to the new stub and nativeCodePosition is
 * advanced past it.
 *
 * Whether or not a stub was generated, control reaches failGeneration,
 * where the current send is carried out in C.
 *
 * In the emitted stub EDX holds `args` and ECX holds `opts` (set up by the
 * send-site code).
 */
static void
generatePic (ObjectPointer selector, ObjectPointer * args, unsigned int arity, struct OopArray * opts, unsigned char * patch)
{
  ObjectPointer firstArg = args [0];
  unsigned char * nativeCode = nativeCodeCache + nativeCodePosition,
                * pic = nativeCode;
  /* nextPic: address the jump at `patch` currently targets, i.e. the
     previous head of this send site's stub chain. */
  unsigned int nextPic = (unsigned int) patch + 4 + * (unsigned int *) patch,
               cacheMask, 
               i;
  struct MethodDefinition * def;
  struct CompiledMethod * method;

/* EDX = args, ECX = opts */
  def = ObjectPointer_dispatchTo_arity_above_ (selector, args, arity, 0);
  if (def == NULL)
    goto failGeneration;

  /* Only primitive and compiled methods get a stub. */
  method = (struct CompiledMethod *) def -> method;
  if (method -> traits != CurrentMemory -> PrimitiveMethodWindow &&
      method -> traits != CurrentMemory -> CompiledMethodWindow)
    goto failGeneration;

  cacheMask = ObjectPointer_asSmallInt (((struct Symbol *) selector) -> cacheMask);

  /* Emit one guard per argument position whose bit is set in cacheMask. */
  for (i = 0; (1 << i) <= cacheMask; ++ i)
  {
    unsigned char * ref;
    ObjectPointer arg;
 
    if ((cacheMask & (1 << i)) == 0)
      continue; 

    arg = (i == 0 ? firstArg : args [i]);

    ENSURE_PIC_SPACE (48)

    /* EAX = args [i] */
    X86_MOVRBD (nativeCode, X86_EAX, X86_EDX, i * sizeof (ObjectPointer));

    if (ObjectPointer_isSmallInt (arg))
    {
      /* Seen argument is a small integer: miss when bit 0 is clear. */
      X86_TESTRI (nativeCode, X86_EAX, 1);
      ref = X86_JZ (nativeCode);
      X86_PATCH (ref, nextPic);
    }
    else
    {
      /* Seen argument is an object: miss when bit 0 is set, or when the
         object's map differs from the map of the argument seen now. */
      X86_TESTRI (nativeCode, X86_EAX, 1);
      ref = X86_JNZ (nativeCode);
      X86_PATCH (ref, nextPic);
      X86_MOVRBD (nativeCode, X86_EAX, X86_EAX, & ((struct Object *) 0) -> map);
      X86_CMPRI (nativeCode, X86_EAX, ((struct Object *) arg) -> map);
      ref = X86_JNE (nativeCode);
      X86_PATCH (ref, nextPic);
    }
  }

  ENSURE_PIC_SPACE (128)

  if (def -> slotAccessor != CurrentMemory -> NilObject)
  {
    /* Slot accessor: read (arity 1) or write (arity 2) the slot inline,
       push the value and continue at the native code of the bytecode the
       interpreter is currently positioned at. */
    struct SlotEntry * se;
    unsigned char * continuation;
    /* The continuation must already have native code. */
    if (runningMethod -> codeMap [runningInterpreter -> codePointer] == 0xFFFFFFFF)
      goto failGeneration;
    se = PSSlotTable_hashEntryForName_ (((struct Object *) args [0]) -> map -> slotTable, def -> slotAccessor);
    if (se == NULL)
      goto failGeneration;
    if (arity == 1)
    {
      /* firstArg is the value args [0] had on entry; the two differ only
         if the dispatch call above modified args [0] -- presumably a
         delegated receiver, TODO confirm.  In that case the slot is
         addressed absolutely instead of through the runtime argument. */
      if (firstArg != args [0])
        X86_MOVRM (nativeCode, X86_EAX, args [0] + ObjectPointer_asSmallInt (se -> offset));
      else
      {
        X86_MOVRBD (nativeCode, X86_EAX, X86_EDX, 0 * sizeof (ObjectPointer));
        X86_MOVRBD (nativeCode, X86_EAX, X86_EAX, ObjectPointer_asSmallInt (se -> offset));
      }
    }
    else
    if (arity == 2)
    {
      /* EAX = args [1], the value being stored. */
      X86_MOVRBD (nativeCode, X86_EAX, X86_EDX, 1 * sizeof (ObjectPointer));
      if (firstArg != args [0])
        X86_MOVMR (nativeCode, args [0] + ObjectPointer_asSmallInt (se -> offset), X86_EAX);
      else
      {
        X86_MOVRBD (nativeCode, X86_ECX, X86_EDX, 0 * sizeof (ObjectPointer));
        X86_MOVBDR (nativeCode, X86_ECX, ObjectPointer_asSmallInt (se -> offset), X86_EAX);
      }
    }
    else
      goto failGeneration;

    /* PSInterpreter_stackPush_ (runningInterpreter, EAX) */
    X86_PUSHR (nativeCode, X86_EAX);
    X86_PUSHI (nativeCode, runningInterpreter);
    X86_CALL (nativeCode, & PSInterpreter_stackPush_);
    X86_ADDRI (nativeCode, X86_ESP, 2 * 4);
    continuation = X86_JMP (nativeCode);
    X86_PATCH (continuation, runningMethod -> codeMap [runningInterpreter -> codePointer]);
  }
  else
  if (method -> traits == CurrentMemory -> PrimitiveMethodWindow)
  {
    /* Primitive: call it directly with (interp, args, arity, opts). */
    unsigned char * continuation;
    if (runningMethod -> codeMap [runningInterpreter -> codePointer] == 0xFFFFFFFF)
      goto failGeneration;
    if (opts != NULL)
      X86_PUSHR (nativeCode, X86_ECX);
    else
      X86_PUSHI (nativeCode, NULL);
    X86_PUSHI (nativeCode, arity);
    X86_PUSHR (nativeCode, X86_EDX);
    X86_PUSHI (nativeCode, runningInterpreter);
    X86_CALL (nativeCode, primitives [ObjectPointer_asSmallInt (method -> selector)]);
    X86_ADDRI (nativeCode, X86_ESP, 4 * 4);
    /* Continue in native code only if the interpreter is still in the
       method that is running now; otherwise return from the native code. */
    X86_MOVRM (nativeCode, X86_EAX, & runningInterpreter -> method);
    X86_CMPRI (nativeCode, X86_EAX, runningMethod -> method);
    continuation = X86_JE (nativeCode);
    X86_PATCH (continuation, runningMethod -> codeMap [runningInterpreter -> codePointer]);
    X86_RET (nativeCode);
  }
  else
  {
    /* Compiled method: activate it and jump into its native code. */
    JitMethod * target;
    unsigned char * targetCode,
                  * validTarget;

    /* A stub is only possible when the callee already has native code for
       its first bytecode; otherwise request JIT activation for it and
       give up for now. */
    target = findMethod (method);
    if (target == NULL || target -> method != method || target -> codeMap [0] == 0xFFFFFFFF)
    {
      if (! JIT_IS_ACTIVATED (method))
        JIT_ACTIVATE (method);
      goto failGeneration;
    }
    /* PSInterpreter_apply_to_arity_withOptionals_ (interp, method, args, arity, opts) */
    if (opts != NULL)
      X86_PUSHR (nativeCode, X86_ECX);
    else
      X86_PUSHI (nativeCode, NULL);
    X86_PUSHI (nativeCode, arity);
    X86_PUSHR (nativeCode, X86_EDX);
    X86_PUSHI (nativeCode, method);
    X86_PUSHI (nativeCode, runningInterpreter);
    X86_CALL (nativeCode, & PSInterpreter_apply_to_arity_withOptionals_);
    X86_ADDRI (nativeCode, X86_ESP, 5 * 4);

    /* If the interpreter did not end up in `method`, return; otherwise
       record the callee as the running method and enter its code. */
    X86_MOVRM (nativeCode, X86_EAX, & runningInterpreter -> method);
    X86_CMPRI (nativeCode, X86_EAX, method);
    validTarget = X86_JE (nativeCode);
    X86_RET (nativeCode);
    X86_PATCH (validTarget, nativeCode);
    X86_MOVMI (nativeCode, & runningMethod, target);
    targetCode = X86_JMP (nativeCode);
    X86_PATCH (targetCode, target -> codeMap [0]);
  }

  /* Make the new stub the head of the chain and commit the emitted bytes. */
  X86_PATCH (patch, pic);
  nativeCodePosition = nativeCode - nativeCodeCache;

  /* Reached on success as well as on failure: perform the current send. */
failGeneration:
  if (def == NULL)
    PSInterpreter_send_to_through_arity_withOptionals_ (runningInterpreter, selector, args, args, arity, opts);
  else
  if (method -> traits == CurrentMemory -> PrimitiveMethodWindow)
    (primitives [ObjectPointer_asSmallInt (method -> selector)]) (runningInterpreter, args, arity, opts);
  else
  if (method -> traits == CurrentMemory -> CompiledMethodWindow)
    PSInterpreter_apply_to_arity_withOptionals_ (runningInterpreter, method, args, arity, opts);
  else
    PSInterpreter_send_to_through_arity_withOptionals_ (runningInterpreter, selector, args, args, arity, opts);
}

/*
 * For use inside jitInterpret only; relies on its locals nativeCode,
 * growthLimit, codeSize, codePointer, prevPointer, codeMap, interp and
 * jitMethod.
 *
 * Guarantees that at least `space` bytes can be emitted at nativeCode.
 * When the current region is too small a fresh region is taken from the
 * end of nativeCodeCache, sized for the bytecodes remaining after
 * interp->codePointer (JIT_SPACE_PER_BC each) or `space`, whichever is
 * larger.  Unless this is the first bytecode of the run and it has no code
 * yet, a jump from the old region to the new one is emitted so execution
 * flows across the gap.
 *
 * If the arena cannot supply the new region, the whole cache is flushed,
 * interp->codePointer is reset to the bytecode being processed and the
 * enclosing function returns -1.
 *
 * Expands to a bare `if` statement.
 */
#define ENSURE_SPACE(space) \
  if (growthLimit - nativeCode < space) \
  { \
    int extendedSize = (codeSize + 1 - interp -> codePointer) * JIT_SPACE_PER_BC; \
    if (space > extendedSize) \
      extendedSize = space; \
    if (extendedSize > sizeof (nativeCodeCache) - nativeCodePosition) \
    { \
      jitFlush (); \
      interp -> codePointer = codePointer; \
      return -1; \
    } \
\
    if (prevPointer != 0xFFFFFFFF || codeMap [codePointer] != 0xFFFFFFFF) \
    { \
      unsigned char * branch = X86_JMP (nativeCode); \
      X86_PATCH (branch, nativeCodeCache + nativeCodePosition); \
    } \
    nativeCode = nativeCodeCache + nativeCodePosition; \
    nativeCodePosition += extendedSize; \
\
    jitMethod -> nativeCode = nativeCode; \
    jitMethod -> nativeCodeSize = extendedSize; \
    growthLimit = nativeCode + extendedSize; \
  }

/*
 * Interpret-and-compile loop.
 *
 * Starting at interp->codePointer, every bytecode of jitMethod->method is
 *   (a) translated to x86 code appended to the method's native code
 *       region, with the start address recorded in codeMap, and
 *   (b) executed right away through the matching PSInterpreter_* routine,
 *       so the interpreter state advances exactly as the emitted code
 *       would have advanced it.
 *
 * The loop ends when
 *   - it reaches a bytecode that already has native code: a jump to that
 *     code is emitted and the code is entered via JIT_EXECUTE;
 *   - a send, return, resend or a conditional branch whose taken side is
 *     already compiled has been processed;
 *   - the cache was flushed; or
 *   - the end of the bytecode is reached, which is compiled and executed
 *     as PSInterpreter_returnFrom_ (interp, 0).
 *
 * Returns 0, or -1 when ENSURE_SPACE had to flush the cache.
 */
int 
jitInterpret (struct Interpreter * interp, JitMethod * jitMethod)
{
        struct CompiledMethod * method = jitMethod -> method;
        /* prevPointer / codePointer: previous and current bytecode offsets;
           0xFFFFFFFF means "none". */
        unsigned int codeSize = PSObject_payloadSize ((struct Object *) method -> code),
                     codePointer = 0xFFFFFFFF, 
                     prevPointer;
        /* nativeCode: emission pointer; growthLimit: end of the region. */
        unsigned char * code = method -> code -> elements,
                      * nativeCode = jitMethod -> nativeCode + jitMethod -> nativeCodePosition,
                      * growthLimit = jitMethod -> nativeCode + jitMethod -> nativeCodeSize;;
        unsigned int * codeMap = jitMethod -> codeMap;

        while (!cacheFlushed && (prevPointer = codePointer, codePointer = interp -> codePointer) < codeSize)
        {
          unsigned int op, val;
        
          /* Already compiled: link to the existing code and run it. */
          if (codeMap [codePointer] != 0xFFFFFFFF)
          {
            unsigned char * branch = X86_JMP (nativeCode);
            X86_PATCH (branch, codeMap [codePointer]);

            jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
            assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
            JIT_EXECUTE (interp, jitMethod, codePointer);
            return 0;
          }

          ENSURE_SPACE (JIT_SPACE_PER_BC)

          codeMap [codePointer] = (unsigned int) nativeCode;
          op = code [codePointer];
          interp -> codePointer = codePointer + 1;
          
          /* The operand is the high nibble, or a following immediate when
             the nibble is 0xF. */
          val = op >> 4;
          if (val == 0xF)
            val = PSInterpreter_decodeImmediate (interp);

          switch (op & 0x0F)
          {
          case 0x00: /*BCSendMessage*/
          case 0x0B: /*BCSendMessageWithOptionals*/
            {
              struct OopArray * opts = NULL;
              ObjectPointer selector;
              ObjectPointer * args;
              unsigned char * defaultPic;
    
              if ((op & 0xF) == BCSendMessageWithOptionals)
                opts = (struct OopArray *) PSInterpreter_stackPop (interp);

              /* Emitted: store the resume offset, pop the selector, the
                 val arguments and (if present) the optionals from the
                 interpreter stack; EDX = address of the first argument,
                 ECX = optionals. */
              X86_MOVMI (nativeCode, & interp -> codePointer, interp -> codePointer);
              X86_MOVRM (nativeCode, X86_ECX, & interp -> stackPointer);
              X86_ADDRI (nativeCode, X86_ECX, -1 - val - (opts != NULL ? 1 : 0));
              X86_MOVMR (nativeCode, & interp -> stackPointer, X86_ECX);
              X86_MOVRM (nativeCode, X86_EDX, & interp -> stack);
              X86_LEABSD (nativeCode, X86_EDX, X86_EDX, X86_ECX, 4, ((struct OopArray *) 0) -> elements + 1);
              if (opts != NULL)
                X86_MOVRBD (nativeCode, X86_ECX, X86_EDX, val * sizeof (ObjectPointer));

              /* The same pops, performed on the interpreter now. */
              interp -> stackPointer -= val;
              args = & interp -> stack -> elements [interp -> stackPointer];
              selector = PSInterpreter_stackPop (interp);

              /* Emitted: a jump that initially targets the instruction
                 right after it, which calls
                 generatePic (selector, args, val, opts, defaultPic).
                 generatePic re-targets this jump when it builds a stub. */
              defaultPic = X86_JMP (nativeCode);
              X86_PATCH (defaultPic, nativeCode);
              X86_PUSHI (nativeCode, defaultPic);
              if (opts != NULL)
                X86_PUSHR (nativeCode, X86_ECX);
              else
                X86_PUSHI (nativeCode, NULL);
              X86_PUSHI (nativeCode, val);
              X86_PUSHR (nativeCode, X86_EDX);
              X86_PUSHI (nativeCode, selector);
              X86_CALL (nativeCode, & generatePic);
              X86_ADDRI (nativeCode, X86_ESP, 5 * 4);
              X86_RET (nativeCode);

              PSInterpreter_send_to_through_arity_withOptionals_ (interp, selector, args, args, val, opts);

              /* After a flush jitMethod's storage is gone; do not touch it. */
              if (cacheFlushed)
                return 0;

              jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
              assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
              return 0;
            }

          case 0x01: /*BCLoadVariable*/
            /* Variable val is read from the lexical context when the
               method heap-allocates, otherwise from the stack relative to
               the frame pointer; the value is then pushed. */
            if (method -> heapAllocate == CurrentMemory -> TrueObject)
            {
              X86_MOVRM (nativeCode, X86_EAX, & interp -> lexicalContext);
              X86_MOVRBD (nativeCode, X86_EAX, X86_EAX, ((struct LexicalContext *) 0) -> variables + val);
            }
            else
            {
              X86_MOVRM (nativeCode, X86_EAX, & interp -> stack);
              X86_MOVRM (nativeCode, X86_ECX, & interp -> framePointer);
              X86_MOVRBSD (nativeCode, X86_EAX, X86_EAX, X86_ECX, 4, ((struct OopArray *) 0) -> elements + val);
            }
            X86_PUSHR (nativeCode, X86_EAX);
            X86_PUSHI (nativeCode, interp);
            X86_CALL (nativeCode, & PSInterpreter_stackPush_);
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4);

            PSInterpreter_loadVariable_ (interp, val);
            continue;

          case 0x02: /*BCStoreVariable*/
            /* EDX = top of stack (element stackPointer - 1), which is left
               in place, then stored into variable val. */
            X86_MOVRM (nativeCode, X86_EAX, & interp -> stack);
            X86_MOVRM (nativeCode, X86_EDX, & interp -> stackPointer);
            X86_MOVRBSD (nativeCode, X86_EDX, X86_EAX, X86_EDX, 4, ((struct OopArray *) 0) -> elements - 1);

            if (method -> heapAllocate == CurrentMemory -> TrueObject)
            {
              X86_MOVRM (nativeCode, X86_EAX, & interp -> lexicalContext);
              X86_MOVBDR (nativeCode, X86_EAX, ((struct LexicalContext *) 0) -> variables + val, X86_EDX);
            }
            else
            {
              X86_MOVRM (nativeCode, X86_ECX, & interp -> framePointer);
              X86_MOVBSDR (nativeCode, X86_EAX, X86_ECX, 4, ((struct OopArray *) 0) -> elements + val, X86_EDX);
            }

            PSInterpreter_storeVariable_ (interp, val);
            continue;

          case 0x03: /*BCLoadFreeVariable*/
            {
                /* The variable's absolute address is baked into the
                   emitted code. */
                unsigned int freeVar = PSInterpreter_decodeImmediate (interp);
                
                X86_MOVRM (nativeCode, X86_EAX, & ((struct LexicalContext *) method -> lexicalWindow -> elements [val - 1]) -> variables [freeVar]);
                X86_PUSHR (nativeCode, X86_EAX);
                X86_PUSHI (nativeCode, interp);
                X86_CALL (nativeCode, & PSInterpreter_stackPush_);
                X86_ADDRI (nativeCode, X86_ESP, 2 * 4);
            
                PSInterpreter_stackPush_ (interp, ((struct LexicalContext *) method -> lexicalWindow -> elements [val - 1]) -> variables [freeVar]);
                continue;
            }

          case 0x04: /*BCStoreFreeVariable*/
            {
                unsigned int freeVar = PSInterpreter_decodeImmediate (interp);

                /* EAX = top of stack, stored to the variable's absolute
                   address. */
                X86_MOVRM (nativeCode, X86_EAX, & interp -> stack);
                X86_MOVRM (nativeCode, X86_ECX, & interp -> stackPointer);
                X86_MOVRBSD (nativeCode, X86_EAX, X86_EAX, X86_ECX, 4, ((struct OopArray *) 0) -> elements - 1);

                X86_MOVMR (nativeCode, & ((struct LexicalContext *) method -> lexicalWindow -> elements [val - 1]) -> variables [freeVar], X86_EAX);

                ((struct LexicalContext *) method -> lexicalWindow -> elements [val - 1]) -> variables [freeVar] =
                  (interp -> stack -> elements [interp -> stackPointer - 1]);
                continue;
            }

          case 0x05: /*BCLoadLiteral*/
            /* Load from the literal's absolute address and push. */
            X86_MOVRM (nativeCode, X86_EAX, interp -> method -> literals -> elements + val);
            X86_PUSHR (nativeCode, X86_EAX);
            X86_PUSHI (nativeCode, interp);
            X86_CALL (nativeCode, & PSInterpreter_stackPush_);
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4);

            PSInterpreter_loadLiteral_ (interp, val);
            continue;

          case 0x06: /*BCLoadSelector*/
            /* Load from the selector's absolute address and push. */
            X86_MOVRM (nativeCode, X86_EAX, interp -> method -> selectors -> elements + val);
            X86_PUSHR (nativeCode, X86_EAX);
            X86_PUSHI (nativeCode, interp);
            X86_CALL (nativeCode, & PSInterpreter_stackPush_);
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4);

            PSInterpreter_loadSelector_ (interp, val);
            continue;

          case 0x07: /*BCPopStack*/
            /* stackPointer -= val */
            X86_MOVRM (nativeCode, X86_EAX, & interp -> stackPointer);
            X86_ADDRI (nativeCode, X86_EAX, - val);
            X86_MOVMR (nativeCode, & interp -> stackPointer, X86_EAX);

            PSInterpreter_popStack_ (interp, val);
            continue;

          case 0x0C: /*BCReturnFrom*/
            /* Store the resume offset, call returnFrom and leave the
               native code. */
            X86_MOVMI (nativeCode, & interp -> codePointer, interp -> codePointer);
            X86_PUSHI (nativeCode, val);
            X86_PUSHI (nativeCode, interp);
            X86_CALL (nativeCode, & PSInterpreter_returnFrom_);
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4);
            X86_RET (nativeCode);

            PSInterpreter_returnFrom_ (interp, val);

            if (cacheFlushed)
              return 0;
        
            jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
            assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
            return 0;

          case 0x0A: /*BCBranchKeyed*/
            /* Call branchKeyed, then look up the native address for the
               resulting codePointer in the code map at run time: jump
               there if it exists, otherwise leave the native code. */
            X86_MOVMI (nativeCode, & interp -> codePointer, interp -> codePointer);
            X86_PUSHI (nativeCode, val);
            X86_PUSHI (nativeCode, interp);
            X86_CALL (nativeCode, & PSInterpreter_branchKeyed_);
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4);
            X86_MOVRM (nativeCode, X86_EAX, & interp -> codePointer);
            X86_MOVRSD (nativeCode, X86_EAX, X86_EAX, 4, jitMethod -> codeMap);
            X86_CMPRI (nativeCode, X86_EAX, 0xFFFFFFFF);
            {
              unsigned char * branch = X86_JE (nativeCode);
              X86_JMPR (nativeCode, X86_EAX);
              X86_PATCH (branch, nativeCode);
              X86_RET (nativeCode);
            }

            /* The emitted code above never falls through, so the next
               bytecode has no predecessor to link from. */
            codePointer = 0xFFFFFFFF;
            PSInterpreter_branchKeyed_ (interp, val);
            continue;

/* Bytecodes compiled as a plain call fun (interp, val). */
#define UNOPTIMIZED_BC(fun) \
            X86_PUSHI (nativeCode, val); \
            X86_PUSHI (nativeCode, interp); \
            X86_CALL (nativeCode, & fun); \
            X86_ADDRI (nativeCode, X86_ESP, 2 * 4); \
            fun (interp, val); \
            continue;

          case 0x08: /*BCNewArray*/
            UNOPTIMIZED_BC (PSInterpreter_newArray_)

          case 0x09: /*BCNewBlock*/
            UNOPTIMIZED_BC (PSInterpreter_newBlock_)

          case 0x0D: /*BCPushInteger*/
            UNOPTIMIZED_BC (PSInterpreter_pushInteger_)
        
          case 0x0F: /*BCExtended*/
            switch (op)
            {
            case 0x0F: /*BCJumpTo*/
              /* No code is emitted: the target's code either follows
                 directly or is linked by the jump emitted at the top of
                 the loop. */
              PSInterpreter_jumpTo (interp);
              continue;

            case 0x1F: /*BCBranchIfTrue*/
            case 0x2F: /*BCBranchIfFalse*/
              ENSURE_SPACE (128)
              { 
                int offset = PSInterpreter_decodeShort (interp);
                ObjectPointer cond = PSInterpreter_stackPop (interp);
                unsigned int succeedPointer, failPointer;
                unsigned char * branch;

                /* Emitted: EAX = popped condition. */
                X86_PUSHI (nativeCode, interp);
                X86_CALL (nativeCode, & PSInterpreter_stackPop);
                X86_ADDRI (nativeCode, X86_ESP, 4);

                /* succeedPointer is where execution goes for the condition
                   value seen right now; failPointer is the other side. */
                if (cond == (op == BCBranchIfTrue ? CurrentMemory -> TrueObject : CurrentMemory -> FalseObject))
                {
                  succeedPointer = interp -> codePointer + offset;
                  failPointer = interp -> codePointer;
                }
                else
                {
                  succeedPointer = interp -> codePointer;
                  failPointer = interp -> codePointer + offset;
                }

                /* Emitted test: same condition value as now? */
                X86_CMPRI (nativeCode, X86_EAX, cond);
                if (codeMap [succeedPointer] != 0xFFFFFFFF)
                {
                  /* Taken side already compiled: link to it. */
                  branch = X86_JE (nativeCode);
                  X86_PATCH (branch, codeMap [succeedPointer]);
                  if (codeMap [failPointer] != 0xFFFFFFFF)
                  {
                    branch = X86_JMP (nativeCode);
                    X86_PATCH (branch, codeMap [failPointer]);
                  }
                  else
                  {
                    /* Other side not compiled yet: check the code map at
                       run time; jump if an address has appeared, else
                       store failPointer and leave the native code. */
                    unsigned char * failAvailable;
                    X86_MOVRM (nativeCode, X86_EAX, & jitMethod -> codeMap [failPointer]);
                    X86_CMPRI (nativeCode, X86_EAX, 0xFFFFFFFF);
                    failAvailable = X86_JNE (nativeCode);
                    X86_MOVMI (nativeCode, & interp -> codePointer, failPointer);
                    X86_RET (nativeCode);
                    X86_PATCH (failAvailable, nativeCode);
                    X86_JMPR (nativeCode, X86_EAX);
                  }

                  jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
                  assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
                  JIT_EXECUTE (interp, jitMethod, succeedPointer);
                  return 0;
                }
                else
                if (codeMap [failPointer] != 0xFFFFFFFF)
                {
                  /* Taken side follows inline; other side is compiled. */
                  branch = X86_JNE (nativeCode);
                  X86_PATCH (branch, codeMap [failPointer]);
                }
                else
                {
                  /* Neither side compiled: taken side follows inline (the
                     JE skips the run-time lookup for the other side). */
                  unsigned char * failAvailable;
                  branch = X86_JE (nativeCode);
                  X86_MOVRM (nativeCode, X86_EAX, & jitMethod -> codeMap [failPointer]);
                  X86_CMPRI (nativeCode, X86_EAX, 0xFFFFFFFF);
                  failAvailable = X86_JNE (nativeCode);
                  X86_MOVMI (nativeCode, & interp -> codePointer, failPointer);
                  X86_RET (nativeCode);
                  X86_PATCH (failAvailable, nativeCode);
                  X86_JMPR (nativeCode, X86_EAX);
                  X86_PATCH (branch, nativeCode);
                }

                interp -> codePointer = succeedPointer;
                continue;
              }

            case 0x4F: /*BCResendMessage*/
              /* Store the resume offset, call resendMessage and leave the
                 native code. */
              X86_MOVMI (nativeCode, & interp -> codePointer, interp -> codePointer);
              X86_PUSHI (nativeCode, interp);
              X86_CALL (nativeCode, & PSInterpreter_resendMessage);
              X86_ADDRI (nativeCode, X86_ESP, 4);
              X86_RET (nativeCode);

              PSInterpreter_resendMessage (interp);

              jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
              assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
              return 0;
              
/* Extended bytecodes compiled as a plain call fun (interp). */
#define UNOPTIMIZED_EXTENDED_BC(fun) \
              X86_PUSHI (nativeCode, interp); \
              X86_CALL (nativeCode, & fun); \
              X86_ADDRI (nativeCode, X86_ESP, 4); \
              fun (interp); \
              continue;

            case 0x3F: /*BCPushEnvironment*/
              UNOPTIMIZED_EXTENDED_BC (PSInterpreter_pushEnvironment)

            case 0x5F: /*BCPushNil*/ 
              UNOPTIMIZED_EXTENDED_BC (PSInterpreter_pushNil)

            case 0x6F: /*BCIsIdenticalTo*/
              UNOPTIMIZED_EXTENDED_BC (PSInterpreter_isIdenticalTo)

            case 0x7F: /*BCPushTrue*/
              UNOPTIMIZED_EXTENDED_BC (PSInterpreter_pushTrue)

            case 0x8F: /*BCPushFalse*/
              UNOPTIMIZED_EXTENDED_BC (PSInterpreter_pushFalse)
            }
          }  

        }

        if (cacheFlushed)
          return 0;

        /* Ran off the end of the bytecode: compile and perform an implicit
           PSInterpreter_returnFrom_ (interp, 0). */
        ENSURE_SPACE (32)

        codeMap [interp -> codePointer] = (unsigned int) nativeCode;

        X86_MOVMI (nativeCode, & interp -> codePointer, interp -> codePointer);
        X86_PUSHI (nativeCode, 0);
        X86_PUSHI (nativeCode, interp);
        X86_CALL (nativeCode, & PSInterpreter_returnFrom_);
        X86_ADDRI (nativeCode, X86_ESP, 2 * 4);
        X86_RET (nativeCode);

        PSInterpreter_returnFrom_ (interp, 0);

        if (cacheFlushed)
          return 0;

        jitMethod -> nativeCodePosition = nativeCode - jitMethod -> nativeCode;
        assert (jitMethod -> nativeCodePosition <= jitMethod -> nativeCodeSize);
        return 0;
}

/*
 * Run `method` on `interp` through the JIT.
 *
 * If the method already has a cache entry, either enter the native code
 * recorded for interp -> codePointer or, when the code map still holds
 * 0xFFFFFFFF for that bytecode (no native code emitted yet), hand over to
 * jitInterpret.  Otherwise a fresh cache entry is set up with newMethod,
 * flushing the whole cache first when no slot or not enough code space is
 * left, and the method is handed to jitInterpret.
 *
 * Returns -1 when the method can never fit into the native code cache (in
 * that case the cache is left untouched), 0 after running existing native
 * code, and the result of jitInterpret otherwise.
 */
int
jitRun (struct Interpreter * interp, struct CompiledMethod * method)
{
        JitMethod * jitMethod;
        unsigned int codeSize;
        size_t spaceNeeded;

        cacheFlushed = 0;

        jitMethod = findMethod (method);
        if (jitMethod != NULL && jitMethod -> method == method)
        {
          /* known method: 0xFFFFFFFF marks a bytecode without native code */
          if (jitMethod -> codeMap [interp -> codePointer] == 0xFFFFFFFF)
            return jitInterpret (interp, jitMethod);

          JIT_EXECUTE (interp, jitMethod, interp -> codePointer);
          return 0;
        }

        codeSize = PSObject_payloadSize ((struct Object *) method -> code);
        /* same sizing formula as before: one extra bytecode slot, each
         * needing code space plus one code map entry */
        spaceNeeded = (codeSize + 1) * (JIT_SPACE_PER_BC + sizeof (unsigned int));

        /* A method that does not fit even into an empty cache is rejected
         * before flushing; flushing first would throw away every other
         * method's native code for nothing. */
        if (spaceNeeded > JIT_CACHE_SIZE)
          return -1;

        if (jitMethod == NULL ||
            spaceNeeded > sizeof (nativeCodeCache) - nativeCodePosition)
        {
          /* no free slot or not enough room left: start over with an empty cache */
          jitFlush ();
          cacheFlushed = 0;
          jitMethod = findMethod (method);
        }

        newMethod (method, jitMethod);

        return jitInterpret (interp, jitMethod);
}

/*
 * Initialise the JIT module: clear the method cache table and emit the two
 * machine-code stubs that the JIT_EXECUTE and JIT_INVALIDATE macros call
 * through (the executeCode and invalidateCode buffers).
 *
 * NOTE(review): the exact bytes emitted depend on the X86_* macros, which
 * are defined elsewhere; the comments below describe the intended
 * instruction sequence and assume the stubs are called with cdecl-style
 * stack arguments, matching the casts in JIT_EXECUTE / JIT_INVALIDATE.
 */
void
initJitModule (void)
{
        /* execute / invalidate: emission cursors into the two stub buffers.
         * label:       start of the frame-walking loop in the invalidate stub.
         * executeExit: address in the execute stub right after the call into
         *              native code.
         * ref, preserveRef: forward/backward branches awaiting X86_PATCH. */
        unsigned char * execute = executeCode,
                      * invalidate = invalidateCode,
                      * label,
                      * executeExit,
                      * ref,
                      * preserveRef;

        memset (cachedMethods, 0, sizeof (cachedMethods));
        
        /* JIT_EXECUTE */
        /* set up a frame so the arguments are addressed relative to ebp */
        X86_PUSHR (execute, X86_EBP);
        X86_MOVRR (execute, X86_EBP, X86_ESP);
        /* edx = interp */
        X86_MOVRBD (execute, X86_EDX, X86_EBP, 8);
        /* runningInterpreter = interp */
        X86_MOVMR (execute, & runningInterpreter, X86_EDX);
        /* eax = jitMethod */
        X86_MOVRBD (execute, X86_EAX, X86_EBP, 12);
        /* ecx = codePointer */
        X86_MOVRBD (execute, X86_ECX, X86_EBP, 16);
        /* runningMethod = jitMethod */
        X86_MOVMR (execute, & runningMethod, X86_EAX);
        /* eax = jitMethod -> codeMap [codePointer] */
        X86_MOVRBD (execute, X86_EAX, X86_EAX, & ((JitMethod *) 0) -> codeMap);
        X86_MOVRBS (execute, X86_EAX, X86_EAX, X86_ECX, 4);
        /* enter native code, stack may be offset if returning from a tail call */
        X86_CALLR (execute, X86_EAX); 
/* remember where native code returns to; the invalidate stub below redirects
 * a return address to this point */
executeExit = execute;
        /* runningMethod = NULL */
        X86_MOVMI (execute, & runningMethod, NULL);
        /* NOTE(review): presumably LEAVE restores esp from ebp, which is what
         * makes the possibly offset stack harmless here - confirm */
        X86_LEAVE (execute);
        X86_RET (execute);

        /* JIT_INVALIDATE */
        X86_PUSHR (invalidate, X86_EBP);
        X86_MOVRR (invalidate, X86_EBP, X86_ESP);
        /* edx = frame pointer */
        X86_MOVRR (invalidate, X86_EDX, X86_EBP);
/* start loop, find frame where native code is calling into the VM, or rather
 * a frame whose return address is somewhere in native code
 */
label = invalidate;
        /* eax = copy of current frame pointer */
        X86_MOVRR (invalidate, X86_EAX, X86_EDX);
        /* ecx = previous instruction pointer */
        X86_MOVRBD (invalidate, X86_ECX, X86_EDX, 4);
        /* edx = previous frame pointer */
        X86_MOVRBD (invalidate, X86_EDX, X86_EDX, 0);
        /* if previous instruction pointer < nativeCodeCache, loop */
        X86_CMPRI (invalidate, X86_ECX, nativeCodeCache);
/* the branch is emitted first and then patched to jump back to label */
ref = X86_JB (invalidate);
X86_PATCH (ref, label);
        /* if previous instruction pointer >= & nativeCodeCache [sizeof (nativeCodeCache)], loop */
        X86_CMPRI (invalidate, X86_ECX, & nativeCodeCache [sizeof (nativeCodeCache)]);
ref = X86_JAE (invalidate);
X86_PATCH (ref, label); 
/* exit loop: eax = frame whose return address (ecx) lies inside nativeCodeCache */
        /* edx = instruction pointer returning from JIT_EXECUTE (executeExit) */
        X86_MOVRI (invalidate, X86_EDX, executeExit);
        /* native code calling into VM is patched to return to the above IP instead of native code */
        X86_MOVBDR (invalidate, X86_EAX, 4, X86_EDX);
        /* eax = jitMethod */
        X86_MOVRBD (invalidate, X86_EAX, X86_EBP, 12);
        /* push the native code return IP now, before ecx is reused; it serves
         * as the second argument of reverseMapCodePointer below */
        X86_PUSHR (invalidate, X86_ECX);
        /* ecx = jitMethod -> method */
        X86_MOVRBD (invalidate, X86_ECX, X86_EAX, & ((JitMethod *) 0) -> method);
        /* edx = interp -> method */
        X86_MOVRBD (invalidate, X86_EDX, X86_EBP, 8);
        X86_MOVRBD (invalidate, X86_EDX, X86_EDX, & ((struct Interpreter *) 0) -> method);
        /* if (jitMethod -> method != interp -> method), skip code pointer fix-up */
        X86_CMPRR (invalidate, X86_ECX, X86_EDX);
ref = X86_JNE (invalidate);        
        /* first argument: jitMethod */
        X86_PUSHR (invalidate, X86_EAX);
        /* call eax = reverseMapCodePointer (jitMethod, native code return IP) */ 
        X86_CALL (invalidate, & reverseMapCodePointer);
        /* if reverse mapped CP == 0xFFFFFFFF, skip updating code pointer */
        X86_CMPRI (invalidate, X86_EAX, 0xFFFFFFFF);
preserveRef = X86_JE (invalidate);
        /* ecx = jitMethod */
        X86_MOVRBD (invalidate, X86_ECX, X86_EBP, 12);
        /* arguments for nextCodePointer: reverse mapped code pointer, then jitMethod */
        X86_PUSHR (invalidate, X86_EAX);
        X86_PUSHR (invalidate, X86_ECX);
        /* call eax = nextCodePointer (jitMethod, eax) */
        X86_CALL (invalidate, & nextCodePointer);
        /* ecx = interp */
        X86_MOVRBD (invalidate, X86_ECX, X86_EBP, 8);
        /* interp -> codePointer = eax */
        X86_MOVBDR (invalidate, X86_ECX, & ((struct Interpreter *) 0) -> codePointer, X86_EAX);
/* both skip branches (method mismatch, unmapped code pointer) land here */
X86_PATCH (ref, invalidate);
X86_PATCH (preserveRef, invalidate);
        /* NOTE(review): the arguments pushed above are never popped explicitly;
         * presumably LEAVE discards them by restoring esp from ebp - confirm */
        X86_LEAVE (invalidate);
        X86_RET (invalidate);
}

#endif /* USE_X86_JIT */

