# Simulator main loop for m32rx. -*- C -*-
#
# Copyright 1996, 1997, 1998, 2004, 2007, 2008, 2009
# Free Software Foundation, Inc.
#
# This file is part of the GNU Simulators.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Syntax:
# /bin/sh mainloop.in command
#
# Command is one of:
#
# init
# support
# extract-{simple,scache,pbb}
# {full,fast}-exec-{simple,scache,pbb}
#
# A target need only provide a "full" version of one of simple,scache,pbb.
# If the target wants it can also provide a fast version of same, or if
# the slow (full featured) version is `simple', then the fast version can be
# one of scache/pbb.
# A target can't provide more than this.

# ??? After a few more ports are done, revisit.
# Will eventually need to machine generate a lot of this.

case "x$1" in

xsupport)

cat <<EOF

/* Emit insns to write back the results of insns executed in parallel.
   SC points to a sufficient number of scache entries for the writeback
   handlers.
   SC1/ID1 is the first insn (left slot, lower address).
   SC2/ID2 is the second insn (right slot, higher address).  */

static INLINE void
emit_par_finish (SIM_CPU *current_cpu, PCADDR pc, SCACHE *sc,
		 SCACHE *sc1, const IDESC *id1, SCACHE *sc2, const IDESC *id2)
{
  ARGBUF *abuf;

  abuf = &sc->argbuf;
  id1 = id1->par_idesc;
  abuf->fields.write.abuf = &sc1->argbuf;
  @cpu@_fill_argbuf (current_cpu, abuf, id1, pc, 0);
  /* no need to set trace_p,profile_p */
#if 0 /* not currently needed for id2 since results written directly */
  abuf = &sc[1].argbuf;
  id2 = id2->par_idesc;
  abuf->fields.write.abuf = &sc2->argbuf;
  @cpu@_fill_argbuf (current_cpu, abuf, id2, pc + 2, 0);
  /* no need to set trace_p,profile_p */
#endif
}

static INLINE const IDESC *
emit_16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	 SCACHE *sc, int fast_p, int parallel_p)
{
  ARGBUF *abuf = &sc->argbuf;
  const IDESC *id = @cpu@_decode (current_cpu, pc, insn, insn, abuf);

  if (parallel_p)
    id = id->par_idesc;
  @cpu@_fill_argbuf (current_cpu, abuf, id, pc, fast_p);
  return id;
}

static INLINE const IDESC *
emit_full16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
	     int trace_p, int profile_p)
{
  const IDESC *id;

  @cpu@_emit_before (current_cpu, sc, pc, 1);
  id = emit_16 (current_cpu, pc, insn, sc + 1, 0, 0);
  @cpu@_emit_after (current_cpu, sc + 2, pc);
  sc[1].argbuf.trace_p = trace_p;
  sc[1].argbuf.profile_p = profile_p;
  return id;
}

static INLINE const IDESC *
emit_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	       SCACHE *sc, int fast_p)
{
  const IDESC *id,*id2;

  /* Emit both insns, then emit a finisher-upper.
     We speed things up by handling the second insn serially
     [not parallelly].  Then the writeback only has to deal
     with the first insn.  */
  /* ??? Revisit to handle exceptions right.  */

  /* FIXME: No need to handle this parallely if second is nop.  */
  id = emit_16 (current_cpu, pc, insn >> 16, sc, fast_p, 1);

  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 1, fast_p, 0);

  /* Set sc/snc insns notion of where to skip to.  */
  if (IDESC_SKIP_P (id))
    SEM_SKIP_COMPILE (current_cpu, sc, 1);

  /* Emit code to finish executing the semantics
     (write back the results).  */
  emit_par_finish (current_cpu, pc, sc + 2, sc, id, sc + 1, id2);

  return id;
}

static INLINE const IDESC *
emit_full_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
		    SCACHE *sc, int trace_p, int profile_p)
{
  const IDESC *id,*id2;

  /* Emit both insns, then emit a finisher-upper.
     We speed things up by handling the second insn serially
     [not parallelly].  Then the writeback only has to deal
     with the first insn.  */
  /* ??? Revisit to handle exceptions right.  */

  @cpu@_emit_before (current_cpu, sc, pc, 1);

  /* FIXME: No need to handle this parallelly if second is nop.  */
  id = emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 1);
  sc[1].argbuf.trace_p = trace_p;
  sc[1].argbuf.profile_p = profile_p;

  @cpu@_emit_before (current_cpu, sc + 2, pc, 0);

  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 3, 0, 0);
  sc[3].argbuf.trace_p = trace_p;
  sc[3].argbuf.profile_p = profile_p;

  /* Set sc/snc insns notion of where to skip to.  */
  if (IDESC_SKIP_P (id))
    SEM_SKIP_COMPILE (current_cpu, sc, 4);

  /* Emit code to finish executing the semantics
     (write back the results).  */
  emit_par_finish (current_cpu, pc, sc + 4, sc + 1, id, sc + 3, id2);

  @cpu@_emit_after (current_cpu, sc + 5, pc);

  return id;
}

static INLINE const IDESC *
emit_32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	 SCACHE *sc, int fast_p)
{
  ARGBUF *abuf = &sc->argbuf;
  const IDESC *id = @cpu@_decode (current_cpu, pc,
				  (USI) insn >> 16, insn, abuf);

  @cpu@_fill_argbuf (current_cpu, abuf, id, pc, fast_p);
  return id;
}

static INLINE const IDESC *
emit_full32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
	     int trace_p, int profile_p)
{
  const IDESC *id;

  @cpu@_emit_before (current_cpu, sc, pc, 1);
  id = emit_32 (current_cpu, pc, insn, sc + 1, 0);
  @cpu@_emit_after (current_cpu, sc + 2, pc);
  sc[1].argbuf.trace_p = trace_p;
  sc[1].argbuf.profile_p = profile_p;
  return id;
}

EOF

;;

xinit)

# Nothing needed.

;;

xextract-pbb)

# Inputs:  current_cpu, pc, sc, max_insns, FAST_P
# Outputs: sc, pc
# sc must be left pointing past the last created entry.
# pc must be left pointing past the last created entry.
# If the pbb is terminated by a cti insn, SET_CTI_VPC(sc) must be called
# to record the vpc of the cti insn.
# SET_INSN_COUNT(n) must be called to record number of real insns.

cat <<EOF
{
  const IDESC *idesc;
  int icount = 0;

  if ((pc & 3) != 0)
    {
      /* This occurs when single stepping and when compiling the not-taken
	 part of conditional branches.  */
      UHI insn = GETIMEMUHI (current_cpu, pc);
      int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
      int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
      SCACHE *cti_sc; /* ??? tmp hack */

      /* A parallel insn isn't allowed here, but we don't mind nops.
	 ??? We need to wait until the insn is executed before signalling
	 the error, for situations where such signalling is wanted.  */
#if 0
      if ((insn & 0x8000) != 0
	  && (insn & 0x7fff) != 0x7000) /* parallel nops are ok */
	sim_engine_invalid_insn (current_cpu, pc, 0);
#endif

      /* Only emit before/after handlers if necessary.  */
      if (FAST_P || (! trace_p && ! profile_p))
	{
	  idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, FAST_P, 0);
	  cti_sc = sc;
	  ++sc;
	  --max_insns;
	}
      else
	{
	  idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
			       trace_p, profile_p);
	  cti_sc = sc + 1;
	  sc += 3;
	  max_insns -= 3;
	}
      ++icount;
      pc += 2;
      if (IDESC_CTI_P (idesc))
	{
	  SET_CTI_VPC (cti_sc);
	  goto Finish;
	}
    }

  /* There are two copies of the compiler: full(!fast) and fast.
     The "full" case emits before/after handlers for each insn.
     Having two copies of this code is a tradeoff, having one copy
     seemed a bit more difficult to read (due to constantly testing
     FAST_P).  ??? On the other hand, with address ranges we'll want to
     omit before/after handlers for unwanted insns.  Having separate loops
     for FAST/!FAST avoids constantly doing the test in the loop, but
     typically FAST_P is a constant and such tests will get optimized out.  */

  if (FAST_P)
    {
      while (max_insns > 0)
	{
	  USI insn = GETIMEMUSI (current_cpu, pc);
	  if ((SI) insn < 0)
	    {
	      /* 32 bit insn */
	      idesc = emit_32 (current_cpu, pc, insn, sc, 1);
	      ++sc;
	      --max_insns;
	      ++icount;
	      pc += 4;
	      if (IDESC_CTI_P (idesc))
		{
		  SET_CTI_VPC (sc - 1);
		  break;
		}
	    }
	  else
	    {
	      if ((insn & 0x8000) != 0) /* parallel? */
		{
		  int up_count;

		  if (((insn >> 16) & 0xfff0) == 0x10f0)
		    {
		      /* FIXME: No need to handle this sequentially if system
		         calls will be able to execute after second insn in
		         parallel. ( trap #num || insn ) */
		      /* insn */
		      idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
 				       sc, 1, 0);
		      /* trap */
		      emit_16 (current_cpu, pc, insn >> 16, sc + 1, 1, 0);
		      up_count = 2;
		    }
		  else
		    {
		      /* Yep.  Here's the "interesting" [sic] part.  */
		      idesc = emit_parallel (current_cpu, pc, insn, sc, 1);
		      up_count = 3;
		    }
		  sc += up_count;
		  max_insns -= up_count;
		  icount += 2;
		  pc += 4;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (sc - up_count);
		      break;
		    }
		}
	      else /* 2 serial 16 bit insns */
		{
		  idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 1, 0);
		  ++sc;
		  --max_insns;
		  ++icount;
		  pc += 2;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (sc - 1);
		      break;
		    }
		  /* While we're guaranteed that there's room to extract the
		     insn, when single stepping we can't; the pbb must stop
		     after the first insn.  */
		  if (max_insns == 0)
		    break;
		  idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 1, 0);
		  ++sc;
		  --max_insns;
		  ++icount;
		  pc += 2;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (sc - 1);
		      break;
		    }
		}
	    }
	}
    }
  else /* ! FAST_P */
    {
      while (max_insns > 0)
	{
	  USI insn = GETIMEMUSI (current_cpu, pc);
	  int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
	  int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
	  SCACHE *cti_sc; /* ??? tmp hack */
	  if ((SI) insn < 0)
	    {
	      /* 32 bit insn
		 Only emit before/after handlers if necessary.  */
	      if (trace_p || profile_p)
		{
		  idesc = emit_full32 (current_cpu, pc, insn, sc,
				       trace_p, profile_p);
		  cti_sc = sc + 1;
		  sc += 3;
		  max_insns -= 3;
		}
	      else
		{
		  idesc = emit_32 (current_cpu, pc, insn, sc, 0);
		  cti_sc = sc;
		  ++sc;
		  --max_insns;
		}
	      ++icount;
	      pc += 4;
	      if (IDESC_CTI_P (idesc))
		{
		  SET_CTI_VPC (cti_sc);
		  break;
		}
	    }
	  else
	    {
	      if ((insn & 0x8000) != 0) /* parallel? */
		{
		  /* Yep.  Here's the "interesting" [sic] part.
		     Only emit before/after handlers if necessary.  */
		  if (trace_p || profile_p)
		    {
		      if (((insn >> 16) & 0xfff0) == 0x10f0)
			{
			  /* FIXME: No need to handle this sequentially if
			     system calls will be able to execute after second
			     insn in parallel. ( trap #num || insn ) */
			  /* insn */
			  idesc = emit_full16 (current_cpu, pc + 2,
					       insn & 0x7fff, sc, 0, 0);
			  /* trap */
			  emit_full16 (current_cpu, pc, insn >> 16, sc + 3,
				       0, 0);
			}
		      else
			{
		          idesc = emit_full_parallel (current_cpu, pc, insn,
						      sc, trace_p, profile_p);
			}
		      cti_sc = sc + 1;
		      sc += 6;
		      max_insns -= 6;
		    }
		  else
		    {
		      int up_count;

		      if (((insn >> 16) & 0xfff0) == 0x10f0)
			{
                          /* FIXME: No need to handle this sequentially if
                             system calls will be able to execute after second
                             insn in parallel. ( trap #num || insn ) */
                          /* insn */
                          idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
                                           sc, 0, 0);
                          /* trap */
                          emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 0);
                          up_count = 2;
			}
		      else
			{
		          idesc = emit_parallel (current_cpu, pc, insn, sc, 0);
                          up_count = 3;
			}
		      cti_sc = sc;
		      sc += up_count;
		      max_insns -= up_count;
		    }
		  icount += 2;
		  pc += 4;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (cti_sc);
		      break;
		    }
		}
	      else /* 2 serial 16 bit insns */
		{
		  /* Only emit before/after handlers if necessary.  */
		  if (trace_p || profile_p)
		    {
		      idesc = emit_full16 (current_cpu, pc, insn >> 16, sc,
					   trace_p, profile_p);
		      cti_sc = sc + 1;
		      sc += 3;
		      max_insns -= 3;
		    }
		  else
		    {
		      idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 0, 0);
		      cti_sc = sc;
		      ++sc;
		      --max_insns;
		    }
		  ++icount;
		  pc += 2;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (cti_sc);
		      break;
		    }
		  /* While we're guaranteed that there's room to extract the
		     insn, when single stepping we can't; the pbb must stop
		     after the first insn.  */
		  if (max_insns <= 0)
		    break;
		  /* Use the same trace/profile address for the 2nd insn.
		     Saves us having to compute it and they come in pairs
		     anyway (e.g. can never branch to the 2nd insn).  */
		  if (trace_p || profile_p)
		    {
		      idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
					   trace_p, profile_p);
		      cti_sc = sc + 1;
		      sc += 3;
		      max_insns -= 3;
		    }
		  else
		    {
		      idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 0, 0);
		      cti_sc = sc;
		      ++sc;
		      --max_insns;
		    }
		  ++icount;
		  pc += 2;
		  if (IDESC_CTI_P (idesc))
		    {
		      SET_CTI_VPC (cti_sc);
		      break;
		    }
		}
	    }
	}
    }

 Finish:
  SET_INSN_COUNT (icount);
}
EOF

;;

xfull-exec-pbb)

# Inputs: current_cpu, vpc, FAST_P
# Outputs: vpc
# vpc is the virtual program counter.

cat <<EOF
#define DEFINE_SWITCH
#include "semx-switch.c"
EOF

;;

*)
  echo "Invalid argument to mainloop.in: $1" >&2
  exit 1
  ;;

esac