rndr_gu2.c   [plain text]


/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/nsc/gfx/rndr_gu2.c,v 1.1 2002/12/10 15:12:27 alanh Exp $ */
/*
 * $Workfile: rndr_gu2.c $
 *
 * This file contains routines to program the 2D acceleration hardware for
 * the second generation graphics unit.
 * 
 * Basic rendering routines (common to all Geode processors):
 *    gfx_set_bpp           
 *    gfx_set_solid_pattern  
 *    gfx_set_mono_pattern
 *    gfx_set_color_pattern
 *    gfx_set_solid_source  
 *    gfx_set_mono_source
 *    gfx_set_raster_operation
 *    gfx_pattern_fill
 *    gfx_color_pattern_fill
 *    gfx_screen_to_screen_blt
 *    gfx_screen_to_screen_xblt
 *    gfx_color_bitmap_to_screen_blt
 *    gfx_color_bitmap_to_screen_xblt
 *    gfx_mono_bitmap_to_screen_blt
 *    gfx_bresenham_line 
 *    gfx_wait_until_idle   
 *
 * Extended rendering routines for second generation functionality:
 *    gfx2_set_source_stride
 *    gfx2_set_destination_stride
 *    gfx2_set_pattern_origins
 *    gfx2_set_source_transparency
 *    gfx2_set_alpha_mode
 *    gfx2_set_alpha_value
 *    gfx2_pattern_fill
 *    gfx2_color_pattern_fill
 *    gfx2_screen_to_screen_blt
 *    gfx2_mono_expand_blt
 *    gfx2_color_bitmap_to_screen_blt
 *    gfx2_mono_bitmap_to_screen_blt
 *    gfx2_bresenham_line
 *    gfx2_sync_to_vblank
 *
 * NSC_LIC_ALTERNATIVE_PREAMBLE
 *
 * Revision 1.0
 *
 * National Semiconductor Alternative GPL-BSD License
 *
 * National Semiconductor Corporation licenses this software 
 * ("Software"):
 *
 *      Durango
 *
 * under one of the two following licenses, depending on how the 
 * Software is received by the Licensee.
 * 
 * If this Software is received as part of the Linux Framebuffer or
 * other GPL licensed software, then the GPL license designated 
 * NSC_LIC_GPL applies to this Software; in all other circumstances 
 * then the BSD-style license designated NSC_LIC_BSD shall apply.
 *
 * END_NSC_LIC_ALTERNATIVE_PREAMBLE */

/* NSC_LIC_BSD
 *
 * National Semiconductor Corporation Open Source License for Durango
 *
 * (BSD License with Export Notice)
 *
 * Copyright (c) 1999-2001
 * National Semiconductor Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met: 
 *
 *   * Redistributions of source code must retain the above copyright 
 *     notice, this list of conditions and the following disclaimer. 
 *
 *   * Redistributions in binary form must reproduce the above 
 *     copyright notice, this list of conditions and the following 
 *     disclaimer in the documentation and/or other materials provided 
 *     with the distribution. 
 *
 *   * Neither the name of the National Semiconductor Corporation nor 
 *     the names of its contributors may be used to endorse or promote 
 *     products derived from this software without specific prior 
 *     written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
 * NATIONAL SEMICONDUCTOR CORPORATION OR CONTRIBUTORS BE LIABLE FOR ANY 
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE,
 * INTELLECTUAL PROPERTY INFRINGEMENT, OR OTHERWISE) ARISING IN ANY WAY 
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
 * OF SUCH DAMAGE.
 *
 * EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF 
 * YOUR JURISDICTION. It is licensee's responsibility to comply with 
 * any export regulations applicable in licensee's jurisdiction. Under 
 * CURRENT (2001) U.S. export regulations this software 
 * is eligible for export from the U.S. and can be downloaded by or 
 * otherwise exported or reexported worldwide EXCEPT to U.S. embargoed 
 * destinations which include Cuba, Iraq, Libya, North Korea, Iran, 
 * Syria, Sudan, Afghanistan and any other country to which the U.S. 
 * has embargoed goods and services. 
 *
 * END_NSC_LIC_BSD */

/* NSC_LIC_GPL
 *
 * National Semiconductor Corporation Gnu General Public License for Durango
 *
 * (GPL License with Export Notice)
 *
 * Copyright (c) 1999-2001
 * National Semiconductor Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted under the terms of the GNU General 
 * Public License as published by the Free Software Foundation; either 
 * version 2 of the License, or (at your option) any later version  
 *
 * In addition to the terms of the GNU General Public License, neither 
 * the name of the National Semiconductor Corporation nor the names of 
 * its contributors may be used to endorse or promote products derived 
 * from this software without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
 * NATIONAL SEMICONDUCTOR CORPORATION OR CONTRIBUTORS BE LIABLE FOR ANY 
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE, 
 * INTELLECTUAL PROPERTY INFRINGEMENT, OR OTHERWISE) ARISING IN ANY WAY 
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
 * OF SUCH DAMAGE. See the GNU General Public License for more details. 
 *
 * EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF 
 * YOUR JURISDICTION. It is licensee's responsibility to comply with 
 * any export regulations applicable in licensee's jurisdiction. Under 
 * CURRENT (2001) U.S. export regulations this software 
 * is eligible for export from the U.S. and can be downloaded by or 
 * otherwise exported or reexported worldwide EXCEPT to U.S. embargoed 
 * destinations which include Cuba, Iraq, Libya, North Korea, Iran, 
 * Syria, Sudan, Afghanistan and any other country to which the U.S. 
 * has embargoed goods and services. 
 *
 * You should have received a copy of the GNU General Public License 
 * along with this file; if not, write to the Free Software Foundation, 
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 *
 * END_NSC_LIC_GPL */

void gu2_set_bpp(unsigned short bpp);
void gu2_set_solid_pattern(unsigned long color);
void gu2_set_mono_pattern(unsigned long bgcolor, unsigned long fgcolor,
			  unsigned long data0, unsigned long data1,
			  unsigned char transparency);
void gu2_set_color_pattern(unsigned long bgcolor, unsigned long fgcolor,
			   unsigned long data0, unsigned long data1,
			   unsigned long data2, unsigned long data3,
			   unsigned char transparency);
void gu2_load_color_pattern_line(short y, unsigned long *pattern_8x8);
void gu2_set_solid_source(unsigned long color);
void gu2_set_mono_source(unsigned long bgcolor, unsigned long fgcolor,
			 unsigned short transparent);
void gu2_set_pattern_flags(unsigned short flags);
void gu2_set_raster_operation(unsigned char rop);
void gu2_pattern_fill(unsigned short x, unsigned short y,
		      unsigned short width, unsigned short height);
void gu2_color_pattern_fill(unsigned short x, unsigned short y,
			    unsigned short width, unsigned short height,
			    unsigned long *pattern);
void gu2_screen_to_screen_blt(unsigned short srcx, unsigned short srcy,
			      unsigned short dstx, unsigned short dsty,
			      unsigned short width, unsigned short height);
void gu2_screen_to_screen_xblt(unsigned short srcx, unsigned short srcy,
			       unsigned short dstx, unsigned short dsty,
			       unsigned short width, unsigned short height,
			       unsigned long color);
void gu2_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				    unsigned short dstx, unsigned short dsty,
				    unsigned short width,
				    unsigned short height,
				    unsigned char *data, long pitch);
void gu2_color_bitmap_to_screen_xblt(unsigned short srcx, unsigned short srcy,
				     unsigned short dstx, unsigned short dsty,
				     unsigned short width,
				     unsigned short height,
				     unsigned char *data, long pitch,
				     unsigned long color);
void gu2_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				   unsigned short dstx, unsigned short dsty,
				   unsigned short width,
				   unsigned short height, unsigned char *data,
				   short pitch);
void gu2_text_blt(unsigned short dstx, unsigned short dsty,
		  unsigned short width, unsigned short height,
		  unsigned char *data);
void gu2_bresenham_line(unsigned short x, unsigned short y,
			unsigned short length, unsigned short initerr,
			unsigned short axialerr, unsigned short diagerr,
			unsigned short flags);
void gu2_wait_until_idle(void);
int gu2_test_blt_pending(void);

/* SECOND GENERATION RENDERING ROUTINES */

void gu22_set_source_stride(unsigned short stride);
void gu22_set_destination_stride(unsigned short stride);
void gu22_set_pattern_origin(int x, int y);
void gu22_set_source_transparency(unsigned long color, unsigned long mask);
void gu22_set_alpha_mode(int mode);
void gu22_set_alpha_value(unsigned char value);
void gu22_pattern_fill(unsigned long dstoffset, unsigned short width,
		       unsigned short height);
void gu22_color_pattern_fill(unsigned long dstoffset, unsigned short width,
			     unsigned short height, unsigned long *pattern);
void gu22_screen_to_screen_blt(unsigned long srcoffset,
			       unsigned long dstoffset, unsigned short width,
			       unsigned short height, int flags);
void gu22_mono_expand_blt(unsigned long srcbase, unsigned short srcx,
			  unsigned short srcy, unsigned long dstoffset,
			  unsigned short width, unsigned short height,
			  int byte_packed);
void gu22_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				     unsigned long dstoffset,
				     unsigned short width,
				     unsigned short height,
				     unsigned char *data, short pitch);
void gu22_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				    unsigned long dstoffset,
				    unsigned short width,
				    unsigned short height,
				    unsigned char *data, short pitch);
void gu22_text_blt(unsigned long dstoffset, unsigned short width,
		   unsigned short height, unsigned char *data);
void gu22_bresenham_line(unsigned long dstoffset, unsigned short length,
			 unsigned short initerr, unsigned short axialerr,
			 unsigned short diagerr, unsigned short flags);
void gu22_sync_to_vblank(void);
void gu2_reset_pitch(unsigned short pitch);

#define GU2_WAIT_PENDING while(READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_PENDING)
#define GU2_WAIT_BUSY while(READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_BUSY)
#define GU2_WAIT_HALF_EMPTY while(!(READ_GP32(MGP_BLT_STATUS) & MGP_BS_HALF_EMPTY))

/* PATTERN SWIZZLES */

#define WORD_SWIZZLE(x) (((x) << 16) | ((x) >> 16))
#define BYTE_SWIZZLE(x) (((x) << 24) | ((x) >> 24) | (((x) << 8) & 0x00FF0000) | (((x) >> 8) & 0x0000FF00))

/* GLOBAL VARIABLES USED BY THE RENDERING ROUTINES */

unsigned long gu2_bpp;
unsigned long gu2_pitch = 1280;
unsigned long gu2_src_pitch = 1280;
unsigned long gu2_dst_pitch = 1280;
unsigned long gu2_xshift = 1;
unsigned long gu2_pattern_origin = 0;
unsigned long gu2_rop32;
unsigned long gu2_alpha32 = 0;
unsigned long gu2_alpha_value = 0;
unsigned long gu2_alpha_mode = 0;
unsigned long gu2_alpha_active = 0;
unsigned short gu2_alpha_blt_mode = 0;
unsigned short gu2_alpha_vec_mode = 0;
unsigned short gu2_blt_mode = 0;
unsigned short gu2_vector_mode = 0;
unsigned short gu2_bm_throttle = 0;
unsigned short gu2_vm_throttle = 0;
int gu2_current_line = 0;

/*---------------------------------------------------------------------------
 * GFX_RESET_PITCH (PRIVATE ROUTINE - NOT PART OF API)
 *
 * This routine resets all pitches in the graphics engine to one value.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu2_reset_pitch(unsigned short pitch)
#else
void
gfx_reset_pitch(unsigned short pitch)
#endif
{
   gu2_pitch = pitch;
   gu2_dst_pitch = pitch;
   gu2_src_pitch = pitch;
}

/*---------------------------------------------------------------------------
 * GFX_SET_BPP
 *
 * This routine sets the bits per pixel value in the graphics engine.
 * It is also stored in the static variable "gu2_bpp" to use in the future 
 * calls to the rendering routines.  That variable contains the hardware 
 * specific value to load into the MGP_RASTER_MODE register.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_bpp(unsigned short bpp)
#else
void
gfx_set_bpp(unsigned short bpp)
#endif
{
   GFXbpp = bpp;

   /* COVERT TO BPP/FORMAT VALUE */
   /* Save in global to combine with ROP later. */
   /* Could write register here and then use byte access for */
   /* the ROP, but would need to set other 24 bits to make */
   /* sure all are set to their appropriate values. */

   switch (bpp) {
   case 8:
      gu2_bpp = MGP_RM_BPPFMT_332;
      gu2_xshift = 0;
      break;
   case 12:
      gu2_bpp = MGP_RM_BPPFMT_4444;
      gu2_xshift = 1;
      break;
   case 15:
      gu2_bpp = MGP_RM_BPPFMT_1555;
      gu2_xshift = 1;
      break;
   case 16:
      gu2_bpp = MGP_RM_BPPFMT_565;
      gu2_xshift = 1;
      break;
   case 32:
      gu2_bpp = MGP_RM_BPPFMT_8888;
      gu2_xshift = 2;
      break;
   }

   /* SET INITIAL ROP BASED ONLY ON BPP */
   /* Needs to be set before loading any pattern or source colors. */
   /* We must wait for BUSY because these bits are not pipelined   */
   /* in the hardware.                                             */

   GU2_WAIT_BUSY;
   WRITE_GP32(MGP_RASTER_MODE, gu2_bpp);
}

/*
//---------------------------------------------------------------------------
// GFX_SET_SOLID_SOURCE
//
// This routine is used to specify a solid source color.  For the Xfree96
// display driver, the source color is used to specify a planemask and the 
// ROP is adjusted accordingly.
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_solid_source(unsigned long color)
#else
void
gfx_set_solid_source(unsigned long color)
#endif
{
   /* CLEAR TRANSPARENCY FLAG */

   GFXsourceFlags = 0;

   /* WRITE REGISTERS TO SPECIFY SOURCE COLOR */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_SRC_COLOR_FG, color);
}

/*
//---------------------------------------------------------------------------
// GFX_SET_MONO_SOURCE
//
// This routine is used to specify the monochrome source colors.  
// It must be called *after* loading any pattern data (those routines 
// clear the source flags).
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_mono_source(unsigned long bgcolor, unsigned long fgcolor,
		    unsigned short transparent)
#else
void
gfx_set_mono_source(unsigned long bgcolor, unsigned long fgcolor,
		    unsigned short transparent)
#endif
{
   /* SET TRANSPARENCY FLAG */

   GFXsourceFlags = transparent ? MGP_RM_SRC_TRANS : 0;

   /* WRITE COLOR VALUES */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_SRC_COLOR_FG, fgcolor);
   WRITE_GP32(MGP_SRC_COLOR_BG, bgcolor);
}

/*
//---------------------------------------------------------------------------
// GFX_SET_SOLID_PATTERN
//
// This routine is used to specify a solid pattern color.  It is called 
// before performing solid rectangle fills or more complicated BLTs that 
// use a solid pattern color. 
//
// The driver should always call "gfx_load_raster_operation" after a call 
// to this routine to make sure that the pattern flags are set appropriately.
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_solid_pattern(unsigned long color)
#else
void
gfx_set_solid_pattern(unsigned long color)
#endif
{
   /* CLEAR TRANSPARENCY FLAG */

   GFXsourceFlags = 0;

   /* SET PATTERN FLAGS */

   GFXpatternFlags = 0;

   /* POLL UNTIL ABLE TO WRITE THE PATTERN COLOR */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_bpp);
   WRITE_GP32(MGP_PAT_COLOR_0, color);
}

/*
//---------------------------------------------------------------------------
// GFX_SET_MONO_PATTERN
//
// This routine is used to specify a monochrome pattern. 
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_mono_pattern(unsigned long bgcolor, unsigned long fgcolor,
		     unsigned long data0, unsigned long data1,
		     unsigned char transparent)
#else
void
gfx_set_mono_pattern(unsigned long bgcolor, unsigned long fgcolor,
		     unsigned long data0, unsigned long data1,
		     unsigned char transparent)
#endif
{
   /* CLEAR TRANSPARENCY FLAG */

   GFXsourceFlags = 0;

   /* SET PATTERN FLAGS */

   if (transparent)
      GFXpatternFlags = MGP_RM_PAT_MONO | MGP_RM_PAT_TRANS;
   else
      GFXpatternFlags = MGP_RM_PAT_MONO;

   /* POLL UNTIL ABLE TO WRITE THE PATTERN COLOR */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_bpp | GFXpatternFlags);
   WRITE_GP32(MGP_PAT_COLOR_0, bgcolor);
   WRITE_GP32(MGP_PAT_COLOR_1, fgcolor);
   WRITE_GP32(MGP_PAT_DATA_0, data0);
   WRITE_GP32(MGP_PAT_DATA_1, data1);
}

/*
//---------------------------------------------------------------------------
// GFX_SET_COLOR_PATTERN
//
// This routine is used to specify a color pattern. 
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_color_pattern(unsigned long bgcolor, unsigned long fgcolor,
		      unsigned long data0, unsigned long data1,
		      unsigned long data2, unsigned long data3,
		      unsigned char transparent)
#else
void
gfx_set_color_pattern(unsigned long bgcolor, unsigned long fgcolor,
		      unsigned long data0, unsigned long data1,
		      unsigned long data2, unsigned long data3,
		      unsigned char transparent)
#endif
{
   /* REMOVE */
}

/*
//---------------------------------------------------------------------------
// GFX_LOAD_COLOR_PATTERN_LINE
//
// This routine is used to load a single line of a 8x8 color pattern.   
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_load_color_pattern_line(short y, unsigned long *pattern_8x8)
#else
void
gfx_load_color_pattern_line(short y, unsigned long *pattern_8x8)
#endif
{
   unsigned long temp1, temp2, temp3, temp4;

   /* CLEAR TRANSPARENCY FLAG */

   GFXsourceFlags = 0;

   /* SET PATTERN FLAGS */

   GFXpatternFlags = MGP_RM_PAT_COLOR;

   /* OVERRIDE THE RASTER MODE REGISTER */
   /* If the pattern format is set to anything but color  */
   /* before loading the registers, some of the data will */
   /* be duplicated according to the current mode.        */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE,
	      (gu2_rop32 & ~MGP_RM_PAT_FLAGS) | MGP_RM_PAT_COLOR);

   /* LOAD THE PATTERN DATA */
   /* This routine is designed to work in tandem with gfx_pattern_fill.  */
   /* It is used for cases when multiple BLTs with color pattern data    */
   /* are desired on the same line.  It would be inefficient to          */
   /* repeatedly call gfx_color_pattern_fill for each single-line BLT.   */
   /* So, we will simply replicate the pattern data across all available */
   /* lines such that the pattern y origin plays no part in the BLT.     */

   /* 8 BPP */

   if (gu2_xshift == 0) {
      pattern_8x8 += (y & 7) << 1;
      temp1 = BYTE_SWIZZLE(pattern_8x8[0]);
      temp2 = BYTE_SWIZZLE(pattern_8x8[1]);
      WRITE_GP32(MGP_PAT_DATA_1, temp1);
      WRITE_GP32(MGP_PAT_DATA_0, temp2);
      WRITE_GP32(MGP_PAT_COLOR_1, temp1);
      WRITE_GP32(MGP_PAT_COLOR_0, temp2);

      GU2_WAIT_BUSY;
      WRITE_GP32(MGP_PAT_COLOR_3, temp1);
      WRITE_GP32(MGP_PAT_COLOR_2, temp2);
      WRITE_GP32(MGP_PAT_COLOR_5, temp1);
      WRITE_GP32(MGP_PAT_COLOR_4, temp2);
   } else if (gu2_xshift == 1) {
      pattern_8x8 += (y & 7) << 2;
      temp1 = WORD_SWIZZLE(pattern_8x8[0]);
      temp2 = WORD_SWIZZLE(pattern_8x8[1]);
      temp3 = WORD_SWIZZLE(pattern_8x8[2]);
      temp4 = WORD_SWIZZLE(pattern_8x8[3]);

      WRITE_GP32(MGP_PAT_COLOR_1, temp1);
      WRITE_GP32(MGP_PAT_COLOR_0, temp2);
      WRITE_GP32(MGP_PAT_DATA_1, temp3);
      WRITE_GP32(MGP_PAT_DATA_0, temp4);

      GU2_WAIT_BUSY;
      WRITE_GP32(MGP_PAT_COLOR_5, temp1);
      WRITE_GP32(MGP_PAT_COLOR_4, temp2);
      WRITE_GP32(MGP_PAT_COLOR_3, temp3);
      WRITE_GP32(MGP_PAT_COLOR_2, temp4);
   } else {
      pattern_8x8 += (y & 7) << 3;

      WRITE_GP32(MGP_PAT_COLOR_1, pattern_8x8[4]);
      WRITE_GP32(MGP_PAT_COLOR_0, pattern_8x8[5]);
      WRITE_GP32(MGP_PAT_DATA_1, pattern_8x8[6]);
      WRITE_GP32(MGP_PAT_DATA_0, pattern_8x8[7]);

      GU2_WAIT_BUSY;
      WRITE_GP32(MGP_PAT_COLOR_5, pattern_8x8[0]);
      WRITE_GP32(MGP_PAT_COLOR_4, pattern_8x8[1]);
      WRITE_GP32(MGP_PAT_COLOR_3, pattern_8x8[2]);
      WRITE_GP32(MGP_PAT_COLOR_2, pattern_8x8[3]);
   }
}

/*
//---------------------------------------------------------------------------
// GFX_SET_RASTER_OPERATION
//
// This routine loads the specified raster operation.  It sets the pattern
// flags appropriately.
//---------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_set_raster_operation(unsigned char rop)
#else
void
gfx_set_raster_operation(unsigned char rop)
#endif
{
   gu2_blt_mode = 0;

   /* DISABLE ALPHA BLENDING */

   gu2_alpha_active = 0;

   /* GENERATE 32-BIT VERSION OF ROP WITH PATTERN FLAGS */

   gu2_rop32 = (unsigned long)rop | GFXpatternFlags | gu2_bpp;

   /* CHECK IF SOURCE FLAGS SHOULD BE MERGED */

   if ((rop & 0x33) ^ ((rop >> 2) & 0x33))
      gu2_rop32 |= GFXsourceFlags;
   else
      gu2_blt_mode = 0x40;

   /* SET FLAG INDICATING ROP REQUIRES DESTINATION DATA */
   /* True if even bits (0:2:4:6) do not equal the corresponding */
   /* even bits (1:3:5:7). */

   if ((rop & 0x55) ^ ((rop >> 1) & 0x55)) {
      gu2_blt_mode |= MGP_BM_DST_REQ;
      gu2_vector_mode = MGP_VM_DST_REQ;
   } else {
      gu2_vector_mode = 0;
   }
}

/*
//----------------------------------------------------------------------------
// GFX_PATTERN_FILL
//
// This routine is used to fill a rectangular region.  The pattern must 
// be previously loaded using one of GFX_load_*_pattern routines.  Also, the 
// raster operation must be previously specified using the 
// "GFX_load_raster_operation" routine.
//
//      X               screen X position (left)
//      Y               screen Y position (top)
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_pattern_fill(unsigned short x, unsigned short y,
		 unsigned short width, unsigned short height)
#else
void
gfx_pattern_fill(unsigned short x, unsigned short y,
		 unsigned short width, unsigned short height)
#endif
{
   unsigned long offset = 0, size;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE STARTING OFFSET */

   offset = (unsigned long)y *gu2_pitch + (((unsigned long)x) << gu2_xshift);

   /* CHECK IF PATTERN ORIGINS NEED TO BE SET */

   if (GFXpatternFlags) {
      /* COMBINE X AND Y PATTERN ORIGINS WITH OFFSET */

      offset |= ((unsigned long)(x & 7)) << 26;
      offset |= ((unsigned long)(y & 7)) << 29;
   }

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_DST_OFFSET, offset);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_pitch);
   WRITE_GP32(MGP_BLT_MODE, gu2_blt_mode);
}

/*
//----------------------------------------------------------------------------
// GFX_COLOR_PATTERN_FILL
//
// This routine is used to render a rectangle using the current raster 
// operation and the specified color pattern.  It allows an 8x8 color 
// pattern to be rendered without multiple calls to the gfx_set_color_pattern
// and gfx_pattern_fill routines.
//
//      X               screen X position (left)
//      Y               screen Y position (top)
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//      *PATTERN		pointer to 8x8 color pattern data
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_color_pattern_fill(unsigned short x, unsigned short y,
		       unsigned short width, unsigned short height,
		       unsigned long *pattern)
#else
void
gfx_color_pattern_fill(unsigned short x, unsigned short y,
		       unsigned short width, unsigned short height,
		       unsigned long *pattern)
#endif
{
   /* CALL GFX2 ROUTINE TO AVOID DUPLICATION OF CODE */

   unsigned long offset = (unsigned long)y * gu2_pitch +
	 (((unsigned long)x) << gu2_xshift);
   unsigned long origin = gu2_pattern_origin;
   unsigned long pitch = gu2_dst_pitch;

   gfx2_set_pattern_origin(x, y);
   gfx2_set_destination_stride((unsigned short)gu2_pitch);
   gfx2_color_pattern_fill(offset, width, height, pattern);

   /* RESTORE GFX2 VALUES */

   gu2_pattern_origin = origin;
   gu2_dst_pitch = pitch;
}

/*
//----------------------------------------------------------------------------
// SCREEN TO SCREEN BLT
//
// This routine should be used to perform a screen to screen BLT when the 
// ROP does not require destination data.
//
//      SRCX            screen X position to copy from
//      SRCY            screen Y position to copy from
//      DSTX            screen X position to copy to
//      DSTY            screen Y position to copy to
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_screen_to_screen_blt(unsigned short srcx, unsigned short srcy,
			 unsigned short dstx, unsigned short dsty,
			 unsigned short width, unsigned short height)
#else
void
gfx_screen_to_screen_blt(unsigned short srcx, unsigned short srcy,
			 unsigned short dstx, unsigned short dsty,
			 unsigned short width, unsigned short height)
#endif
{
   unsigned long srcoffset, dstoffset, size;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE THE DIRECTION OF THE BLT */

   blt_mode = gu2_blt_mode | MGP_BM_SRC_FB;
   if (dstx > srcx) {
      blt_mode |= MGP_BM_NEG_XDIR;
      srcx += width - 1;
      dstx += width - 1;
   }
   if (dsty > srcy) {
      blt_mode |= MGP_BM_NEG_YDIR;
      srcy += height - 1;
      dsty += height - 1;
   }

   /* CALCULATE STARTING OFFSETS */

   srcoffset = (unsigned long)srcy *gu2_pitch +
	 (((unsigned long)srcx) << gu2_xshift);
   dstoffset = ((unsigned long)dsty * gu2_pitch +
		(((unsigned long)dstx) << gu2_xshift)) & 0xFFFFFF;

   /* MERGE PATTERN INFORMATION */
   /* This must be done after the x and y coordinates have been updated,  */
   /* as the x and y pattern origins correspond to the first ROPed pixel. */

   if (GFXpatternFlags) {
      /* COMBINE X AND Y PATTERN ORIGINS WITH OFFSET */

      dstoffset |= ((unsigned long)(dstx & 7)) << 26;
      dstoffset |= ((unsigned long)(dsty & 7)) << 29;
   }

   /* TURN INTO BYTE ADDRESS IF NEGATIVE X DIRECTION */
   /* This is a quirk of the hardware. */

   if (blt_mode & MGP_BM_NEG_XDIR) {
      srcoffset += (1 << gu2_xshift) - 1;
      dstoffset += (1 << gu2_xshift) - 1;
   }

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_SRC_OFFSET, srcoffset);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_pitch | (gu2_pitch << 16));
   WRITE_GP16(MGP_BLT_MODE, blt_mode);
}

/*
//----------------------------------------------------------------------------
// SCREEN TO SCREEN TRANSPARENT BLT
//
// This routine should be used to perform a screen to screen BLT when a 
// specified color should by transparent.  The only supported ROP is SRCCOPY.
//
//      SRCX            screen X position to copy from
//      SRCY            screen Y position to copy from
//      DSTX            screen X position to copy to
//      DSTY            screen Y position to copy to
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//      COLOR           transparent color
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_screen_to_screen_xblt(unsigned short srcx, unsigned short srcy,
			  unsigned short dstx, unsigned short dsty,
			  unsigned short width, unsigned short height,
			  unsigned long color)
#else
void
gfx_screen_to_screen_xblt(unsigned short srcx, unsigned short srcy,
			  unsigned short dstx, unsigned short dsty,
			  unsigned short width, unsigned short height,
			  unsigned long color)
#endif
{
   unsigned long rop32;

   /* SAVE ORIGINAL RASTER MODE SETTINGS */

   rop32 = gu2_rop32;

   /* WRITE REGISTERS TO SPECIFY COLOR TRANSPARENCY */
   /* Match GU1 implementation that only allows SRCCOPY for the ROP. */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_SRC_COLOR_FG, color);
   WRITE_GP32(MGP_SRC_COLOR_BG, 0xFFFFFFFF);

   /* SET GLOBAL RASTER SETTINGS */
   /* This is needed, as the screen-to-screen BLT      */
   /* routine will overwrite the raster mode register. */

   gu2_rop32 = gu2_bpp | MGP_RM_SRC_TRANS | 0xCC;

   /* CALL NORMAL SCREEN TO SCREEN BLT ROUTINE */

   gfx_screen_to_screen_blt(srcx, srcy, dstx, dsty, width, height);

   /* RESTORE GLOBAL RASTER SETTINGS */

   gu2_rop32 = rop32;
}

/*
//----------------------------------------------------------------------------
// COLOR BITMAP TO SCREEN BLT
//
// This routine transfers color bitmap data to the screen.
//
//      SRCX            X offset within source bitmap
//      SRCY            Y offset within source bitmap
//      DSTX            screen X position to render data
//      DSTY            screen Y position to render data
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//      *DATA           pointer to bitmap data
//      PITCH           pitch of bitmap data (bytes between scanlines)
//
// Transparency is handled by another routine.
//----------------------------------------------------------------------------
*/

#if GFX_2DACCEL_DYNAMIC
void
gu2_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			       unsigned short dstx, unsigned short dsty,
			       unsigned short width, unsigned short height,
			       unsigned char *data, long pitch)
#else
void
gfx_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			       unsigned short dstx, unsigned short dsty,
			       unsigned short width, unsigned short height,
			       unsigned char *data, long pitch)
#endif
{
   unsigned long dstoffset, srcoffset, size, bytes;
   unsigned long offset, temp_offset;
   unsigned long dword_bytes, bytes_extra;
   unsigned short blt_mode;

   blt_mode = gu2_blt_mode | MGP_BM_SRC_FB;
   size = (((unsigned long)width) << 16) | 1;

   /* CALCULATE STARTING OFFSETS */

   offset = (unsigned long)srcy *pitch + ((unsigned long)srcx << gu2_xshift);

   dstoffset = (unsigned long)dsty *gu2_pitch +
	 (((unsigned long)dstx) << gu2_xshift);

   /* CHECK IF PATTERN ORIGINS NEED TO BE SET */

   if (GFXpatternFlags) {
      /* COMBINE X AND Y PATTERN ORIGINS WITH OFFSET */

      dstoffset |= ((unsigned long)(dstx & 7)) << 26;
      dstoffset |= ((unsigned long)(dsty & 7)) << 29;
   }

   bytes = width << gu2_xshift;
   dword_bytes = bytes & ~0x3L;
   bytes_extra = bytes & 0x3L;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first).   */
   /* The source offset is always 0 since we allow misaligned dword reads. */
   /* We must wait for BLT busy because the GP may be executing a screen   */
   /* to screen BLT from the scratchpad area.                              */

   GU2_WAIT_BUSY;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_pitch);

   /* WRITE DATA ONE LINE AT A TIME */
   /* For speed reasons, data is written to an offscreen scratch area and then        */
   /* BLTed using a screen to screen BLT. This is similar to the GX1 BLT buffers, but */
   /* slightly more efficient in that we can queue up data while the GP is rendering  */
   /* a line.                                                                         */

   while (height--) {
      temp_offset = offset;
      srcoffset = gfx_gx2_scratch_base;
      if (gu2_current_line)
	 srcoffset += 8192;

      GU2_WAIT_PENDING;
      WRITE_GP32(MGP_SRC_OFFSET, srcoffset);
      WRITE_GP32(MGP_DST_OFFSET, dstoffset);
      dstoffset += gu2_pitch;
      dstoffset += 0x20000000;

      WRITE_FRAME_BUFFER_STRING32(srcoffset, dword_bytes, data, temp_offset);
      if (bytes_extra) {
	 temp_offset += dword_bytes;
	 srcoffset += dword_bytes;
	 WRITE_FRAME_BUFFER_STRING8(srcoffset, bytes_extra, data,
				    temp_offset);
      }
      WRITE_GP16(MGP_BLT_MODE, blt_mode);
      offset += pitch;
      gu2_current_line = 1 - gu2_current_line;
   }
}

/*
//----------------------------------------------------------------------------
// COLOR BITMAP TO SCREEN TRANSPARENT BLT
//
// This routine transfers color bitmap data to the screen with transparency.
// The transparent color is specified.  The only supported ROP is SRCCOPY, 
// meaning that transparency cannot be applied if the ROP requires 
// destination data (this is a hardware restriction).
//
//      SRCX            X offset within source bitmap
//      SRCY            Y offset within source bitmap
//      DSTX            screen X position to render data
//      DSTY            screen Y position to render data
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//      *DATA           pointer to bitmap data
//      PITCH           pitch of bitmap data (bytes between scanlines)
//      COLOR           transparent color
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_color_bitmap_to_screen_xblt(unsigned short srcx, unsigned short srcy,
				unsigned short dstx, unsigned short dsty,
				unsigned short width, unsigned short height,
				unsigned char *data, long pitch,
				unsigned long color)
#else
void
gfx_color_bitmap_to_screen_xblt(unsigned short srcx, unsigned short srcy,
				unsigned short dstx, unsigned short dsty,
				unsigned short width, unsigned short height,
				unsigned char *data, long pitch,
				unsigned long color)
#endif
{
   unsigned long rop32;

   /* SAVE EXISTING RASTER MODE SETTINGS */

   rop32 = gu2_rop32;

   /* WRITE REGISTERS TO SPECIFY COLOR TRANSPARENCY */
   /* Match GU1 implementation that only allows SRCCOPY for the ROP. */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_SRC_COLOR_FG, color);
   WRITE_GP32(MGP_SRC_COLOR_BG, 0xFFFFFFFF);

   /* SET GLOBAL RASTER SETTINGS */
   /* This is needed, as the screen-to-screen BLT      */
   /* routine will overwrite the raster mode register. */

   gu2_rop32 = gu2_bpp | MGP_RM_SRC_TRANS | 0xCC;

   /* CALL NORMAL COLOR BITMAP TO SCREEN BLT ROUTINE */

   gfx_color_bitmap_to_screen_blt(srcx, srcy, dstx, dsty, width, height,
				  data, pitch);

   /* RESTORE RASTER SETTINGS */

   gu2_rop32 = rop32;
}

/*
//----------------------------------------------------------------------------
// MONOCHROME BITMAP TO SCREEN BLT
//
// This routine transfers monochrome bitmap data to the screen.  
//
//      SRCX            X offset within source bitmap
//      SRCY            Y offset within source bitmap
//      DSTX            screen X position to render data
//      DSTY            screen Y position to render data
//      WIDTH           width of rectangle, in pixels
//      HEIGHT          height of rectangle, in scanlines
//      *DATA           pointer to bitmap data
//      PITCH           pitch of bitmap data (bytes between scanlines)
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			      unsigned short dstx, unsigned short dsty,
			      unsigned short width, unsigned short height,
			      unsigned char *data, short pitch)
#else
void
gfx_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			      unsigned short dstx, unsigned short dsty,
			      unsigned short width, unsigned short height,
			      unsigned char *data, short pitch)
#endif
{
   unsigned long dstoffset, size, bytes;
   unsigned long offset, temp_offset, temp1 = 0, temp2 = 0;
   unsigned long i, j = 0, fifo_lines, dwords_extra, bytes_extra;
   unsigned long shift = 0;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE STARTING OFFSETS */

   offset = (unsigned long)srcy *pitch + ((unsigned long)srcx >> 3);

   dstoffset = (unsigned long)dsty *gu2_pitch +
	 (((unsigned long)dstx) << gu2_xshift);

   /* CHECK IF PATTERN ORIGINS NEED TO BE SET */

   if (GFXpatternFlags) {
      /* COMBINE X AND Y PATTERN ORIGINS WITH OFFSET */

      dstoffset |= ((unsigned long)(dstx & 7)) << 26;
      dstoffset |= ((unsigned long)(dsty & 7)) << 29;
   }

   bytes = ((srcx & 7) + width + 7) >> 3;
   fifo_lines = bytes >> 5;
   dwords_extra = (bytes & 0x0000001Cl) >> 2;
   bytes_extra = bytes & 0x00000003l;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first).   */
   /* The source offset is always 0 since we allow misaligned dword reads. */
   /* Need to wait for busy instead of pending, since hardware clears      */
   /* the host data FIFO at the beginning of a BLT.                        */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_SRC_OFFSET, ((unsigned long)srcx & 7) << 26);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_pitch);
   WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | MGP_BM_SRC_HOST | MGP_BM_SRC_MONO);

   /* WAIT FOR BLT TO BE LATCHED */

   GU2_WAIT_PENDING;

   /* WRITE ALL OF THE DATA TO THE HOST SOURCE REGISTER */

   while (height--) {
      temp_offset = offset;

      /* WRITE ALL FULL FIFO LINES */

      for (i = 0; i < fifo_lines; i++) {
	 GU2_WAIT_HALF_EMPTY;
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, 8, j, data, temp_offset, temp1);
	 temp_offset += 32;
      }

      /* WRITE ALL FULL DWORDS */

      GU2_WAIT_HALF_EMPTY;
      if (dwords_extra) {
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, dwords_extra, i, data,
			      temp_offset, temp1);
	 temp_offset += (dwords_extra << 2);
      }

      /* WRITE REMAINING BYTES */

      shift = 0;
      if (bytes_extra)
	 WRITE_GPREG_STRING8(MGP_HST_SOURCE, bytes_extra, shift, i, data,
			     temp_offset, temp1, temp2);

      offset += pitch;
   }
}

/*---------------------------------------------------------------------------
 * GFX_TEXT_BLT	
 *
 * This routine is similar to the gfx_mono_bitmap_to_screen_blt routine 
 * but assumes that source data is byte-packed.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu2_text_blt(unsigned short dstx, unsigned short dsty, unsigned short width,
	     unsigned short height, unsigned char *data)
#else
void
gfx_text_blt(unsigned short dstx, unsigned short dsty, unsigned short width,
	     unsigned short height, unsigned char *data)
#endif
{
   unsigned long size, bytes;
   unsigned long dstoffset, temp1 = 0, temp2 = 0, temp_offset = 0;
   unsigned long i, j = 0, fifo_lines, dwords_extra, bytes_extra;
   unsigned long shift;

   size = (((unsigned long)width) << 16) | height;

   dstoffset = (unsigned long)dsty *gu2_pitch +
	 (((unsigned long)dstx) << gu2_xshift);

   /* CHECK IF PATTERN ORIGINS NEED TO BE SET */

   if (GFXpatternFlags) {
      /* COMBINE X AND Y PATTERN ORIGINS WITH OFFSET */

      dstoffset |= ((unsigned long)(dstx & 7)) << 26;
      dstoffset |= ((unsigned long)(dsty & 7)) << 29;
   }

   /* CALCULATE STARTING OFFSETS */

   bytes = ((width + 7) >> 3) * height;
   fifo_lines = bytes >> 5;
   dwords_extra = (bytes & 0x0000001Cl) >> 2;
   bytes_extra = bytes & 0x00000003l;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_SRC_OFFSET, 0);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_pitch);
   WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | MGP_BM_SRC_HOST |
	      MGP_BM_SRC_BP_MONO);

   /* WAIT FOR BLT TO BE LATCHED */

   GU2_WAIT_PENDING;

   /* WRITE ALL FULL FIFO LINES */

   for (i = 0; i < fifo_lines; i++) {
      GU2_WAIT_HALF_EMPTY;
      WRITE_GPREG_STRING32(MGP_HST_SOURCE, 8, j, data, temp_offset, temp1);
      temp_offset += 32;
   }

   /* WRITE ALL FULL DWORDS */

   if (dwords_extra || bytes_extra) {
      GU2_WAIT_HALF_EMPTY;
      if (dwords_extra) {
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, dwords_extra, i, data,
			      temp_offset, temp1);
	 temp_offset += (dwords_extra << 2);
      }
      if (bytes_extra) {
	 shift = 0;
	 WRITE_GPREG_STRING8(MGP_HST_SOURCE, bytes_extra, shift, i, data,
			     temp_offset, temp1, temp2);
      }
   }
}

/*
//----------------------------------------------------------------------------
// BRESENHAM LINE
//
// This routine draws a vector using the specified Bresenham parameters.  
// Currently this file does not support a routine that accepts the two 
// endpoints of a vector and calculates the Bresenham parameters.  If it 
// ever does, this routine is still required for vectors that have been 
// clipped.
//
//      X               screen X position to start vector
//      Y               screen Y position to start vector
//      LENGTH          length of the vector, in pixels
//      INITERR         Bresenham initial error term
//      AXIALERR        Bresenham axial error term
//      DIAGERR         Bresenham diagonal error term
//      FLAGS           VM_YMAJOR, VM_MAJOR_INC, VM_MINOR_INC
//----------------------------------------------------------------------------
*/
#if GFX_2DACCEL_DYNAMIC
void
gu2_bresenham_line(unsigned short x, unsigned short y,
		   unsigned short length, unsigned short initerr,
		   unsigned short axialerr, unsigned short diagerr,
		   unsigned short flags)
#else
void
gfx_bresenham_line(unsigned short x, unsigned short y,
		   unsigned short length, unsigned short initerr,
		   unsigned short axialerr, unsigned short diagerr,
		   unsigned short flags)
#endif
{
   unsigned long offset;
   unsigned long data1 = (((unsigned long)axialerr) << 16) | diagerr;
   unsigned long data2 = (((unsigned long)length) << 16) | initerr;
   unsigned short vector_mode = gu2_vector_mode | flags;

   /* CALCULATE STARTING OFFSET */

   offset = (unsigned long)y *gu2_pitch + (((unsigned long)x) << gu2_xshift);

   /* CHECK NULL LENGTH */

   if (!length)
      return;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_DST_OFFSET, offset);
   WRITE_GP32(MGP_VEC_ERR, data1);
   WRITE_GP32(MGP_VEC_LEN, data2);
   WRITE_GP32(MGP_STRIDE, gu2_pitch);
   WRITE_GP32(MGP_VECTOR_MODE, vector_mode);
}

/*---------------------------------------------------------------------------
 * GFX_WAIT_UNTIL_IDLE
 *
 * This routine waits until the graphics engine is idle.  This is required
 * before allowing direct access to the frame buffer.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu2_wait_until_idle(void)
#else
void
gfx_wait_until_idle(void)
#endif
{
   while (READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_BUSY) ;
}

/*---------------------------------------------------------------------------
 * GFX_TEST_BLT_PENDING
 *
 * This routine returns 1 if a BLT is pending, meaning that a call to 
 * perform a rendering operation would stall.  Otherwise it returns 0.
 * It is used by Darwin during random testing to only start a BLT 
 * operation when it knows the Durango routines won't spin on graphics
 * (so Darwin can continue to do frame buffer reads and writes).
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
int
gu2_test_blt_pending(void)
#else
int
gfx_test_blt_pending(void)
#endif
{
   if (READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_PENDING)
      return (1);
   return (0);
}

/*---------------------------------------------------------------------------
 * NEW ROUTINES FOR REDCLOUD
 *---------------------------------------------------------------------------
 */

/*---------------------------------------------------------------------------
 * GFX2_SET_SOURCE_STRIDE
 *
 * This routine sets the stride to be used in successive screen to screen 
 * BLTs (used by gfx2_screen_to_screen_blt and gfx2_mono_expand_blt).
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_source_stride(unsigned short stride)
#else
void
gfx2_set_source_stride(unsigned short stride)
#endif
{
   /* SAVE STRIDE TO BE USED LATER */

   gu2_src_pitch = (unsigned long)stride;
}

/*---------------------------------------------------------------------------
 * GFX2_SET_DESTINATION_STRIDE
 *
 * This routine sets the stride used when rendering to the screen.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_destination_stride(unsigned short stride)
#else
void
gfx2_set_destination_stride(unsigned short stride)
#endif
{
   /* SAVE STRIDE TO BE USED LATER */

   gu2_dst_pitch = (unsigned long)stride;
}

/*---------------------------------------------------------------------------
 * GFX2_SET_PATTERN_ORIGIN
 *
 * This routine sets the origin within an 8x8 pattern.  It is needed if 
 * using a monochrome or color pattern (not used for a solid pattern).
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_pattern_origin(int x, int y)
#else
void
gfx2_set_pattern_origin(int x, int y)
#endif
{
   /* STORE IN FORMAT THAT CAN BE COMBINED WITH THE DESTINATION OFFSET */

   gu2_pattern_origin = (((unsigned long)(x & 7)) << 26) |
	 (((unsigned long)(y & 7)) << 29);
}

/*---------------------------------------------------------------------------
 * GFX2_SET_SOURCE_TRANSPARENCY
 *
 * This routine sets the source transparency color and mask to be used
 * in future rendering operations.  If both the color and mask are set 
 * to zero (normally completely transparent), those values indicate that
 * transparency should be disabled.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_source_transparency(unsigned long color, unsigned long mask)
#else
void
gfx2_set_source_transparency(unsigned long color, unsigned long mask)
#endif
{
   /* WRITE REGISTERS TO SPECIFY COLOR TRANSPARENCY */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_SRC_COLOR_FG, color);
   WRITE_GP32(MGP_SRC_COLOR_BG, mask);

   /* SET TRANSPARENCY FLAG */

   GFXsourceFlags = (color || mask) ? MGP_RM_SRC_TRANS : 0;
}

/*---------------------------------------------------------------------------
 * GFX2_SET_ALPHA_MODE
 *
 * This routine sets the alpha blending mode to be used in successive 
 * rendering operations.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_alpha_mode(int mode)
#else
void
gfx2_set_alpha_mode(int mode)
#endif
{
   /* SAVE ALPHA MODE FOR LATER */

   gu2_alpha_mode = mode;
}

/*---------------------------------------------------------------------------
 * GFX2_SET_ALPHA_VALUE
 *
 * This routine sets the alpha value to be used with certain alpha blending
 * modes (ALPHA_MODE_BLEND).
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_set_alpha_value(unsigned char value)
#else
void
gfx2_set_alpha_value(unsigned char value)
#endif
{
   /* SAVE ALPHA VALUE TO BE USED LATER */

   gu2_alpha_value = (unsigned long)value;

   /* SET GLOBAL FLAG */
   /* gfx2_* routines will use this flag to program alpha values */
   /* appropriately.  Normal gfx_* routines will always write    */
   /* the current ROP settings.  In this way, the alpha mode     */
   /* affects only second generation routines.                   */

   gu2_alpha_active = 1;

   switch (gu2_alpha_mode) {
   case ALPHA_MODE_BLEND:

      /* GENERATE 32-BIT VERSION OF RASTER MODE REGISTER */
      /* Pattern data is not involved in the operation.  */

      gu2_alpha32 = gu2_alpha_value | gu2_bpp;

      /* HANDLE SPECIAL CASES FOR ENDPOINTS              */
      /* The 8-bit hardware alpha value is always        */
      /* interpreted as a fraction.  Consequently, there */
      /* is no way to use values of 255 or 0 to exclude  */
      /* one of the inputs.                              */

      switch (gu2_alpha_value) {
	 /* DESTINATION ONLY                               */
	 /* Operation is alpha * A, where A is destination */
	 /* and alpha is 1.                                */

      case 0:

	 gu2_alpha32 |= MGP_RM_SELECT_ALPHA_1 |
	       MGP_RM_ALPHA_TIMES_A |
	       MGP_RM_ALPHA_TO_RGB | MGP_RM_DEST_FROM_CHAN_A;
	 break;

	 /* SOURCE ONLY                                    */
	 /* Operation is alpha * A, where A is source and  */
	 /* alpha is 1.                                    */

      case 255:

	 gu2_alpha32 |= MGP_RM_SELECT_ALPHA_1 |
	       MGP_RM_ALPHA_TO_RGB | MGP_RM_ALPHA_TIMES_A;
	 break;

	 /* DEFAULT                                        */
	 /* Operation is alpha * A + (1 - alpha) * B;      */
	 /* A is source, B is destination and alpha is the */
	 /* programmed 8-bit value.                        */

      default:

	 gu2_alpha32 |= MGP_RM_SELECT_ALPHA_R |
	       MGP_RM_ALPHA_TO_RGB | MGP_RM_ALPHA_A_PLUS_BETA_B;

      }

      /* CHECK IF SOURCE INFORMATION SHOULD BE MERGED    */
      /* Alpha value of 0 indicates destination only.    */

      if (gu2_alpha_value != 0)
	 gu2_alpha32 |= GFXsourceFlags;

      /* SET FLAG FOR DESTINATION DATA IF NECESSARY      */
      /* Alpha value of 255 indicates no destination     */

      if (gu2_alpha_value != 255) {
	 gu2_alpha_blt_mode = MGP_BM_DST_REQ;
	 gu2_alpha_vec_mode = MGP_VM_DST_REQ;
      }

      break;
   }
}

/*---------------------------------------------------------------------------
 * GFX2_PATTERN_FILL
 *
 * This routine is similar to the gfx_pattern_fill routine, but allows the
 * use of an arbitrary destination stride.  The rendering position is 
 * also specified as an offset instead of an (x,y) position.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_pattern_fill(unsigned long dstoffset, unsigned short width,
		  unsigned short height)
#else
void
gfx2_pattern_fill(unsigned long dstoffset, unsigned short width,
		  unsigned short height)
#endif
{
   unsigned long size;

   size = (((unsigned long)width) << 16) | height;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch);
   WRITE_GP32(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;
}

/*---------------------------------------------------------------------------
 * GFX2_COLOR_PATTERN_FILL
 *
 * This routine is used to render a rectangle using the current raster 
 * operation and the specified color pattern.  It allows an 8x8 color 
 * pattern to be rendered without multiple calls to the gfx_set_color_pattern
 * and gfx_pattern_fill routines.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_color_pattern_fill(unsigned long dstoffset, unsigned short width,
			unsigned short height, unsigned long *pattern)
#else
void
gfx2_color_pattern_fill(unsigned long dstoffset, unsigned short width,
			unsigned short height, unsigned long *pattern)
#endif
{
   int pass;
   unsigned long lines, size, patxorigin, patoffset;

   /* ONLY USE HW PATTERN ORIGIN FOR THE X DIRECTION */
   /* Y direction handled by referencing proper location in pattern data. */

   patxorigin = (gu2_pattern_origin) & 0x1C000000;

   /* OVERRIDE PATTERN FLAGS IN ROP TO FORCE COLOR PATTERN */

   GU2_WAIT_PENDING;
   WRITE_GP32(MGP_RASTER_MODE,
	      (gu2_rop32 & ~MGP_RM_PAT_FLAGS) | MGP_RM_PAT_COLOR);

   /* ATTEMPT TO OPTIMIZE */
   /* If possible, we can perform the pattern fill in only a few passes    */
   /* This is performed by multiplying the pitch by an appropriate amount. */
   /* Consequently, if the multiplied pitch exceeds 16 bits, this          */
   /* optimization is impossible.                                          */

   if ((gu2_dst_pitch << (gu2_xshift + 1)) <= 0xFFFF) {
      /* HANDLE VARIOUS COLOR DEPTHS DIFFERENTLY */

      switch (gu2_xshift) {
      case 0:				/* 8 BPP */

	 /* TWO PASSES FOR 8 BPP */
	 /* Render every other line per pass by doubling the pitch. */

	 patoffset = (gu2_pattern_origin >> 28) & 0x0E;
	 for (pass = 0; pass < 2; pass++) {
	    /* CAN WRITE SOME PATTERN REGISTERS WHILE "PENDING" */

	    GU2_WAIT_PENDING;
	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    lines = (height + 1 - pass) >> 1;
	    if (!lines)
	       break;
	    size = (((unsigned long)width) << 16) | lines;
	    WRITE_GP32(MGP_WID_HEIGHT, size);
	    WRITE_GP32(MGP_STRIDE, gu2_dst_pitch << 1);
	    WRITE_GP32(MGP_PAT_DATA_1, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_DATA_0, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 4) & 0x0E;
	    WRITE_GP32(MGP_PAT_COLOR_1, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_0, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 4) & 0x0E;

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined. */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_3, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_2, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 4) & 0x0E;
	    WRITE_GP32(MGP_PAT_COLOR_5, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_4, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);
	    gu2_bm_throttle = 0;
	    gu2_vm_throttle = 0;

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch;
	    patoffset = (patoffset + 6) & 0x0E;
	 }
	 break;

      case 1:				/* 12, 15, OR 16 BPP */

	 /* FOUR PASSES FOR 16 BPP */
	 /* Render every 4th line per pass by quadrupling the pitch. */

	 patoffset = (gu2_pattern_origin >> 27) & 0x1C;
	 for (pass = 0; pass < 4; pass++) {
	    /* CAN WRITE SOME PATTERN REGISTERS WHILE "PENDING" */

	    GU2_WAIT_PENDING;
	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    lines = (height + 3 - pass) >> 2;
	    if (!lines)
	       break;
	    size = (((unsigned long)width) << 16) | lines;
	    WRITE_GP32(MGP_WID_HEIGHT, size);
	    WRITE_GP32(MGP_STRIDE, gu2_dst_pitch << 2);
	    WRITE_GP32(MGP_PAT_COLOR_1, WORD_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_0, WORD_SWIZZLE(pattern[patoffset + 1]));
	    WRITE_GP32(MGP_PAT_DATA_1, WORD_SWIZZLE(pattern[patoffset + 2]));
	    WRITE_GP32(MGP_PAT_DATA_0, WORD_SWIZZLE(pattern[patoffset + 3]));
	    patoffset = (patoffset + 16) & 0x1C;

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined. */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_5, WORD_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_4, WORD_SWIZZLE(pattern[patoffset + 1]));
	    WRITE_GP32(MGP_PAT_COLOR_3, WORD_SWIZZLE(pattern[patoffset + 2]));
	    WRITE_GP32(MGP_PAT_COLOR_2, WORD_SWIZZLE(pattern[patoffset + 3]));
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);
	    gu2_bm_throttle = 0;
	    gu2_vm_throttle = 0;

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch;
	    patoffset = (patoffset + 20) & 0x1C;
	 }
	 break;

      case 2:				/* 32 BPP */

	 /* EIGHT PASSES FOR 32 BPP */
	 /* Render every 8th line per pass by setting pitch * 8. */

	 patoffset = (gu2_pattern_origin >> 26) & 0x38;
	 for (pass = 0; pass < 8; pass++) {
	    /* CAN WRITE SOME PATTERN REGISTERS WHILE "PENDING" */

	    GU2_WAIT_PENDING;
	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    lines = (height + 7 - pass) >> 3;
	    if (!lines)
	       break;
	    size = (((unsigned long)width) << 16) | lines;
	    WRITE_GP32(MGP_WID_HEIGHT, size);
	    WRITE_GP32(MGP_STRIDE, gu2_dst_pitch << 3);
	    WRITE_GP32(MGP_PAT_COLOR_1, pattern[patoffset + 4]);
	    WRITE_GP32(MGP_PAT_COLOR_0, pattern[patoffset + 5]);
	    WRITE_GP32(MGP_PAT_DATA_1, pattern[patoffset + 6]);
	    WRITE_GP32(MGP_PAT_DATA_0, pattern[patoffset + 7]);

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined. */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_5, pattern[patoffset]);
	    WRITE_GP32(MGP_PAT_COLOR_4, pattern[patoffset + 1]);
	    WRITE_GP32(MGP_PAT_COLOR_3, pattern[patoffset + 2]);
	    WRITE_GP32(MGP_PAT_COLOR_2, pattern[patoffset + 3]);
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);
	    gu2_bm_throttle = 0;
	    gu2_vm_throttle = 0;

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch;
	    patoffset = (patoffset + 8) & 0x38;
	 }
	 break;
      }
   }

   else {
      WRITE_GP32(MGP_STRIDE, gu2_dst_pitch);

      switch (gu2_xshift) {
      case 0:				/* 8 BPP - 4 LINES PER PASS */

	 patoffset = (gu2_pattern_origin >> 28) & 0x0E;
	 while (height) {
	    lines = height > 4 ? 4 : height;

	    /* CAN WRITE SOME REGISTERS WHILE PENDING */

	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    WRITE_GP32(MGP_WID_HEIGHT,
		       (((unsigned long)width) << 16) | lines);
	    WRITE_GP32(MGP_PAT_DATA_1, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_DATA_0, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 2) & 0x0E;
	    WRITE_GP32(MGP_PAT_COLOR_1, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_0, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 2) & 0x0E;

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined.             */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_3, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_2, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 2) & 0x0E;
	    WRITE_GP32(MGP_PAT_COLOR_5, BYTE_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_4, BYTE_SWIZZLE(pattern[patoffset + 1]));
	    patoffset = (patoffset + 2) & 0x0E;
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch << 2;
	    height -= (unsigned short)lines;
	 }
	 break;

      case 1:				/* 12, 15 AND 16 BPP - 2 LINES PER PASS */

	 patoffset = (gu2_pattern_origin >> 27) & 0x1C;
	 while (height) {
	    lines = height > 2 ? 2 : height;

	    /* CAN WRITE SOME REGISTERS WHILE PENDING */

	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    WRITE_GP32(MGP_WID_HEIGHT,
		       (((unsigned long)width) << 16) | lines);
	    WRITE_GP32(MGP_PAT_COLOR_1, WORD_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_0, WORD_SWIZZLE(pattern[patoffset + 1]));
	    WRITE_GP32(MGP_PAT_DATA_1, WORD_SWIZZLE(pattern[patoffset + 2]));
	    WRITE_GP32(MGP_PAT_DATA_0, WORD_SWIZZLE(pattern[patoffset + 3]));
	    patoffset = (patoffset + 4) & 0x1C;

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined.             */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_5, WORD_SWIZZLE(pattern[patoffset]));
	    WRITE_GP32(MGP_PAT_COLOR_4, WORD_SWIZZLE(pattern[patoffset + 1]));
	    WRITE_GP32(MGP_PAT_COLOR_3, WORD_SWIZZLE(pattern[patoffset + 2]));
	    WRITE_GP32(MGP_PAT_COLOR_2, WORD_SWIZZLE(pattern[patoffset + 3]));
	    patoffset = (patoffset + 4) & 0x1C;
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch << 1;
	    height -= (unsigned short)lines;
	 }
	 break;

      case 2:				/* 32 BPP - 1 LINE PER PASS */

	 patoffset = (gu2_pattern_origin >> 26) & 0x38;
	 while (height) {
	    /* CAN WRITE SOME REGISTERS WHILE PENDING */

	    WRITE_GP32(MGP_DST_OFFSET, dstoffset | patxorigin);
	    WRITE_GP32(MGP_WID_HEIGHT, (((unsigned long)width) << 16) | 1l);
	    WRITE_GP32(MGP_PAT_COLOR_1, pattern[patoffset + 4]);
	    WRITE_GP32(MGP_PAT_COLOR_0, pattern[patoffset + 5]);
	    WRITE_GP32(MGP_PAT_DATA_1, pattern[patoffset + 6]);
	    WRITE_GP32(MGP_PAT_DATA_0, pattern[patoffset + 7]);

	    /* NEED TO WAIT UNTIL IDLE FOR COLORS 2 THROUGH 5 */
	    /* Those registers are not pipelined.             */

	    GU2_WAIT_BUSY;
	    WRITE_GP32(MGP_PAT_COLOR_5, pattern[patoffset]);
	    WRITE_GP32(MGP_PAT_COLOR_4, pattern[patoffset + 1]);
	    WRITE_GP32(MGP_PAT_COLOR_3, pattern[patoffset + 2]);
	    WRITE_GP32(MGP_PAT_COLOR_2, pattern[patoffset + 3]);
	    patoffset = (patoffset + 8) & 0x38;
	    WRITE_GP16(MGP_BLT_MODE, gu2_blt_mode | gu2_bm_throttle);

	    /* ADJUST FOR NEXT PASS */

	    dstoffset += gu2_dst_pitch;
	    height--;
	 }
	 break;
      }

   }

}

/*---------------------------------------------------------------------------
 * GFX2_SCREEN_TO_SCREEN_BLT
 *
 * This routine is similar to the gfx_screen_to_screen_blt routine but
 * allows the use of arbitrary source and destination strides and alpha
 * blending.  It also allows the use of an arbitrary ROP with transparency.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_screen_to_screen_blt(unsigned long srcoffset, unsigned long dstoffset,
			  unsigned short width, unsigned short height,
			  int flags)
#else
void
gfx2_screen_to_screen_blt(unsigned long srcoffset, unsigned long dstoffset,
			  unsigned short width, unsigned short height,
			  int flags)
#endif
{
   unsigned long size, xbytes;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | height;

   /* USE ALPHA SETTINGS, IF REQUESTED */

   if (gu2_alpha_active)
      blt_mode = gu2_alpha_blt_mode | MGP_BM_SRC_FB;

   else
      blt_mode = gu2_blt_mode | MGP_BM_SRC_FB;

   /* CALCULATE THE DIRECTION OF THE BLT */
   /* Using offsets, so flags from the calling routine are needed. */

   if (flags & 1) {
      xbytes = (width - 1) << gu2_xshift;
      srcoffset += xbytes;
      dstoffset += xbytes;
      blt_mode |= MGP_BM_NEG_XDIR;
   }
   if (flags & 2) {
      srcoffset += (height - 1) * gu2_src_pitch;
      dstoffset += (height - 1) * gu2_dst_pitch;
      blt_mode |= MGP_BM_NEG_YDIR;
   }

   /* TURN INTO BYTE ADDRESS IF NEGATIVE X DIRECTION */
   /* This is a quirk of the hardware. */

   if (blt_mode & MGP_BM_NEG_XDIR) {
      srcoffset += (1 << gu2_xshift) - 1;
      dstoffset += (1 << gu2_xshift) - 1;
   }

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;

   if (gu2_alpha_active) {
      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else {
      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   }

   WRITE_GP32(MGP_SRC_OFFSET, srcoffset);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch | (gu2_src_pitch << 16));
   WRITE_GP16(MGP_BLT_MODE, blt_mode | gu2_bm_throttle);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;
}

/*---------------------------------------------------------------------------
 * GFX2_MONO_EXPAND_BLT
 *
 * This routine is similar to the gfx2_screen_to_screen_blt routine but
 * expands monochrome data stored in graphics memory.  
 * WARNING: This routine assumes that the regions in graphics memory 
 * will not overlap, and therefore does not check the BLT direction.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_mono_expand_blt(unsigned long srcbase, unsigned short srcx,
		     unsigned short srcy, unsigned long dstoffset,
		     unsigned short width, unsigned short height,
		     int byte_packed)
#else
void
gfx2_mono_expand_blt(unsigned long srcbase, unsigned short srcx,
		     unsigned short srcy, unsigned long dstoffset,
		     unsigned short width, unsigned short height,
		     int byte_packed)
#endif
{
   unsigned long size, srcoffset;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE SOURCE OFFSET */

   srcoffset = srcbase + (unsigned long)srcy *gu2_src_pitch;

   srcoffset += srcx >> 3;
   srcoffset |= ((unsigned long)srcx & 7) << 26;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;

   if (gu2_alpha_active) {
      blt_mode = gu2_alpha_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else {
      blt_mode = gu2_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   }

   if (byte_packed)
      blt_mode |= MGP_BM_SRC_FB | MGP_BM_SRC_BP_MONO | gu2_bm_throttle;
   else
      blt_mode |= MGP_BM_SRC_FB | MGP_BM_SRC_MONO | gu2_bm_throttle;

   WRITE_GP32(MGP_SRC_OFFSET, srcoffset);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch | (gu2_src_pitch << 16));
   WRITE_GP16(MGP_BLT_MODE, blt_mode);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;
}

/*---------------------------------------------------------------------------
 * GFX2_COLOR_BITMAP_TO_SCREEN_BLT	
 *
 * This routine is similar to the gfx_color_bitmap_to_screen_blt routine 
 * but allows the use of an arbitrary destination stride and alpha blending.
 * It also allows the use of an arbitrary ROP with transparency.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				unsigned long dstoffset, unsigned short width,
				unsigned short height, unsigned char *data,
				short pitch)
#else
void
gfx2_color_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
				unsigned long dstoffset, unsigned short width,
				unsigned short height, unsigned char *data,
				short pitch)
#endif
{
   unsigned long size, bytes;
   unsigned long offset, temp_offset;
   unsigned long srcoffset, dword_bytes, bytes_extra;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | 1;

   /* CALCULATE STARTING OFFSETS */

   offset = (unsigned long)srcy *pitch + ((unsigned long)srcx << gu2_xshift);

   dstoffset |= gu2_pattern_origin;

   bytes = width << gu2_xshift;
   dword_bytes = bytes & ~0x3L;
   bytes_extra = bytes & 0x3L;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first).   */
   /* The source offset is always 0 since we allow misaligned dword reads. */
   /* We must wait for BLT busy because the GP may be executing a screen   */
   /* to screen BLT from the scratchpad area.                              */

   GU2_WAIT_BUSY;

   if (gu2_alpha_active) {
      blt_mode = gu2_alpha_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else {
      blt_mode = gu2_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   }
   blt_mode |= MGP_BM_SRC_FB | gu2_bm_throttle;
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;

   WRITE_GP32(MGP_WID_HEIGHT, size);

   /* WRITE DATA ONE LINE AT A TIME */
   /* For speed reasons, data is written to an offscreen scratch area and then        */
   /* BLTed using a screen to screen BLT. This is similar to the GX1 BLT buffers, but */
   /* slightly more efficient in that we can queue up data while the GP is rendering  */
   /* a line.                                                                         */

   while (height--) {
      temp_offset = offset;
      srcoffset = gfx_gx2_scratch_base;
      if (gu2_current_line)
	 srcoffset += 8192;

      GU2_WAIT_PENDING;
      WRITE_GP32(MGP_SRC_OFFSET, srcoffset);
      WRITE_GP32(MGP_DST_OFFSET, dstoffset);
      dstoffset += gu2_dst_pitch;
      dstoffset += 0x20000000;

      WRITE_FRAME_BUFFER_STRING32(srcoffset, dword_bytes, data, temp_offset);
      if (bytes_extra) {
	 temp_offset += dword_bytes;
	 srcoffset += dword_bytes;
	 WRITE_FRAME_BUFFER_STRING8(srcoffset, bytes_extra, data,
				    temp_offset);
      }
      WRITE_GP16(MGP_BLT_MODE, blt_mode);
      offset += pitch;
      gu2_current_line = 1 - gu2_current_line;
   }
}

/*---------------------------------------------------------------------------
 * GFX2_TEXT_BLT	
 *
 * This routine is similar to the gfx2_mono_bitmap_to_screen_blt routine 
 * but assumes that source data is byte-packed.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_text_blt(unsigned long dstoffset, unsigned short width,
	      unsigned short height, unsigned char *data)
#else
void
gfx2_text_blt(unsigned long dstoffset, unsigned short width,
	      unsigned short height, unsigned char *data)
#endif
{
   unsigned long size, bytes;
   unsigned long temp1 = 0, temp2 = 0, temp_offset = 0;
   unsigned long i, j = 0, fifo_lines, dwords_extra, bytes_extra;
   unsigned long shift;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE STARTING OFFSETS */

   bytes = ((width + 7) >> 3) * height;
   fifo_lines = bytes >> 5;
   dwords_extra = (bytes & 0x0000001Cl) >> 2;
   bytes_extra = bytes & 0x00000003l;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */

   GU2_WAIT_PENDING;

   if (gu2_alpha_active) {
      blt_mode = gu2_alpha_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else {
      blt_mode = gu2_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   }

   WRITE_GP32(MGP_SRC_OFFSET, 0);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch);
   WRITE_GP16(MGP_BLT_MODE, blt_mode | MGP_BM_SRC_HOST |
	      MGP_BM_SRC_BP_MONO | gu2_bm_throttle);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;

   /* WAIT FOR BLT TO BE LATCHED */

   GU2_WAIT_PENDING;

   /* WRITE ALL FULL FIFO LINES */

   for (i = 0; i < fifo_lines; i++) {
      GU2_WAIT_HALF_EMPTY;
      WRITE_GPREG_STRING32(MGP_HST_SOURCE, 8, j, data, temp_offset, temp1);
      temp_offset += 32;
   }

   /* WRITE ALL FULL DWORDS */

   if (dwords_extra || bytes_extra) {
      GU2_WAIT_HALF_EMPTY;
      if (dwords_extra) {
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, dwords_extra, i, data,
			      temp_offset, temp1);
	 temp_offset += (dwords_extra << 2);
      }
      if (bytes_extra) {
	 shift = 0;
	 WRITE_GPREG_STRING8(MGP_HST_SOURCE, bytes_extra, shift, i, data,
			     temp_offset, temp1, temp2);
      }
   }
}

/*---------------------------------------------------------------------------
 * GFX2_MONO_BITMAP_TO_SCREEN_BLT	
 *
 * This routine is similar to the gfx_mono_bitmap_to_screen_blt routine 
 * but allows the use of an arbitrary destination stride and alpha blending.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			       unsigned long dstoffset, unsigned short width,
			       unsigned short height, unsigned char *data,
			       short pitch)
#else
void
gfx2_mono_bitmap_to_screen_blt(unsigned short srcx, unsigned short srcy,
			       unsigned long dstoffset, unsigned short width,
			       unsigned short height, unsigned char *data,
			       short pitch)
#endif
{
   unsigned long size, bytes;
   unsigned long offset, temp_offset, temp1 = 0, temp2 = 0;
   unsigned long i, j = 0, fifo_lines, dwords_extra, bytes_extra;
   unsigned long shift = 0;
   unsigned short blt_mode;

   size = (((unsigned long)width) << 16) | height;

   /* CALCULATE STARTING OFFSETS */

   offset = (unsigned long)srcy *pitch + ((unsigned long)srcx >> 3);

   bytes = ((srcx & 7) + width + 7) >> 3;
   fifo_lines = bytes >> 5;
   dwords_extra = (bytes & 0x0000001Cl) >> 2;
   bytes_extra = bytes & 0x00000003l;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */

   GU2_WAIT_PENDING;

   if (gu2_alpha_active) {
      blt_mode = gu2_alpha_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else {
      blt_mode = gu2_blt_mode;

      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);
   }

   WRITE_GP32(MGP_SRC_OFFSET, ((unsigned long)srcx & 7) << 26);
   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_WID_HEIGHT, size);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch);
   WRITE_GP16(MGP_BLT_MODE, blt_mode | MGP_BM_SRC_HOST |
	      MGP_BM_SRC_MONO | gu2_bm_throttle);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;

   /* WAIT FOR BLT TO BE LATCHED */

   GU2_WAIT_PENDING;

   /* WRITE ALL OF THE DATA TO THE HOST SOURCE REGISTER */

   while (height--) {
      temp_offset = offset;

      /* WRITE ALL FULL FIFO LINES */

      for (i = 0; i < fifo_lines; i++) {
	 GU2_WAIT_HALF_EMPTY;
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, 8, j, data, temp_offset, temp1);
	 temp_offset += 32;
      }

      /* WRITE ALL FULL DWORDS */

      GU2_WAIT_HALF_EMPTY;
      if (dwords_extra)
	 WRITE_GPREG_STRING32(MGP_HST_SOURCE, dwords_extra, i, data,
			      temp_offset, temp1);
      temp_offset += (dwords_extra << 2);

      /* WRITE REMAINING BYTES */

      shift = 0;
      if (bytes_extra)
	 WRITE_GPREG_STRING8(MGP_HST_SOURCE, bytes_extra, shift, i, data,
			     temp_offset, temp1, temp2);

      offset += pitch;
   }
}

/*---------------------------------------------------------------------------
 * GFX2_BRESENHAM_LINE
 *
 * This routine is similar to the gfx_bresenam_line routine but allows 
 * the use of an arbitrary destination stride.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_bresenham_line(unsigned long dstoffset,
		    unsigned short length, unsigned short initerr,
		    unsigned short axialerr, unsigned short diagerr,
		    unsigned short flags)
#else
void
gfx2_bresenham_line(unsigned long dstoffset,
		    unsigned short length, unsigned short initerr,
		    unsigned short axialerr, unsigned short diagerr,
		    unsigned short flags)
#endif
{
   unsigned long vector_mode = gu2_vector_mode | flags;
   unsigned long data1 = (((unsigned long)axialerr) << 16) | diagerr;
   unsigned long data2 = (((unsigned long)length) << 16) | initerr;

   /* CHECK NULL LENGTH */

   if (!length)
      return;

   /* POLL UNTIL ABLE TO WRITE TO THE REGISTERS */
   /* Put off poll for as long as possible (do most calculations first). */

   GU2_WAIT_PENDING;

   if (gu2_alpha_active) {
      vector_mode = gu2_alpha_vec_mode | flags;

      WRITE_GP32(MGP_RASTER_MODE, gu2_alpha32);
   } else
      WRITE_GP32(MGP_RASTER_MODE, gu2_rop32);

   WRITE_GP32(MGP_DST_OFFSET, dstoffset | gu2_pattern_origin);
   WRITE_GP32(MGP_VEC_ERR, data1);
   WRITE_GP32(MGP_VEC_LEN, data2);
   WRITE_GP32(MGP_STRIDE, gu2_dst_pitch);
   WRITE_GP32(MGP_VECTOR_MODE, vector_mode | gu2_vm_throttle);
   gu2_bm_throttle = 0;
   gu2_vm_throttle = 0;
}

/*---------------------------------------------------------------------------
 * GFX2_SYNC_TO_VBLANK
 *
 * This routine sets a flag to synchronize the next rendering routine to 
 * VBLANK.  The flag is cleared by the rendering routine.
 *---------------------------------------------------------------------------
 */
#if GFX_2DACCEL_DYNAMIC
void
gu22_sync_to_vblank(void)
#else
void
gfx2_sync_to_vblank(void)
#endif
{
   /* SET FLAGS TO THROTTLE NEXT RENDERING ROUTINE */

   gu2_bm_throttle = MGP_BM_THROTTLE;
   gu2_vm_throttle = MGP_VM_THROTTLE;
}

/* END OF FILE */