/*
 * NGLE - hardware acceleration.
 *
 * Copyright (C) 2024 Michael Lorenz
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* $NetBSD: ngle_accel.c,v 1.11 2025/12/29 07:58:50 macallan Exp $ */

#include <sys/types.h>
#include <stdlib.h>
#include <dev/ic/stireg.h>
#include <dev/ic/nglereg.h>

#include "ngle.h"

/* Uncomment to compile in function entry/exit traces and debug messages. */
//#define DEBUG

#ifdef DEBUG
/* ENTER / LEAVE log the name of the enclosing function via xf86Msg(). */
#define ENTER xf86Msg(X_ERROR, "%s\n", __func__)
#define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__)
/* DBGMSG forwards its printf-style arguments straight to xf86Msg(). */
#define DBGMSG xf86Msg
#else
/* Without DEBUG, ENTER and LEAVE expand to nothing. */
#define ENTER
/*
 * The xf86Msg() call stays in the source, so its arguments are still
 * compiled, but the constant-false condition keeps it from ever running.
 */
#define DBGMSG if (0) xf86Msg
#define LEAVE
#endif

/*
 * EXA WaitMarker hook: spin until the NGLE_BUSY register reads zero.
 *
 * A zero read is only accepted after a second, confirming read also
 * returns zero; any non-zero read restarts the polling.
 * The Marker argument is not used.
 */
static void
NGLEWaitMarker(ScreenPtr pScreen, int Marker)
{
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	uint8_t busy;

	ENTER;
	for (;;) {
		busy = NGLERead1(fPtr, NGLE_BUSY);
		if (busy != 0)
			continue;	/* still busy, poll again */

		/* first read said idle, confirm with a second one */
		busy = NGLERead1(fPtr, NGLE_BUSY);
		if (busy == 0)
			break;
	}
	LEAVE;
}

/*
 * Spin until the value read from NGLE_FIFO is at least 'slots'.
 * (Presumably the register reports free FIFO entries - confirm against
 * the hardware documentation.)
 */
static void
NGLEWaitFifo(NGLEPtr fPtr, int slots)
{
	uint32_t avail;

	ENTER;
	for (;;) {
		avail = NGLERead4(fPtr, NGLE_FIFO);
		if (!(avail < slots))
			break;
	}
	LEAVE;
}

/*
 * Program the bitmap access mode 'ba' into NGLE_BAboth, unless it is
 * already the mode cached in fPtr->hwmode.
 *
 * Before the register is written the NGLE_BUSY register is polled until
 * it reads zero twice in a row.
 */
static inline void
NGLEAccess(NGLEPtr fPtr, uint32_t ba)
{
	uint8_t busy;

	/* requested mode is already active, nothing to do */
	if (fPtr->hwmode == ba)
		return;

	for (;;) {
		busy = NGLERead1(fPtr, NGLE_BUSY);
		if (busy != 0)
			continue;
		/* confirm the idle state with a second read */
		busy = NGLERead1(fPtr, NGLE_BUSY);
		if (busy == 0)
			break;
	}

	NGLEWrite4(fPtr, NGLE_BAboth, ba);
	fPtr->hwmode = ba;
}

/*
 * EXA PrepareCopy hook used for STI_DD_EG devices.
 *
 * Records the source pixmap's position in fPtr->offset for the following
 * NGLECopy() calls, selects the bitmap access mode and programs raster
 * operation and plane mask.
 *
 * pSrcPixmap - source of the copy
 * pDstPixmap - destination, also used to locate the screen
 * xdir, ydir - copy direction hints, not used here
 * alu        - X raster operation, written to the rop field of NGLE_IBO
 *              the same way NGLEPrepareSolid_EG() does
 * planemask  - written to NGLE_PLANEMASK
 *
 * Always returns TRUE.
 */
static Bool
NGLEPrepareCopy_EG
(
    PixmapPtr pSrcPixmap,
    PixmapPtr pDstPixmap,
    int       xdir,
    int       ydir,
    int       alu,
    Pixel     planemask
)
{
	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	int srcpitch = exaGetPixmapPitch(pSrcPixmap);
	int srcoff = exaGetPixmapOffset(pSrcPixmap);

	ENTER;

	DBGMSG(X_ERROR, "%s %d %d\n", __func__, srcoff, srcpitch);
	/*
	 * byte offset -> value added to the source y coordinate in NGLECopy();
	 * the shift presumably assumes a 2048 byte line stride - TODO confirm
	 */
	fPtr->offset = srcoff >> 11;
	NGLEAccess(fPtr, BA(IndexedDcd, Otc04, Ots08, AddrLong, 0, BINapp0I, 0));
	/* make room for the two register writes below */
	NGLEWaitFifo(fPtr, 2);
	/* honour the requested raster op instead of forcing a plain copy */
	NGLEWrite4(fPtr, NGLE_IBO,
	    IBOvals(alu, 0, BitmapExtent08, 1, DataDynamic, MaskOtc, 0, 0));
	NGLEWrite4(fPtr, NGLE_PLANEMASK, planemask);

	LEAVE;
	return TRUE;
}

/*
 * EXA PrepareCopy hook used for STI_DD_HCRX devices.
 *
 * Records the source pixmap's first line in fPtr->offset for the
 * following NGLECopy() calls, selects the bitmap access mode and programs
 * raster operation and plane mask.
 *
 * pSrcPixmap - source of the copy
 * pDstPixmap - destination, also used to locate the screen
 * xdir, ydir - copy direction hints, not used here
 * alu        - X raster operation, written to the rop field of NGLE_IBO
 *              the same way NGLEPrepareSolid_HCRX() does
 * planemask  - written to NGLE_PLANEMASK
 *
 * Always returns TRUE.
 */
static Bool
NGLEPrepareCopy_HCRX
(
    PixmapPtr pSrcPixmap,
    PixmapPtr pDstPixmap,
    int       xdir,
    int       ydir,
    int       alu,
    Pixel     planemask
)
{
	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	int srcpitch = exaGetPixmapPitch(pSrcPixmap);
	int srcoff = exaGetPixmapOffset(pSrcPixmap);

	ENTER;

	DBGMSG(X_ERROR, "%s %d %d\n", __func__, srcoff, srcpitch);
	/* byte offset -> line number, added to the source y in NGLECopy() */
	fPtr->offset = srcoff / srcpitch;
	NGLEAccess(fPtr, BA(FractDcd, Otc01, Ots08, AddrLong, 0, BINapp0F8, 0));
	/* make room for the two register writes below */
	NGLEWaitFifo(fPtr, 2);
	/* honour the requested raster op instead of forcing a plain copy */
	NGLEWrite4(fPtr, NGLE_IBO,
	    IBOvals(alu, 0, BitmapExtent32, 0, DataDynamic, MaskOtc, 0, 0));
	NGLEWrite4(fPtr, NGLE_PLANEMASK, planemask);

	LEAVE;
	return TRUE;
}

/*
 * EXA Copy hook: queue one rectangle copy.
 *
 * (xs, ys) is the source position, (xd, yd) the destination position and
 * wi x he the size.  The source y is shifted by fPtr->offset, which the
 * PrepareCopy hook set up; the destination y is shifted by the line
 * number of the destination pixmap.
 */
static void
NGLECopy(PixmapPtr pDstPixmap, int xs, int ys, int xd, int yd, int wi, int he)
{
	ScreenPtr pScreen = pDstPixmap->drawable.pScreen;
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	int dstpitch = exaGetPixmapPitch(pDstPixmap);
	int dstoff = exaGetPixmapOffset(pDstPixmap);
	/* first line of the destination pixmap */
	int dstline = dstoff / dstpitch;

	ENTER;

	/* three register writes follow */
	NGLEWaitFifo(fPtr, 3);
	/* source position, x in the upper and y in the lower half */
	NGLEWrite4(fPtr, NGLE_SRC_XY, (xs << 16) | (ys + fPtr->offset));
	/* rectangle size */
	NGLEWrite4(fPtr, NGLE_SIZE, (wi << 16) | he);
	/* destination position */
	NGLEWrite4(fPtr, NGLE_BLT_DST_START, (xd << 16) | (yd + dstline));

	/* tell EXA there is work outstanding */
	exaMarkSync(pScreen);

	LEAVE;
}

/*
 * EXA DoneCopy hook, also installed as DoneSolid.
 * Nothing to clean up; only emits the debug trace when DEBUG is defined.
 */
static void
NGLEDoneCopy(PixmapPtr pDstPixmap)
{
	ENTER;
	LEAVE;
}

/*
 * EXA PrepareSolid hook used for STI_DD_EG devices.
 *
 * Selects the destination bitmap access mode, then programs plane mask,
 * raster operation (alu) and foreground colour (fg) for the NGLESolid()
 * calls that follow.  Always returns TRUE.
 */
static Bool
NGLEPrepareSolid_EG(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
{
	ScreenPtr pScreen = pPixmap->drawable.pScreen;
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	/* destination bitmap access mode */
	const uint32_t dstmode =
	    BA(IndexedDcd, Otc32, OtsIndirect, AddrLong, 0, BINapp0I, 0);

	ENTER;

	NGLEAccess(fPtr, dstmode);
	NGLEWaitFifo(fPtr, 4);

	/* which planes may be written */
	NGLEWrite4(fPtr, NGLE_PLANEMASK, planemask);
	/* bitmap operation carrying the raster op */
	NGLEWrite4(fPtr, NGLE_IBO,
	    IBOvals(alu, 0, BitmapExtent08, 1, DataDynamic, MaskOtc, 1, 0));
	/* fill colour */
	NGLEWrite4(fPtr, NGLE_FG, fg);

	LEAVE;
	return TRUE;
}

/*
 * EXA PrepareSolid hook used for STI_DD_HCRX devices.
 *
 * Selects the destination bitmap access mode, then programs plane mask,
 * raster operation (alu) and foreground colour (fg) for the NGLESolid()
 * calls that follow.  Always returns TRUE.
 */
static Bool
NGLEPrepareSolid_HCRX(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
{
	ScreenPtr pScreen = pPixmap->drawable.pScreen;
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	/* destination bitmap access mode */
	const uint32_t dstmode =
	    BA(FractDcd, Otc32, OtsIndirect, AddrLong, 0, BINapp0F8, 0);

	ENTER;

	NGLEAccess(fPtr, dstmode);
	NGLEWaitFifo(fPtr, 4);

	/* which planes may be written */
	NGLEWrite4(fPtr, NGLE_PLANEMASK, planemask);
	/* bitmap operation carrying the raster op */
	NGLEWrite4(fPtr, NGLE_IBO,
	    IBOvals(alu, 0, BitmapExtent32, 1, DataDynamic, MaskOtc, 1, 0));
	/* fill colour */
	NGLEWrite4(fPtr, NGLE_FG, fg);

	LEAVE;
	return TRUE;
}

/*
 * EXA Solid hook: queue one filled rectangle.
 *
 * The rectangle spans x1..x2 and y1..y2 with the second corner exclusive
 * (width and height are computed as plain differences).  y1 is shifted by
 * the line number of the pixmap, derived from its byte offset and pitch.
 * Colour, raster op and plane mask were set up by the PrepareSolid hook.
 */
static void
NGLESolid(
    PixmapPtr pPixmap,
    int x1,
    int y1,
    int x2,
    int y2)
{
	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	int w = x2 - x1, h = y2 - y1;
	int pitch = exaGetPixmapPitch(pPixmap);
	int offset = exaGetPixmapOffset(pPixmap);

	ENTER;

	/* move the rectangle down to where the pixmap starts */
	y1 += offset / pitch;

	NGLEWaitFifo(fPtr, 3);
	/* transfer data */
	NGLEWrite4(fPtr, NGLE_TRANSFER_DATA, 0xffffffff);
	/* dst XY */
	NGLEWrite4(fPtr, NGLE_DST_XY, (x1 << 16) | y1);
	/* len XY start */
	NGLEWrite4(fPtr, NGLE_RECT_SIZE_START, (w << 16) | h);

	/* tell EXA there is work outstanding */
	exaMarkSync(pPixmap->drawable.pScreen);
	LEAVE;
}

/*
 * EXA PrepareAccess hook used for STI_DD_EG devices.
 *
 * Switches to a byte addressed access mode, programs NGLE_IBO and a
 * plane mask of 0xff, waits for the engine to go idle and then writes 1
 * to NGLE_CONTROL_FB.  The index argument is not used.
 * Always returns TRUE.
 */
Bool
NGLEPrepareAccess_EG(PixmapPtr pPixmap, int index)
{
	ScreenPtr pScreen = pPixmap->drawable.pScreen;
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	const uint32_t cpumode =
	    BA(IndexedDcd, Otc04, Ots08, AddrByte, 0, BINapp0I, 0);

	NGLEAccess(fPtr, cpumode);

	/* two FIFO writes follow */
	NGLEWaitFifo(fPtr, 2);
	/* raw bitmap-op value; its bit fields are not decoded in this file */
	NGLEWrite4(fPtr, NGLE_IBO, 0x83000300);
	NGLEWrite4(fPtr, NGLE_PLANEMASK, 0xff);

	/* let everything queued so far finish first */
	NGLEWaitMarker(pScreen, 0);
	NGLEWrite1(fPtr, NGLE_CONTROL_FB, 1);

	return TRUE;
}

/*
 * EXA PrepareAccess hook used for STI_DD_HCRX devices.
 *
 * Switches the access mode, programs NGLE_IBO and a plane mask of
 * 0xffffffff, waits for the engine to go idle and then writes 1 to
 * NGLE_CONTROL_FB.  The index argument is not used.
 * Always returns TRUE.
 */
Bool
NGLEPrepareAccess_HCRX(PixmapPtr pPixmap, int index)
{
	ScreenPtr pScreen = pPixmap->drawable.pScreen;
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	const uint32_t cpumode =
	    BA(FractDcd, Otc01, Ots08, AddrLong, 0, BINapp0F8, 0);

	NGLEAccess(fPtr, cpumode);

	/* two FIFO writes follow */
	NGLEWaitFifo(fPtr, 2);
	/* raw bitmap-op value; its bit fields are not decoded in this file */
	NGLEWrite4(fPtr, NGLE_IBO, 0x83000300);
	NGLEWrite4(fPtr, NGLE_PLANEMASK, 0xffffffff);

	/* let everything queued so far finish first */
	NGLEWaitMarker(pScreen, 0);
	NGLEWrite1(fPtr, NGLE_CONTROL_FB, 1);

	return TRUE;
}

/*
 * EXA UploadToScreen hook, installed for STI_DD_HCRX devices.
 *
 * Copies a w x h rectangle of 32 bit pixels from 'src' (rows src_pitch
 * bytes apart) to position (x, y) of pDst by feeding the pixels through
 * the NGLE_BINC_DST / NGLE_BINC_DATA_R registers, one row at a time.
 *
 * The destination address is built from the pixmap's byte offset, 4 bytes
 * per pixel and 8192 bytes per row.  The pixmap offset has to be part of
 * it, otherwise uploads to offscreen pixmaps would end up at the same
 * coordinates of the visible screen.
 *
 * Always returns TRUE.
 */
static Bool
NGLEUploadToScreen24(PixmapPtr pDst, int x, int y, int w, int h,
    char *src, int src_pitch)
{
	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	int ofs = exaGetPixmapOffset(pDst);
	int i;
	uint32_t *line, dst;

	ENTER;
	NGLEAccess(fPtr, BA(FractDcd, Otc01, Ots08, AddrLong, 0, BINapp0F8, 0));
	NGLEWaitFifo(fPtr, 2);
	NGLEWrite4(fPtr, NGLE_IBO,
	    IBOvals(RopSrc, 0, BitmapExtent32, 0, DataDynamic, MaskOtc, 0, 0));
	NGLEWrite4(fPtr, NGLE_PLANEMASK, 0xffffffff);

	/* start of the pixmap + 4 bytes per pixel + 8192 bytes per row */
	dst = ofs + (x << 2) + (y << 13);

	/* a non-positive height means there is nothing to upload */
	while (h-- > 0) {
		/*
		 * it *should* be impossible to overrun the FIFO using BINC
		 * writes, but overruns are annoying if they do happen so be
		 * overly cautious and make sure there is at least some room
		 */
		NGLEWaitFifo(fPtr, 15);
		NGLEWrite4(fPtr, NGLE_BINC_DST, dst);
		line = (uint32_t *)src;

		for (i = 0; i < w; i++)
			NGLEWrite4(fPtr, NGLE_BINC_DATA_R, line[i]);

		/* advance to the next source and destination row */
		src += src_pitch;
		dst += 8192;
	}

	/*
	 * the pixel data went through the FIFO and may not have landed yet,
	 * so let EXA know it has to sync before touching the pixmap
	 */
	exaMarkSync(pDst->drawable.pScreen);

	LEAVE;

	return TRUE;
}

/*
 * Set up EXA acceleration for a screen.
 *
 * Allocates the EXA driver record, describes the frame buffer memory to
 * EXA (everything past the visible fbi_height lines is offered as
 * offscreen memory), installs the acceleration hooks matching the
 * device's graphics ID and hands the record to exaDriverInit().
 *
 * Returns FALSE if the record cannot be allocated or the graphics ID is
 * not supported; in the latter case the record is released again and
 * fPtr->pExa is cleared.  Otherwise returns the result of
 * exaDriverInit().
 */
Bool
NGLEInitAccel(ScreenPtr pScreen)
{
	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
	NGLEPtr fPtr = NGLEPTR(pScrn);
	ExaDriverPtr pExa;
	int lines;

	pExa = exaDriverAlloc();
	if (!pExa)
		return FALSE;

	fPtr->pExa = pExa;

	pExa->exa_major = EXA_VERSION_MAJOR;
	pExa->exa_minor = EXA_VERSION_MINOR;

	pExa->memoryBase = fPtr->fbmem;
	/* only needed for the debug message below */
	lines = fPtr->fbmem_len / fPtr->fbi.fbi_stride;
	DBGMSG(X_ERROR, "lines %d\n", lines);
	pExa->memorySize = fPtr->fbmem_len;
	/* offscreen memory starts right after the visible lines */
	pExa->offScreenBase = fPtr->fbi.fbi_stride * fPtr->fbi.fbi_height;
	/*
	 * pixmaps start on a line boundary and use whole lines, the hooks
	 * turn pixmap offsets into line numbers
	 */
	pExa->pixmapOffsetAlign = fPtr->fbi.fbi_stride;
	pExa->pixmapPitchAlign = fPtr->fbi.fbi_stride;

	pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_MIXED_PIXMAPS;

	pExa->maxX = 2048;
	pExa->maxY = 2048;

	/* no access mode programmed yet, force the first NGLEAccess() */
	fPtr->hwmode = -1;

	pExa->WaitMarker = NGLEWaitMarker;
	pExa->Solid = NGLESolid;
	pExa->DoneSolid = NGLEDoneCopy;
	pExa->Copy = NGLECopy;
	pExa->DoneCopy = NGLEDoneCopy;
	switch (fPtr->gid) {
		case STI_DD_EG:
			pExa->PrepareCopy = NGLEPrepareCopy_EG;
			pExa->PrepareSolid = NGLEPrepareSolid_EG;
			pExa->PrepareAccess = NGLEPrepareAccess_EG;
			break;
		case STI_DD_HCRX:
			pExa->PrepareCopy = NGLEPrepareCopy_HCRX;
			pExa->PrepareSolid = NGLEPrepareSolid_HCRX;
			pExa->PrepareAccess = NGLEPrepareAccess_HCRX;
			pExa->UploadToScreen = NGLEUploadToScreen24;
			break;
		default:
			xf86Msg(X_ERROR,
			    "unsupported device GID %08x\n", fPtr->gid);
			/* don't leak the record or leave a stale pointer */
			free(pExa);
			fPtr->pExa = NULL;
			return FALSE;
	}
	NGLEWaitMarker(pScreen, 0);

	return exaDriverInit(pScreen, pExa);
}
