summary refs log tree commit diff stats
path: root/src/fe-gtk/mmx_cmod.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/fe-gtk/mmx_cmod.S')
-rw-r--r--src/fe-gtk/mmx_cmod.S530
1 files changed, 530 insertions, 0 deletions
diff --git a/src/fe-gtk/mmx_cmod.S b/src/fe-gtk/mmx_cmod.S
new file mode 100644
index 00000000..12e866de
--- /dev/null
+++ b/src/fe-gtk/mmx_cmod.S
@@ -0,0 +1,530 @@
+/*
+ * Copyright (C) 1997-2001, Michael Jennings
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies of the Software, its documentation and marketing & publicity
+ * materials, and acknowledgment shall be given in the documentation, materials
+ * and software packages that this Software was used.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* MMX routines for tinting XImages written by Willem Monsuwe <willem@stack.nl> */
+
+/* Function calling conventions:
+ *   shade_ximage_xx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+ */
+
+#define data	8(%ebp)
+#define bpl	12(%ebp)
+#define w	16(%ebp)
+#define h	20(%ebp)
+#define rm	24(%ebp)
+#define gm	28(%ebp)
+#define bm	32(%ebp)
+
+#ifdef UNDERSCORE_SYMBOLS /* need this to link with msvc */
+#define SHADE_XIMAGE_15 _shade_ximage_15_mmx
+#define SHADE_XIMAGE_16 _shade_ximage_16_mmx
+#define SHADE_XIMAGE_32 _shade_ximage_32_mmx
+#define HAVE_MMX _have_mmx
+#else
+#define SHADE_XIMAGE_15 shade_ximage_15_mmx
+#define SHADE_XIMAGE_16 shade_ximage_16_mmx
+#define SHADE_XIMAGE_32 shade_ximage_32_mmx
+#define HAVE_MMX have_mmx
+#endif
+
+.globl SHADE_XIMAGE_15
+.globl SHADE_XIMAGE_16
+.globl SHADE_XIMAGE_32
+.globl HAVE_MMX
+
+.bss
+.text
+.align 8
+
+#define ENTER                   \
+        pushl %ebp              ;\
+        movl %esp, %ebp         ;\
+        pushl %ebx              ;\
+        pushl %ecx              ;\
+        pushl %edx              ;\
+        pushl %edi              ;\
+        pushl %esi              ;\
+        movl data, %esi         ;\
+        movl w, %ebx            ;\
+        movl h, %edx
+
+#define LEAVE                   \
+4:                              ;\
+        emms                    ;\
+        popl %esi               ;\
+        popl %edi               ;\
+        popl %edx               ;\
+        popl %ecx               ;\
+        popl %ebx               ;\
+        movl %ebp, %esp         ;\
+        popl %ebp               ;\
+        ret
+
+
+SHADE_XIMAGE_15:
+        ENTER
+
+        leal -6(%esi, %ebx, 2), %esi
+        negl %ebx
+        jz 5f
+
+        /* Setup multipliers */
+        movd rm, %mm5
+        movd gm, %mm6
+        movd bm, %mm7
+        punpcklwd %mm5, %mm5    /* 00 00 00 00 rm rm rm rm */
+        punpcklwd %mm6, %mm6    /* 00 00 00 00 gm gm gm gm */
+        punpcklwd %mm7, %mm7    /* 00 00 00 00 bm bm bm bm */
+        punpckldq %mm5, %mm5    /* rm rm rm rm rm rm rm rm */
+        punpckldq %mm6, %mm6    /* gm gm gm gm gm gm gm gm */
+        punpckldq %mm7, %mm7    /* bm bm bm bm bm bm bm bm */
+
+        cmpl $256, rm
+        jg shade_ximage_15_mmx_saturate
+        cmpl $256, gm
+        jg shade_ximage_15_mmx_saturate
+        cmpl $256, bm
+        jg shade_ximage_15_mmx_saturate
+
+1:      movl %ebx, %ecx
+        addl $3, %ecx
+        jns 3f
+2:
+        movq (%esi, %ecx, 2), %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $10, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $11, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $3, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* 00 0r */
+        pmulhw %mm6, %mm1       /* 00 0g */
+        pmulhw %mm7, %mm2       /* 00 0b */
+
+        psllw $10, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+        
+        movq %mm0, (%esi, %ecx, 2)
+
+        addl $4, %ecx
+        js 2b
+        jmp 4f
+3:
+        movw (%esi, %ecx, 2), %ax
+        movd %eax, %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $10, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $11, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $3, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* 00 0r */
+        pmulhw %mm6, %mm1       /* 00 0g */
+        pmulhw %mm7, %mm2       /* 00 0b */
+
+        psllw $10, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movd %mm0, %eax
+        movw %ax, (%esi, %ecx, 2)
+
+        incl %ecx
+4:
+        cmpl $2, %ecx
+        jng 3b
+
+        addl bpl, %esi
+        decl %edx
+        jnz 1b
+5:
+        LEAVE
+
+
+shade_ximage_15_mmx_saturate:
+
+        pcmpeqw %mm3, %mm3
+        psllw $5, %mm3          /* ff e0 ff e0 ff e0 ff e0 */
+
+1:      movl %ebx, %ecx
+        addl $3, %ecx
+        jns 3f
+2:
+        movq (%esi, %ecx, 2), %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $10, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $11, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $3, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* xx xr */
+        pmulhw %mm6, %mm1       /* xx xg */
+        pmulhw %mm7, %mm2       /* xx xb */
+
+        /* Saturate upper */
+        paddusw %mm3, %mm0      /* ff er */
+        paddusw %mm3, %mm1      /* ff eg */
+        paddusw %mm3, %mm2      /* ff eb */
+
+        psubw %mm3, %mm0        /* 00 0r */
+        psubw %mm3, %mm1        /* 00 0g */
+        psubw %mm3, %mm2        /* 00 0b */
+        
+        psllw $10, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movq %mm0, (%esi, %ecx, 2)
+
+        addl $4, %ecx
+        js 2b
+        jmp 4f
+3:
+        movw (%esi, %ecx, 2), %ax
+        movd %eax, %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $10, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $11, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $3, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* xx xr */
+        pmulhw %mm6, %mm1       /* xx xg */
+        pmulhw %mm7, %mm2       /* xx xb */
+
+        /* Saturate upper */
+        paddusw %mm3, %mm0      /* ff er */
+        paddusw %mm3, %mm1      /* ff eg */
+        paddusw %mm3, %mm2      /* ff eb */
+
+        psubw %mm3, %mm0        /* 00 0r */
+        psubw %mm3, %mm1        /* 00 0g */
+        psubw %mm3, %mm2        /* 00 0b */
+        
+        psllw $10, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movd %mm0, %eax
+        movw %ax, (%esi, %ecx, 2)
+
+        incl %ecx
+4:
+        cmpl $2, %ecx
+        jng 3b
+
+        addl bpl, %esi
+        decl %edx
+        jnz 1b
+5:
+        LEAVE
+
+
+SHADE_XIMAGE_16:
+        ENTER
+
+        leal -6(%esi, %ebx, 2), %esi
+        negl %ebx
+        jz 5f
+
+        /* Setup multipliers */
+        movd rm, %mm5
+        movd gm, %mm6
+        movd bm, %mm7
+        punpcklwd %mm5, %mm5    /* 00 00 00 00 rm rm rm rm */
+        punpcklwd %mm6, %mm6    /* 00 00 00 00 gm gm gm gm */
+        punpcklwd %mm7, %mm7    /* 00 00 00 00 bm bm bm bm */
+        punpckldq %mm5, %mm5    /* rm rm rm rm rm rm rm rm */
+        punpckldq %mm6, %mm6    /* gm gm gm gm gm gm gm gm */
+        punpckldq %mm7, %mm7    /* bm bm bm bm bm bm bm bm */
+
+        cmpl $256, rm
+        jg shade_ximage_16_mmx_saturate
+        cmpl $256, gm
+        jg shade_ximage_16_mmx_saturate
+        cmpl $256, bm
+        jg shade_ximage_16_mmx_saturate
+
+1:      movl %ebx, %ecx
+        addl $3, %ecx
+        jns 3f
+2:
+        movq (%esi, %ecx, 2), %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $11, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $10, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $2, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* 00 0r */
+        pmulhw %mm6, %mm1       /* 00 0g */
+        pmulhw %mm7, %mm2       /* 00 0b */
+
+        psllw $11, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+        
+        movq %mm0, (%esi, %ecx, 2)
+
+        addl $4, %ecx
+        js 2b
+	jmp 4f
+3:
+        movw (%esi, %ecx, 2), %ax
+        movd %eax, %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $11, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $10, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $2, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* 00 0r */
+        pmulhw %mm6, %mm1       /* 00 0g */
+        pmulhw %mm7, %mm2       /* 00 0b */
+
+        psllw $11, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movd %mm0, %eax
+        movw %ax, (%esi, %ecx, 2)
+
+        incl %ecx
+4:
+        cmpl $2, %ecx
+        jng 3b
+
+        addl bpl, %esi
+        decl %edx
+        jnz 1b
+5:
+        LEAVE
+
+
+shade_ximage_16_mmx_saturate:
+
+        pcmpeqw %mm3, %mm3
+        movq %mm3, %mm4
+        psllw $5, %mm3          /* ff e0 ff e0 ff e0 ff e0 */
+        psllw $6, %mm4          /* ff c0 ff c0 ff c0 ff c0 */
+
+1:      movl %ebx, %ecx
+        addl $3, %ecx
+        jns 3f
+2:
+        movq (%esi, %ecx, 2), %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $11, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $10, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $2, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* xx xr */
+        pmulhw %mm6, %mm1       /* xx xg */
+        pmulhw %mm7, %mm2       /* xx xb */
+
+        /* Saturate upper */
+        paddusw %mm3, %mm0      /* ff er */
+        paddusw %mm4, %mm1      /* ff cg */
+        paddusw %mm3, %mm2      /* ff eb */
+
+        psubw %mm4, %mm1        /* 00 0g */
+        psubw %mm3, %mm2        /* 00 0b */
+        
+        psllw $11, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movq %mm0, (%esi, %ecx, 2)
+
+        addl $4, %ecx
+        js 2b
+        jmp 4f
+3:
+        movw (%esi, %ecx, 2), %ax
+        movd %eax, %mm0
+
+        movq %mm0, %mm1         /* rg gb */
+        movq %mm0, %mm2         /* rg gb */
+        psrlw $5, %mm1          /* 0r rg */
+        psrlw $11, %mm0         /* 00 0r */
+        psllw $11, %mm2         /* b0 00 */
+        psllw $10, %mm1         /* g0 00 */
+        psllw $8, %mm0          /* 0r 00 */
+        psrlw $2, %mm1          /* 0g 00 */
+        psrlw $3, %mm2          /* 0b 00 */
+
+        pmulhw %mm5, %mm0       /* xx xr */
+        pmulhw %mm6, %mm1       /* xx xg */
+        pmulhw %mm7, %mm2       /* xx xb */
+
+        /* Saturate upper */
+        paddusw %mm3, %mm0      /* ff er */
+        paddusw %mm4, %mm1      /* ff cg */
+        paddusw %mm3, %mm2      /* ff eb */
+
+        psubw %mm4, %mm1        /* 00 0g */
+        psubw %mm3, %mm2        /* 00 0b */
+        
+        psllw $11, %mm0         /* r0 00 */
+        psllw $5, %mm1          /* 0g g0 */
+        por %mm2, %mm0          /* r0 0b */
+        por %mm1, %mm0          /* rg gb */
+
+        movd %mm0, %eax
+        movw %ax, (%esi, %ecx, 2)
+
+        incl %ecx
+4:
+        cmpl $2, %ecx
+        jng 3b
+
+        addl bpl, %esi
+        decl %edx
+        jnz 1b
+5:
+        LEAVE
+
+
+SHADE_XIMAGE_32:
+        ENTER
+
+        leal (%esi, %ebx, 4), %esi
+        negl %ebx
+        jz 3f
+
+        movd rm, %mm4
+        movd gm, %mm5
+        movd bm, %mm6
+        psllq $32, %mm4
+        psllq $16, %mm5
+        por %mm6, %mm4
+        por %mm5, %mm4
+
+        pcmpeqw %mm6, %mm6
+        psllw $15, %mm6                 /* 80 00 80 00 80 00 80 00 */
+        movq %mm6, %mm5
+        pmulhw %mm4, %mm5               /* Get correction factor */
+1:
+        movl %ebx, %ecx
+2:
+        movd (%esi, %ecx, 4), %mm1      /* 00 rr gg bb */
+        pxor %mm0, %mm0
+        punpcklbw %mm1, %mm0            /* 00 00 rr 00 gg 00 bb 00 */
+        pxor %mm6, %mm0                 /* Flip sign */
+
+        pmulhw %mm4, %mm0               /* 00 00 xx rr xx gg xx bb */
+        psubw %mm5, %mm0                /* Correct range */
+        packuswb %mm0, %mm0             /* 00 rr gg bb 00 rr gg bb */
+
+        movd %mm0, (%esi, %ecx, 4)
+
+        incl %ecx
+        jnz 2b
+
+        addl bpl, %esi
+        decl %edx
+        jnz 1b
+3:
+        LEAVE
+
+
+HAVE_MMX:
+	push	%ebx
+/* Check if bit 21 in flags word is writeable */
+	pushfl	
+	popl	%eax
+	movl	%eax,%ebx
+	xorl	$0x00200000, %eax
+	pushl	%eax
+	popfl
+	pushfl
+	popl	%eax
+
+	cmpl	%eax, %ebx
+	je	8f
+
+/* OK, we have CPUID */
+
+	movl	$1, %eax
+	cpuid
+	
+	test	$0x00800000, %edx
+	jz	8f
+
+	movl	$1, %eax	/* success, have mmx */
+	popl	%ebx
+	ret
+
+8:
+	xorl	%eax,%eax	/* failed, no mmx */
+	popl	%ebx
+	ret
+
+#if defined(__GNUC__) && !defined(_WIN32)
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif