diff options
Diffstat (limited to 'src/fe-gtk/mmx_cmod.S')
-rw-r--r-- | src/fe-gtk/mmx_cmod.S | 530 |
1 files changed, 0 insertions, 530 deletions
/*
 * Copyright (C) 1997-2001, Michael Jennings
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies of the Software, its documentation and marketing & publicity
 * materials, and acknowledgment shall be given in the documentation, materials
 * and software packages that this Software was used.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Reconstructed from the deletion diff of src/fe-gtk/mmx_cmod.S
 * (blob 12e866de): diff markers removed and line structure restored.
 */

/* MMX routines for tinting XImages written by Willem Monsuwe <willem@stack.nl> */

/*
 * Syntax : GNU as, AT&T operand order, passed through the C preprocessor.
 * Target : 32-bit x86 with MMX.
 *
 * Function calling conventions:
 *   shade_ximage_xx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
 *   int have_mmx(void);
 *
 * All arguments are read from the stack relative to %ebp (see the defines
 * below).  rm/gm/bm are per-channel multipliers in which 256 leaves the
 * channel unchanged: every channel is computed as (value * m) >> 8.
 * The shade routines return immediately when w <= 0 or h <= 0.
 *
 * Registers: ENTER/LEAVE preserve %ebx, %ecx, %edx, %edi, %esi and %ebp.
 * %eax and the MMX registers are clobbered; LEAVE executes emms before
 * returning.
 *
 * NOTE: because this file goes through cpp, the bare identifiers data, bpl,
 * w, h, rm, gm and bm are macros and must not be used for anything else.
 */

#define data	8(%ebp)
#define bpl	12(%ebp)
#define w	16(%ebp)
#define h	20(%ebp)
#define rm	24(%ebp)
#define gm	28(%ebp)
#define bm	32(%ebp)

#ifdef UNDERSCORE_SYMBOLS /* need this to link with msvc */
#define SHADE_XIMAGE_15	_shade_ximage_15_mmx
#define SHADE_XIMAGE_16	_shade_ximage_16_mmx
#define SHADE_XIMAGE_32	_shade_ximage_32_mmx
#define HAVE_MMX	_have_mmx
#else
#define SHADE_XIMAGE_15	shade_ximage_15_mmx
#define SHADE_XIMAGE_16	shade_ximage_16_mmx
#define SHADE_XIMAGE_32	shade_ximage_32_mmx
#define HAVE_MMX	have_mmx
#endif

.globl SHADE_XIMAGE_15
.globl SHADE_XIMAGE_16
.globl SHADE_XIMAGE_32
.globl HAVE_MMX

.text
.align 8

/*
 * ENTER: build the frame, save the working registers and load
 *   %esi = data, %ebx = w, %edx = h.
 */
#define ENTER \
	pushl	%ebp		;\
	movl	%esp, %ebp	;\
	pushl	%ebx		;\
	pushl	%ecx		;\
	pushl	%edx		;\
	pushl	%edi		;\
	pushl	%esi		;\
	movl	data, %esi	;\
	movl	w, %ebx		;\
	movl	h, %edx

/*
 * LEAVE: leave MMX state (emms), restore what ENTER saved and return.
 * The numeric label 4 it defines is not referenced by the routines below;
 * their "jmp 4f" binds to the nearer loop-local label 4.
 */
#define LEAVE \
4:				;\
	emms			;\
	popl	%esi		;\
	popl	%edi		;\
	popl	%edx		;\
	popl	%ecx		;\
	popl	%ebx		;\
	movl	%ebp, %esp	;\
	popl	%ebp		;\
	ret

/*
 * BROADCAST_MULTIPLIERS: replicate the low 16 bits of rm, gm and bm into
 * all four words of %mm5, %mm6 and %mm7 respectively.
 */
#define BROADCAST_MULTIPLIERS \
	movd	rm, %mm5	;\
	movd	gm, %mm6	;\
	movd	bm, %mm7	;\
	punpcklwd %mm5, %mm5	/* 00 00 00 00 rm rm rm rm */ ;\
	punpcklwd %mm6, %mm6	/* 00 00 00 00 gm gm gm gm */ ;\
	punpcklwd %mm7, %mm7	/* 00 00 00 00 bm bm bm bm */ ;\
	punpckldq %mm5, %mm5	/* rm rm rm rm rm rm rm rm */ ;\
	punpckldq %mm6, %mm6	/* gm gm gm gm gm gm gm gm */ ;\
	punpckldq %mm7, %mm7	/* bm bm bm bm bm bm bm bm */

/*
 * 5-5-5 pixel kernel, operating on the words of %mm0.
 *
 * SPLIT_SCALE_555: split each word into r (%mm0), g (%mm1), b (%mm2),
 *   each positioned as value << 8, then pmulhw by the multiplier so the
 *   result word is (value * m) >> 8.
 * CLAMP_555: clamp each result to 31.  %mm3 must hold ff e0 in every word:
 *   the saturating add pins anything >= 31 at ff ff, and subtracting the
 *   bias again leaves min(value, 31).
 * MERGE_555: reassemble r:g:b into %mm0.
 */
#define SPLIT_SCALE_555 \
	movq	%mm0, %mm1	/* rg gb */ ;\
	movq	%mm0, %mm2	/* rg gb */ ;\
	psrlw	$5, %mm1	/* 0r rg */ ;\
	psrlw	$10, %mm0	/* 00 0r */ ;\
	psllw	$11, %mm2	/* b0 00 */ ;\
	psllw	$11, %mm1	/* g0 00 */ ;\
	psllw	$8, %mm0	/* 0r 00 */ ;\
	psrlw	$3, %mm1	/* 0g 00 */ ;\
	psrlw	$3, %mm2	/* 0b 00 */ ;\
	pmulhw	%mm5, %mm0	/* xx xr */ ;\
	pmulhw	%mm6, %mm1	/* xx xg */ ;\
	pmulhw	%mm7, %mm2	/* xx xb */

#define CLAMP_555 \
	paddusw	%mm3, %mm0	/* ff er */ ;\
	paddusw	%mm3, %mm1	/* ff eg */ ;\
	paddusw	%mm3, %mm2	/* ff eb */ ;\
	psubw	%mm3, %mm0	/* 00 0r */ ;\
	psubw	%mm3, %mm1	/* 00 0g */ ;\
	psubw	%mm3, %mm2	/* 00 0b */

#define MERGE_555 \
	psllw	$10, %mm0	/* r0 00 */ ;\
	psllw	$5, %mm1	/* 0g g0 */ ;\
	por	%mm2, %mm0	/* r0 0b */ ;\
	por	%mm1, %mm0	/* rg gb */

/*
 * 5-6-5 pixel kernel; same structure as the 5-5-5 one.
 *
 * CLAMP_565 needs %mm3 = ff e0 (5-bit limit) and %mm4 = ff c0 (6-bit limit)
 * in every word.  Red gets no psubw: the psllw $11 in MERGE_565 keeps only
 * the low five bits, which shifts the whole bias out.
 */
#define SPLIT_SCALE_565 \
	movq	%mm0, %mm1	/* rg gb */ ;\
	movq	%mm0, %mm2	/* rg gb */ ;\
	psrlw	$5, %mm1	/* 0r rg */ ;\
	psrlw	$11, %mm0	/* 00 0r */ ;\
	psllw	$11, %mm2	/* b0 00 */ ;\
	psllw	$10, %mm1	/* g0 00 */ ;\
	psllw	$8, %mm0	/* 0r 00 */ ;\
	psrlw	$2, %mm1	/* 0g 00 */ ;\
	psrlw	$3, %mm2	/* 0b 00 */ ;\
	pmulhw	%mm5, %mm0	/* xx xr */ ;\
	pmulhw	%mm6, %mm1	/* xx xg */ ;\
	pmulhw	%mm7, %mm2	/* xx xb */

#define CLAMP_565 \
	paddusw	%mm3, %mm0	/* ff er */ ;\
	paddusw	%mm4, %mm1	/* ff cg */ ;\
	paddusw	%mm3, %mm2	/* ff eb */ ;\
	psubw	%mm4, %mm1	/* 00 0g */ ;\
	psubw	%mm3, %mm2	/* 00 0b */

#define MERGE_565 \
	psllw	$11, %mm0	/* r0 00 */ ;\
	psllw	$5, %mm1	/* 0g g0 */ ;\
	por	%mm2, %mm0	/* r0 0b */ ;\
	por	%mm1, %mm0	/* rg gb */


/*
 * shade_ximage_15_mmx: tint a 5-5-5 image in place.
 *
 * Row loop layout (shared by all four 16-bit loops in this file):
 *   %esi = row start + 2*w - 6, %ebx = -w, %edx = rows left,
 *   %ecx = pixel index running from 3 - w up to 3, so that
 *   (%esi, %ecx, 2) is the current pixel.
 *   Label 2 handles four pixels per pass while %ecx < 0 (at least four
 *   pixels left); labels 3/4 finish the row one pixel at a time while
 *   %ecx <= 2.
 * If any multiplier exceeds 256 the clamping loop is used instead.
 */
SHADE_XIMAGE_15:
	ENTER

	testl	%ebx, %ebx		/* w <= 0: nothing to do */
	jle	5f
	testl	%edx, %edx		/* h <= 0: nothing to do */
	jle	5f

	leal	-6(%esi, %ebx, 2), %esi
	negl	%ebx

	/* Setup multipliers */
	BROADCAST_MULTIPLIERS

	cmpl	$256, rm		/* any multiplier > 1.0 can overflow */
	jg	shade_ximage_15_mmx_saturate
	cmpl	$256, gm
	jg	shade_ximage_15_mmx_saturate
	cmpl	$256, bm
	jg	shade_ximage_15_mmx_saturate

1:	movl	%ebx, %ecx
	addl	$3, %ecx
	jns	3f			/* row shorter than four pixels */
2:
	movq	(%esi, %ecx, 2), %mm0
	SPLIT_SCALE_555
	MERGE_555
	movq	%mm0, (%esi, %ecx, 2)

	addl	$4, %ecx
	js	2b
	jmp	4f
3:
	movw	(%esi, %ecx, 2), %ax	/* only the low word of eax matters */
	movd	%eax, %mm0
	SPLIT_SCALE_555
	MERGE_555
	movd	%mm0, %eax
	movw	%ax, (%esi, %ecx, 2)

	incl	%ecx
4:
	cmpl	$2, %ecx
	jng	3b

	addl	bpl, %esi		/* next row */
	decl	%edx
	jnz	1b
5:
	LEAVE


/* Clamping variant of the 5-5-5 loop; entered by jump with the frame,
 * %esi/%ebx/%edx and %mm5-%mm7 already set up. */
shade_ximage_15_mmx_saturate:

	pcmpeqw	%mm3, %mm3
	psllw	$5, %mm3		/* ff e0 ff e0 ff e0 ff e0 */

1:	movl	%ebx, %ecx
	addl	$3, %ecx
	jns	3f
2:
	movq	(%esi, %ecx, 2), %mm0
	SPLIT_SCALE_555
	/* Saturate upper */
	CLAMP_555
	MERGE_555
	movq	%mm0, (%esi, %ecx, 2)

	addl	$4, %ecx
	js	2b
	jmp	4f
3:
	movw	(%esi, %ecx, 2), %ax
	movd	%eax, %mm0
	SPLIT_SCALE_555
	/* Saturate upper */
	CLAMP_555
	MERGE_555
	movd	%mm0, %eax
	movw	%ax, (%esi, %ecx, 2)

	incl	%ecx
4:
	cmpl	$2, %ecx
	jng	3b

	addl	bpl, %esi
	decl	%edx
	jnz	1b
5:
	LEAVE


/*
 * shade_ximage_16_mmx: tint a 5-6-5 image in place.
 * Same loop layout as shade_ximage_15_mmx.
 */
SHADE_XIMAGE_16:
	ENTER

	testl	%ebx, %ebx		/* w <= 0: nothing to do */
	jle	5f
	testl	%edx, %edx		/* h <= 0: nothing to do */
	jle	5f

	leal	-6(%esi, %ebx, 2), %esi
	negl	%ebx

	/* Setup multipliers */
	BROADCAST_MULTIPLIERS

	cmpl	$256, rm		/* any multiplier > 1.0 can overflow */
	jg	shade_ximage_16_mmx_saturate
	cmpl	$256, gm
	jg	shade_ximage_16_mmx_saturate
	cmpl	$256, bm
	jg	shade_ximage_16_mmx_saturate

1:	movl	%ebx, %ecx
	addl	$3, %ecx
	jns	3f
2:
	movq	(%esi, %ecx, 2), %mm0
	SPLIT_SCALE_565
	MERGE_565
	movq	%mm0, (%esi, %ecx, 2)

	addl	$4, %ecx
	js	2b
	jmp	4f
3:
	movw	(%esi, %ecx, 2), %ax
	movd	%eax, %mm0
	SPLIT_SCALE_565
	MERGE_565
	movd	%mm0, %eax
	movw	%ax, (%esi, %ecx, 2)

	incl	%ecx
4:
	cmpl	$2, %ecx
	jng	3b

	addl	bpl, %esi
	decl	%edx
	jnz	1b
5:
	LEAVE


/* Clamping variant of the 5-6-5 loop; entered by jump with the frame,
 * %esi/%ebx/%edx and %mm5-%mm7 already set up. */
shade_ximage_16_mmx_saturate:

	pcmpeqw	%mm3, %mm3
	movq	%mm3, %mm4
	psllw	$5, %mm3		/* ff e0 ff e0 ff e0 ff e0 */
	psllw	$6, %mm4		/* ff c0 ff c0 ff c0 ff c0 */

1:	movl	%ebx, %ecx
	addl	$3, %ecx
	jns	3f
2:
	movq	(%esi, %ecx, 2), %mm0
	SPLIT_SCALE_565
	/* Saturate upper */
	CLAMP_565
	MERGE_565
	movq	%mm0, (%esi, %ecx, 2)

	addl	$4, %ecx
	js	2b
	jmp	4f
3:
	movw	(%esi, %ecx, 2), %ax
	movd	%eax, %mm0
	SPLIT_SCALE_565
	/* Saturate upper */
	CLAMP_565
	MERGE_565
	movd	%mm0, %eax
	movw	%ax, (%esi, %ecx, 2)

	incl	%ecx
4:
	cmpl	$2, %ecx
	jng	3b

	addl	bpl, %esi
	decl	%edx
	jnz	1b
5:
	LEAVE


/*
 * shade_ximage_32_mmx: tint a 32-bit (00 rr gg bb) image in place, one
 * pixel per iteration.
 *
 *   %esi = end of the current row, %ebx = -w, %ecx = index from -w up to 0.
 *   %mm4 = multipliers as words (00 rm gm bm).
 *   Each channel byte is widened to byte << 8 and its sign bit flipped so
 *   that the signed pmulhw can be used; %mm5 holds pmulhw(0x8000, m), the
 *   term that flip introduces, and is subtracted afterwards.  packuswb then
 *   clamps every channel to 0..255, so no separate saturating path exists.
 */
SHADE_XIMAGE_32:
	ENTER

	testl	%ebx, %ebx		/* w <= 0: nothing to do */
	jle	3f
	testl	%edx, %edx		/* h <= 0: nothing to do */
	jle	3f

	leal	(%esi, %ebx, 4), %esi
	negl	%ebx

	movd	rm, %mm4
	movd	gm, %mm5
	movd	bm, %mm6
	psllq	$32, %mm4
	psllq	$16, %mm5
	por	%mm6, %mm4
	por	%mm5, %mm4

	pcmpeqw	%mm6, %mm6
	psllw	$15, %mm6		/* 80 00 80 00 80 00 80 00 */
	movq	%mm6, %mm5
	pmulhw	%mm4, %mm5		/* Get correction factor */
1:
	movl	%ebx, %ecx
2:
	movd	(%esi, %ecx, 4), %mm1	/* 00 rr gg bb */
	pxor	%mm0, %mm0
	punpcklbw %mm1, %mm0		/* 00 00 rr 00 gg 00 bb 00 */
	pxor	%mm6, %mm0		/* Flip sign */

	pmulhw	%mm4, %mm0		/* 00 00 xx rr xx gg xx bb */
	psubw	%mm5, %mm0		/* Correct range */
	packuswb %mm0, %mm0		/* 00 rr gg bb 00 rr gg bb */

	movd	%mm0, (%esi, %ecx, 4)

	incl	%ecx
	jnz	2b

	addl	bpl, %esi		/* next row */
	decl	%edx
	jnz	1b
3:
	LEAVE


/*
 * have_mmx: return 1 in %eax if the CPU reports MMX, 0 otherwise.
 * Preserves %ebx; %ecx and %edx are overwritten by cpuid.
 */
HAVE_MMX:
	pushl	%ebx
/* Check if bit 21 in flags word is writeable */
	pushfl
	popl	%eax
	movl	%eax, %ebx
	xorl	$0x00200000, %eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax

	cmpl	%eax, %ebx
	je	8f			/* bit did not toggle: no CPUID */

/* OK, we have CPUID */

	movl	$1, %eax
	cpuid

	testl	$0x00800000, %edx	/* bit 23 of the feature flags */
	jz	8f

	movl	$1, %eax		/* success, have mmx */
	popl	%ebx
	ret

8:
	xorl	%eax, %eax		/* failed, no mmx */
	popl	%ebx
	ret

#if defined(__GNUC__) && !defined(_WIN32)
.section .note.GNU-stack, "", @progbits
.previous
#endif