summary refs log tree commit diff stats
path: root/src/fe-gtk/mmx_cmod.S
blob: 12e866de8671e99727cc57cdd51fbf9e10c30cd3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
@echo off
rem Builds the x86 xchat sources and plugins with nmake, then stages the
rem executable, its runtime DLLs and the plugin DLLs into the tmp directory.

rem Toolchain and dependency locations
set WDK_ROOT=c:\WinDDK\7600.16385.1
cd ..
set DEV_32=%cd%\dep-x86
cd src
rem Generate makeinc.mak: a DEV32 line followed by the skeleton makefile
echo DEV32 = %DEV_32% > makeinc.mak
type makeinc.skel.mak >> makeinc.mak
set INCLUDE=%WDK_ROOT%\inc\api;%WDK_ROOT%\inc\crt;%WDK_ROOT%\inc\api\crt\stl70
set LIB=%WDK_ROOT%\lib\wxp\i386;%WDK_ROOT%\lib\Crt\i386
rem Remember the original PATH so it can be restored after the build
set OPATH=%PATH%
set PATH=%PROGRAMFILES(X86)%\Microsoft Visual Studio 10.0\VC\bin;%PROGRAMFILES(X86)%\Microsoft Visual Studio 10.0\Common7\IDE;%PROGRAMFILES(X86)%\Microsoft SDKs\Windows\v7.0A\Bin;%DEV_32%\bin
rem Clean, build the pixmaps first, then the rest of src
nmake /nologo /f makefile.mak clean
cd pixmaps
nmake /nologo /f makefile.mak
cd ..
nmake /nologo /f makefile.mak
rem Clean and build the plugins
cd ..\plugins
nmake /nologo /f makefile.mak clean
nmake /nologo /f makefile.mak
cd ..\build
set PATH=%OPATH%
rem Staging: recreate the destination directory from scratch
set DEPS_ROOT=..\dep-x86
set XCHAT_DEST=..\tmp
rmdir /q /s %XCHAT_DEST%
mkdir %XCHAT_DEST%
rem Create the portable-mode file and move it into the destination
echo 2> portable-mode
move portable-mode %XCHAT_DEST%
rem Main executable followed by the DLLs taken from the dependency bin directory
copy ..\src\fe-gtk\xchat.exe %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgtk-win32-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgdk_pixbuf-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgio-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libglib-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgmodule-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgobject-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgthread-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libpng14-14.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libgdk-win32-2.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libcairo-2.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libfontconfig-1.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libexpat-1.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\freetype6.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libpango-1.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libpangocairo-1.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libpangoft2-1.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libpangowin32-1.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libatk-1.0-0.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\intl.dll %XCHAT_DEST%
rem GTK engines and modules; each xcopy creates the directory the following copy relies on
xcopy /q /s /i %DEPS_ROOT%\lib\gtk-2.0\2.10.0\engines\libpixmap.dll %XCHAT_DEST%\lib\gtk-2.0\2.10.0\engines\
copy %DEPS_ROOT%\lib\gtk-2.0\2.10.0\engines\libwimp.dll %XCHAT_DEST%\lib\gtk-2.0\2.10.0\engines
xcopy /q /s /i %DEPS_ROOT%\lib\gtk-2.0\modules\libgail.dll %XCHAT_DEST%\lib\gtk-2.0\modules\
xcopy /q /s /i etc %XCHAT_DEST%\etc
copy %DEPS_ROOT%\bin\libeay32.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\ssleay32.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\zlib1.dll %XCHAT_DEST%
copy %DEPS_ROOT%\bin\cert.pem %XCHAT_DEST%
copy %DEPS_ROOT%\bin\libenchant.dll %XCHAT_DEST%
xcopy /q /s /i %DEPS_ROOT%\lib\enchant\libenchant_myspell.dll %XCHAT_DEST%\lib\enchant\
rem Plugin DLLs; the xcopy creates the plugins directory used by the copies below
xcopy /q /s /i ..\plugins\checksum\xcchecksum.dll %XCHAT_DEST%\plugins\
copy ..\plugins\lua\xclua.dll %XCHAT_DEST%\plugins
copy ..\plugins\python\xcpython.dll %XCHAT_DEST%\plugins
copy ..\plugins\tcl\xctcl.dll %XCHAT_DEST%\plugins
copy ..\plugins\upd\xcupd.dll %XCHAT_DEST%\plugins
copy ..\plugins\xdcc\xcxdcc.dll %XCHAT_DEST%\plugins
copy ..\plugins\xtray\xtray.dll %XCHAT_DEST%\plugins
copy ..\plugins\winamp\xcwinamp.dll %XCHAT_DEST%\plugins
copy %DEPS_ROOT%\bin\lua51.dll %XCHAT_DEST%
38' href='#n438'>438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
/*
 * Copyright (C) 1997-2001, Michael Jennings
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies of the Software, its documentation and marketing & publicity
 * materials, and acknowledgment shall be given in the documentation, materials
 * and software packages that this Software was used.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* MMX routines for tinting XImages written by Willem Monsuwe <willem@stack.nl> */

/* Function calling conventions:
 *   shade_ximage_xx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
 */

/*
 * Frame-relative operands for the stack arguments.  ENTER pushes %ebp and
 * then copies %esp into it, so 0(%ebp) holds the saved %ebp, 4(%ebp) the
 * return address, and the first argument starts at 8(%ebp).  Each argument
 * occupies one 4-byte slot.
 */
#define data	8(%ebp)
#define bpl	12(%ebp)
#define w	16(%ebp)
#define h	20(%ebp)
#define rm	24(%ebp)
#define gm	28(%ebp)
#define bm	32(%ebp)

/*
 * Names under which the four entry points are exported.  When
 * UNDERSCORE_SYMBOLS is defined every name gets a leading underscore;
 * otherwise the plain names are used.
 */
#ifdef UNDERSCORE_SYMBOLS /* need this to link with msvc */
#define SHADE_XIMAGE_15 _shade_ximage_15_mmx
#define SHADE_XIMAGE_16 _shade_ximage_16_mmx
#define SHADE_XIMAGE_32 _shade_ximage_32_mmx
#define HAVE_MMX _have_mmx
#else
#define SHADE_XIMAGE_15 shade_ximage_15_mmx
#define SHADE_XIMAGE_16 shade_ximage_16_mmx
#define SHADE_XIMAGE_32 shade_ximage_32_mmx
#define HAVE_MMX have_mmx
#endif

/* Make the three tinting routines and the MMX probe visible to the linker. */
.globl SHADE_XIMAGE_15
.globl SHADE_XIMAGE_16
.globl SHADE_XIMAGE_32
.globl HAVE_MMX

/* .bss is selected and left again without placing anything in it; all of
 * the code that follows is assembled into .text. */
.bss
.text
.align 8

/*
 * ENTER: prologue shared by the shade_ximage_* routines.
 * Builds an %ebp frame, saves %ebx, %ecx, %edx, %edi and %esi (LEAVE pops
 * them in the reverse order), then loads the first working values:
 *   %esi = data, %ebx = w, %edx = h
 */
#define ENTER                   \
        pushl %ebp              ;\
        movl %esp, %ebp         ;\
        pushl %ebx              ;\
        pushl %ecx              ;\
        pushl %edx              ;\
        pushl %edi              ;\
        pushl %esi              ;\
        movl data, %esi         ;\
        movl w, %ebx            ;\
        movl h, %edx

/*
 * LEAVE: epilogue shared by the shade_ximage_* routines.
 * Executes emms to clear the MMX state, pops the registers saved by ENTER
 * in reverse order, restores %esp and %ebp, and returns.
 * NOTE(review): the local label 4 defined here does not appear to be the
 * target of any jump in this file; the "4f" references in the routines
 * resolve to their own earlier label 4 -- confirm before removing it.
 */
#define LEAVE                   \
4:                              ;\
        emms                    ;\
        popl %esi               ;\
        popl %edi               ;\
        popl %edx               ;\
        popl %ecx               ;\
        popl %ebx               ;\
        movl %ebp, %esp         ;\
        popl %ebp               ;\
        ret


/*
 * shade_ximage_15_mmx(void *data, int bpl, int w, int h,
 *                     int rm, int gm, int bm)
 *
 * Tints a 15-bit (5-5-5) image in place.  Every 5-bit channel value c is
 * replaced by (c * m) >> 8, where m is rm, gm or bm for that channel.
 * If any multiplier is greater than 256 the work is handed over to
 * shade_ximage_15_mmx_saturate, which clamps each channel to 31.
 *
 * Registers while the loops run:
 *   %esi  address of the current row's pixel number w-3
 *   %ebx  -w
 *   %ecx  column counter; pixel (%ecx + w - 3) lives at (%esi, %ecx, 2)
 *   %edx  rows still to process
 *   %mm5, %mm6, %mm7  rm, gm, bm replicated into all four words
 *
 * An image with w <= 0 or h <= 0 is left untouched.  Without that check
 * the row loop, which tests its counter only after a row is done, would
 * wrap the counter and keep writing far beyond the buffer.
 */

/* Scale the four 5-5-5 pixels held in %mm0; uses %mm1 and %mm2 as scratch. */
#define SHADE15_PIXELS \
        movq %mm0, %mm1         /* copy used for green */               ;\
        movq %mm0, %mm2         /* copy used for blue */                ;\
        psrlw $5, %mm1          /* green in bits 0-4, red above it */   ;\
        psrlw $10, %mm0         /* red in the low bits */               ;\
        psllw $11, %mm2         /* blue alone, in bits 11-15 */         ;\
        psllw $11, %mm1         /* green alone, in bits 11-15 */        ;\
        psllw $8, %mm0          /* red << 8 */                          ;\
        psrlw $3, %mm1          /* green << 8 */                        ;\
        psrlw $3, %mm2          /* blue << 8 */                         ;\
        pmulhw %mm5, %mm0       /* (red * rm) >> 8 */                   ;\
        pmulhw %mm6, %mm1       /* (green * gm) >> 8 */                 ;\
        pmulhw %mm7, %mm2       /* (blue * bm) >> 8 */                  ;\
        psllw $10, %mm0         /* red back to bits 10-14 */            ;\
        psllw $5, %mm1          /* green back to bits 5-9 */            ;\
        por %mm2, %mm0          /* merge blue */                        ;\
        por %mm1, %mm0          /* merge green */

SHADE_XIMAGE_15:
        ENTER

        /* Nothing to do for an empty or negative-sized image */
        testl %ebx, %ebx        /* w <= 0 ? */
        jle 5f
        testl %edx, %edx        /* h <= 0 ? */
        jle 5f

        leal -6(%esi, %ebx, 2), %esi    /* %esi = data + 2*w - 6 */
        negl %ebx                       /* %ebx = -w */

        /* Setup multipliers: broadcast the low word of each into 4 words */
        movd rm, %mm5
        movd gm, %mm6
        movd bm, %mm7
        punpcklwd %mm5, %mm5
        punpcklwd %mm6, %mm6
        punpcklwd %mm7, %mm7
        punpckldq %mm5, %mm5    /* rm rm rm rm */
        punpckldq %mm6, %mm6    /* gm gm gm gm */
        punpckldq %mm7, %mm7    /* bm bm bm bm */

        /* Any multiplier above 256 can overflow 5 bits: use the clamping loop */
        cmpl $256, rm
        jg shade_ximage_15_mmx_saturate
        cmpl $256, gm
        jg shade_ximage_15_mmx_saturate
        cmpl $256, bm
        jg shade_ximage_15_mmx_saturate

1:      /* Start of a row */
        movl %ebx, %ecx
        addl $3, %ecx           /* %ecx = 3 - w */
        jns 3f                  /* w <= 3: single-pixel loop only */
2:
        /* Four pixels per iteration */
        movq (%esi, %ecx, 2), %mm0
        SHADE15_PIXELS
        movq %mm0, (%esi, %ecx, 2)

        addl $4, %ecx
        js 2b
        jmp 4f
3:
        /* One pixel per iteration for the last w mod 4 pixels */
        movw (%esi, %ecx, 2), %ax
        movd %eax, %mm0
        SHADE15_PIXELS
        movd %mm0, %eax
        movw %ax, (%esi, %ecx, 2)

        incl %ecx
4:
        cmpl $2, %ecx           /* %ecx == 2 addresses the last pixel */
        jng 3b

        addl bpl, %esi          /* advance to the next row */
        decl %edx
        jnz 1b
5:
        LEAVE


/*
 * shade_ximage_15_mmx_saturate
 *
 * Continuation of the 15-bit tinting routine, reached by a jump when one
 * of the multipliers is greater than 256.  It relies on the state set up
 * there: %esi, %ebx and %edx describe the image, and %mm5, %mm6, %mm7
 * hold the replicated multipliers.  The loop structure is the same as in
 * the non-saturating path; the only difference is that every scaled
 * channel is clamped to 31 before the pixel is reassembled.
 */

/*
 * Scale and clamp the four 5-5-5 pixels in %mm0.  %mm3 must hold 0xffe0 in
 * every word: the saturating add pins anything above 31 to 0xffff, and
 * subtracting the same constant again leaves min(value, 31).
 */
#define SHADE15_SAT_PIXELS \
        movq %mm0, %mm1         /* copy used for green */               ;\
        movq %mm0, %mm2         /* copy used for blue */                ;\
        psrlw $5, %mm1          /* green in bits 0-4, red above it */   ;\
        psrlw $10, %mm0         /* red in the low bits */               ;\
        psllw $11, %mm2         /* blue alone, in bits 11-15 */         ;\
        psllw $11, %mm1         /* green alone, in bits 11-15 */        ;\
        psllw $8, %mm0          /* red << 8 */                          ;\
        psrlw $3, %mm1          /* green << 8 */                        ;\
        psrlw $3, %mm2          /* blue << 8 */                         ;\
        pmulhw %mm5, %mm0       /* scaled red, may exceed 31 */         ;\
        pmulhw %mm6, %mm1       /* scaled green, may exceed 31 */       ;\
        pmulhw %mm7, %mm2       /* scaled blue, may exceed 31 */        ;\
        paddusw %mm3, %mm0      /* clamp step 1: saturating add */      ;\
        paddusw %mm3, %mm1                                              ;\
        paddusw %mm3, %mm2                                              ;\
        psubw %mm3, %mm0        /* clamp step 2: remove the offset */   ;\
        psubw %mm3, %mm1                                                ;\
        psubw %mm3, %mm2                                                ;\
        psllw $10, %mm0         /* red back to bits 10-14 */            ;\
        psllw $5, %mm1          /* green back to bits 5-9 */            ;\
        por %mm2, %mm0          /* merge blue */                        ;\
        por %mm1, %mm0          /* merge green */

shade_ximage_15_mmx_saturate:

        /* Build the clamp constant: all ones shifted left by 5 = 0xffe0 */
        pcmpeqw %mm3, %mm3
        psllw $5, %mm3

1:      /* Start of a row */
        movl %ebx, %ecx
        addl $3, %ecx
        jns 3f                  /* fewer than four pixels in the row */
2:
        /* Four pixels per iteration */
        movq (%esi, %ecx, 2), %mm0
        SHADE15_SAT_PIXELS
        movq %mm0, (%esi, %ecx, 2)

        addl $4, %ecx
        js 2b
        jmp 4f
3:
        /* One pixel per iteration for whatever is left of the row */
        movw (%esi, %ecx, 2), %ax
        movd %eax, %mm0
        SHADE15_SAT_PIXELS
        movd %mm0, %eax
        movw %ax, (%esi, %ecx, 2)

        incl %ecx
4:
        cmpl $2, %ecx
        jle 3b

        addl bpl, %esi          /* next row */
        decl %edx
        jnz 1b
5:
        LEAVE


/*
 * shade_ximage_16_mmx(void *data, int bpl, int w, int h,
 *                     int rm, int gm, int bm)
 *
 * Tints a 16-bit (5-6-5) image in place.  Every channel value c is
 * replaced by (c * m) >> 8, where m is rm, gm or bm for that channel.
 * If any multiplier is greater than 256 the work is handed over to
 * shade_ximage_16_mmx_saturate, which clamps the channels to their
 * 5-bit or 6-bit maximum.
 *
 * Registers while the loops run:
 *   %esi  address of the current row's pixel number w-3
 *   %ebx  -w
 *   %ecx  column counter; pixel (%ecx + w - 3) lives at (%esi, %ecx, 2)
 *   %edx  rows still to process
 *   %mm5, %mm6, %mm7  rm, gm, bm replicated into all four words
 *
 * An image with w <= 0 or h <= 0 is left untouched.  Without that check
 * the row loop, which tests its counter only after a row is done, would
 * wrap the counter and keep writing far beyond the buffer.
 */

/* Scale the four 5-6-5 pixels held in %mm0; uses %mm1 and %mm2 as scratch. */
#define SHADE16_PIXELS \
        movq %mm0, %mm1         /* copy used for green */               ;\
        movq %mm0, %mm2         /* copy used for blue */                ;\
        psrlw $5, %mm1          /* green in bits 0-5, red above it */   ;\
        psrlw $11, %mm0         /* red in bits 0-4 */                   ;\
        psllw $11, %mm2         /* blue alone, in bits 11-15 */         ;\
        psllw $10, %mm1         /* green alone, in bits 10-15 */        ;\
        psllw $8, %mm0          /* red << 8 */                          ;\
        psrlw $2, %mm1          /* green << 8 */                        ;\
        psrlw $3, %mm2          /* blue << 8 */                         ;\
        pmulhw %mm5, %mm0       /* (red * rm) >> 8 */                   ;\
        pmulhw %mm6, %mm1       /* (green * gm) >> 8 */                 ;\
        pmulhw %mm7, %mm2       /* (blue * bm) >> 8 */                  ;\
        psllw $11, %mm0         /* red back to bits 11-15 */            ;\
        psllw $5, %mm1          /* green back to bits 5-10 */           ;\
        por %mm2, %mm0          /* merge blue */                        ;\
        por %mm1, %mm0          /* merge green */

SHADE_XIMAGE_16:
        ENTER

        /* Nothing to do for an empty or negative-sized image */
        testl %ebx, %ebx        /* w <= 0 ? */
        jle 5f
        testl %edx, %edx        /* h <= 0 ? */
        jle 5f

        leal -6(%esi, %ebx, 2), %esi    /* %esi = data + 2*w - 6 */
        negl %ebx                       /* %ebx = -w */

        /* Setup multipliers: broadcast the low word of each into 4 words */
        movd rm, %mm5
        movd gm, %mm6
        movd bm, %mm7
        punpcklwd %mm5, %mm5
        punpcklwd %mm6, %mm6
        punpcklwd %mm7, %mm7
        punpckldq %mm5, %mm5    /* rm rm rm rm */
        punpckldq %mm6, %mm6    /* gm gm gm gm */
        punpckldq %mm7, %mm7    /* bm bm bm bm */

        /* Any multiplier above 256 can overflow a channel: use the clamping loop */
        cmpl $256, rm
        jg shade_ximage_16_mmx_saturate
        cmpl $256, gm
        jg shade_ximage_16_mmx_saturate
        cmpl $256, bm
        jg shade_ximage_16_mmx_saturate

1:      /* Start of a row */
        movl %ebx, %ecx
        addl $3, %ecx           /* %ecx = 3 - w */
        jns 3f                  /* w <= 3: single-pixel loop only */
2:
        /* Four pixels per iteration */
        movq (%esi, %ecx, 2), %mm0
        SHADE16_PIXELS
        movq %mm0, (%esi, %ecx, 2)

        addl $4, %ecx
        js 2b
        jmp 4f
3:
        /* One pixel per iteration for the last w mod 4 pixels */
        movw (%esi, %ecx, 2), %ax
        movd %eax, %mm0
        SHADE16_PIXELS
        movd %mm0, %eax
        movw %ax, (%esi, %ecx, 2)

        incl %ecx
4:
        cmpl $2, %ecx           /* %ecx == 2 addresses the last pixel */
        jng 3b

        addl bpl, %esi          /* advance to the next row */
        decl %edx
        jnz 1b
5:
        LEAVE


/*
 * shade_ximage_16_mmx_saturate
 *
 * Continuation of the 16-bit tinting routine, reached by a jump when one
 * of the multipliers is greater than 256.  It relies on the state set up
 * there: %esi, %ebx and %edx describe the image, and %mm5, %mm6, %mm7
 * hold the replicated multipliers.  The loop structure is the same as in
 * the non-saturating path; the only difference is that every scaled
 * channel is clamped before the pixel is reassembled.
 */

/*
 * Scale and clamp the four 5-6-5 pixels in %mm0.  %mm3 must hold 0xffe0
 * and %mm4 0xffc0 in every word.  The saturating add pins an overflowing
 * channel to 0xffff; subtracting the constant again leaves the clamped
 * value.  Red skips the subtraction because the following shift by 11
 * discards every bit above its low five anyway.
 */
#define SHADE16_SAT_PIXELS \
        movq %mm0, %mm1         /* copy used for green */               ;\
        movq %mm0, %mm2         /* copy used for blue */                ;\
        psrlw $5, %mm1          /* green in bits 0-5, red above it */   ;\
        psrlw $11, %mm0         /* red in bits 0-4 */                   ;\
        psllw $11, %mm2         /* blue alone, in bits 11-15 */         ;\
        psllw $10, %mm1         /* green alone, in bits 10-15 */        ;\
        psllw $8, %mm0          /* red << 8 */                          ;\
        psrlw $2, %mm1          /* green << 8 */                        ;\
        psrlw $3, %mm2          /* blue << 8 */                         ;\
        pmulhw %mm5, %mm0       /* scaled red, may exceed 31 */         ;\
        pmulhw %mm6, %mm1       /* scaled green, may exceed 63 */       ;\
        pmulhw %mm7, %mm2       /* scaled blue, may exceed 31 */        ;\
        paddusw %mm3, %mm0      /* clamp red (5 bits) */                ;\
        paddusw %mm4, %mm1      /* clamp green (6 bits) */              ;\
        paddusw %mm3, %mm2      /* clamp blue (5 bits) */               ;\
        psubw %mm4, %mm1        /* green = min(green, 63) */            ;\
        psubw %mm3, %mm2        /* blue = min(blue, 31) */              ;\
        psllw $11, %mm0         /* red to bits 11-15, offset dropped */ ;\
        psllw $5, %mm1          /* green back to bits 5-10 */           ;\
        por %mm2, %mm0          /* merge blue */                        ;\
        por %mm1, %mm0          /* merge green */

shade_ximage_16_mmx_saturate:

        /* Build the clamp constants from an all-ones register */
        pcmpeqw %mm3, %mm3
        movq %mm3, %mm4
        psllw $5, %mm3          /* 0xffe0 per word, for 5-bit channels */
        psllw $6, %mm4          /* 0xffc0 per word, for the 6-bit channel */

1:      /* Start of a row */
        movl %ebx, %ecx
        addl $3, %ecx
        jns 3f                  /* fewer than four pixels in the row */
2:
        /* Four pixels per iteration */
        movq (%esi, %ecx, 2), %mm0
        SHADE16_SAT_PIXELS
        movq %mm0, (%esi, %ecx, 2)

        addl $4, %ecx
        js 2b
        jmp 4f
3:
        /* One pixel per iteration for whatever is left of the row */
        movw (%esi, %ecx, 2), %ax
        movd %eax, %mm0
        SHADE16_SAT_PIXELS
        movd %mm0, %eax
        movw %ax, (%esi, %ecx, 2)

        incl %ecx
4:
        cmpl $2, %ecx
        jle 3b

        addl bpl, %esi          /* next row */
        decl %edx
        jnz 1b
5:
        LEAVE


/*
 * shade_ximage_32_mmx(void *data, int bpl, int w, int h,
 *                     int rm, int gm, int bm)
 *
 * Tints a 32-bit image in place, one pixel per iteration.  The three low
 * bytes of every pixel are scaled by bm, gm and rm respectively (roughly
 * (c * m) >> 8) and clamped to 0..255 by the unsigned-saturating pack.
 * The top byte is multiplied by the fourth word of %mm4, which is zero as
 * long as rm fits in 16 bits, so it is written back as 0.
 *
 * Registers while the loops run:
 *   %esi  address just past the end of the current row's pixels
 *   %ebx  -w
 *   %ecx  column counter, runs from -w up to 0
 *   %edx  rows still to process
 *   %mm4  multipliers as words: 00 rm gm bm
 *   %mm5  correction term, %mm6 = 0x8000 in every word
 *
 * An image with w <= 0 or h <= 0 is left untouched.  Both loops test
 * their counter only after the body has run, so without this check a zero
 * height or a negative width would wrap the counter and keep writing far
 * beyond the buffer.
 */
SHADE_XIMAGE_32:
        ENTER

        /* Nothing to do for an empty or negative-sized image */
        testl %ebx, %ebx                /* w <= 0 ? */
        jle 3f
        testl %edx, %edx                /* h <= 0 ? */
        jle 3f

        leal (%esi, %ebx, 4), %esi      /* %esi = data + 4*w */
        negl %ebx                       /* %ebx = -w */

        /* Pack the multipliers into one register: 00 rm gm bm */
        movd rm, %mm4
        movd gm, %mm5
        movd bm, %mm6
        psllq $32, %mm4
        psllq $16, %mm5
        por %mm6, %mm4
        por %mm5, %mm4

        /*
         * pmulhw is a signed multiply, but a channel byte placed in the
         * high half of a word looks negative from 0x80 upwards.  Each
         * word is therefore XORed with 0x8000 before the multiply, which
         * turns c << 8 into (c - 128) << 8, and the product of 0x8000
         * with the multiplier is subtracted afterwards to undo the shift.
         */
        pcmpeqw %mm6, %mm6
        psllw $15, %mm6                 /* 80 00 80 00 80 00 80 00 */
        movq %mm6, %mm5
        pmulhw %mm4, %mm5               /* Get correction factor */
1:
        movl %ebx, %ecx                 /* start of a row */
2:
        movd (%esi, %ecx, 4), %mm1      /* 00 rr gg bb */
        pxor %mm0, %mm0
        punpcklbw %mm1, %mm0            /* 00 00 rr 00 gg 00 bb 00 */
        pxor %mm6, %mm0                 /* Flip sign */

        pmulhw %mm4, %mm0               /* 00 00 xx rr xx gg xx bb */
        psubw %mm5, %mm0                /* Correct range */
        packuswb %mm0, %mm0             /* 00 rr gg bb 00 rr gg bb */

        movd %mm0, (%esi, %ecx, 4)

        incl %ecx
        jnz 2b

        addl bpl, %esi                  /* advance to the next row */
        decl %edx
        jnz 1b
3:
        LEAVE


/*
 * have_mmx()
 *
 * Returns 1 in %eax when the CPUID instruction is available and reports
 * MMX support (leaf 1, %edx bit 23); returns 0 otherwise.
 * %ebx is saved and restored; %ecx and %edx are overwritten when cpuid
 * runs.
 */
HAVE_MMX:
	pushl	%ebx			/* used as scratch and overwritten by cpuid */

	/* CPUID exists only if bit 21 of the flags word can be toggled */
	pushfl
	popl	%eax
	movl	%eax, %ebx		/* keep the flags as they were */
	xorl	$0x00200000, %eax
	pushl	%eax
	popfl				/* try to write the flipped bit */
	pushfl
	popl	%eax			/* read the flags back */

	xorl	%ebx, %eax		/* zero if the bit did not change */
	jz	8f			/* no CPUID; %eax already holds 0 */

	/* CPUID is available: fetch the feature flags */
	movl	$1, %eax
	cpuid

	xorl	%eax, %eax
	testl	$0x00800000, %edx	/* MMX feature bit */
	setnz	%al			/* %eax = 1 if set, else 0 */
8:
	popl	%ebx
	ret

/*
 * For GNU toolchains on non-Windows targets, emit an empty .note.GNU-stack
 * section, the marker GNU linkers use to tell that this object does not
 * need an executable stack, then switch back to the previous section.
 */
#if defined(__GNUC__) && !defined(_WIN32)
.section .note.GNU-stack, "", @progbits
.previous
#endif