summaryrefslogtreecommitdiffstats
path: root/media/libjpeg/simd/i386/jsimdcpu.asm
blob: ddcafa9e2139ca20977f8aa6837e0d6bdb73ea48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler); it
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208

%include "jsimdext.inc"

; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        32
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Returns in eax the bitwise OR of the JSIMD_MMX, JSIMD_SSE, JSIMD_SSE2,
; JSIMD_3DNOW, and JSIMD_AVX2 flags for every extension that was detected.
; Returns 0 if the CPUID instruction is unavailable or if CPUID reports that
; no standard leaf beyond 00H exists.
;
; Register usage:
;   edi           = accumulated SIMD support flags (saved and restored)
;   ebx           = overwritten by CPUID (saved and restored)
;   eax, ecx, edx = scratch; not preserved for the caller
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        ebx
;   push        ecx                     ; need not be preserved
;   push        edx                     ; need not be preserved
;   push        esi                     ; unused
    push        edi

    xor         edi, edi                ; simd support flag

    ; Detect CPUID: try to toggle the ID bit (bit 21) of EFLAGS and see
    ; whether the change sticks.
    pushfd
    pop         eax
    mov         edx, eax                ; edx = original EFLAGS
    xor         eax, 1<<21              ; flip ID bit in EFLAGS
    push        eax
    popfd
    pushfd
    pop         eax                     ; eax = EFLAGS as actually stored
    xor         eax, edx                ; non-zero only if the ID bit changed
    jz          near .return            ; CPUID is not supported

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax
    cpuid                               ; eax = maximum standard leaf
    test        eax, eax
    jz          near .return            ; no leaf 01H: nothing to detect
    cmp         eax, 7
    jb          short .no_avx2          ; Maximum leaf < 07H (the leaf number
                                        ; is unsigned, so compare unsigned)

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid
    mov         eax, ebx
    test        eax, 1<<5               ; bit5:AVX2
    jz          short .no_avx2

    ; Check for AVX2 O/S support
    mov         eax, 1
    xor         ecx, ecx
    cpuid
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .no_avx2          ; O/S has not enabled XSAVE/XGETBV
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .no_avx2          ; CPU does not support AVX

    xor         ecx, ecx                ; select extended control register 0
    xgetbv
    and         eax, 6                  ; keep bits 1 and 2 (XMM and YMM state)
    cmp         eax, 6
    jnz         short .no_avx2          ; O/S does not manage XMM/YMM state
                                        ; using XSAVE

    or          edi, JSIMD_AVX2
.no_avx2:

    ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
    ; (queried again here because the AVX2 path above may have been skipped
    ; before reaching its own leaf 01H query)
    xor         eax, eax
    inc         eax
    cpuid
    mov         eax, edx                ; eax = Standard feature flags

    ; Check for MMX instruction support
    test        eax, 1<<23              ; bit23:MMX
    jz          short .no_mmx
    or          edi, byte JSIMD_MMX
.no_mmx:
    ; Check for SSE instruction support
    test        eax, 1<<25              ; bit25:SSE
    jz          short .no_sse
    or          edi, byte JSIMD_SSE
.no_sse:
    ; Check for SSE2 instruction support
    test        eax, 1<<26              ; bit26:SSE2
    jz          short .no_sse2
    or          edi, byte JSIMD_SSE2
.no_sse2:

    ; Check for 3DNow! instruction support
    mov         eax, 0x80000000
    cpuid                               ; eax = maximum extended leaf
    cmp         eax, 0x80000000
    jbe         short .return           ; extended leaf 80000001H unavailable

    mov         eax, 0x80000001
    cpuid
    mov         eax, edx                ; eax = Extended feature flags

    test        eax, 1<<31              ; bit31:3DNow!(vendor independent)
    jz          short .no_3dnow
    or          edi, byte JSIMD_3DNOW
.no_3dnow:

.return:
    mov         eax, edi                ; return the accumulated flags

    pop         edi
;   pop         esi                     ; unused
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
    pop         ebx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32