summaryrefslogtreecommitdiffstats
path: root/src/lib/kStuff/kProfiler2/prfx86msc.asm
blob: c7339583c91827c98b7deedced0473e8ade1d7e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
; $Id: prfx86msc.asm 29 2009-07-01 20:30:29Z bird $
;; @file
; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
;

;
; Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
;
; Permission is hereby granted, free of charge, to any person
; obtaining a copy of this software and associated documentation
; files (the "Software"), to deal in the Software without
; restriction, including without limitation the rights to use,
; copy, modify, merge, publish, distribute, sublicense, and/or sell
; copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following
; conditions:
;
; The above copyright notice and this permission notice shall be
; included in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
; OTHER DEALINGS IN THE SOFTWARE.
;

[section .data]
;
; Calibration state shared by common_return_path and calibrate.
;
g_fCalibrated:  dd      0               ; zero until calibrate stores 1 here; common_return_path tests its low byte.
g_OverheadAdj:  dd      0               ; tick count common_overhead subtracts from the saved entry timestamp; tuned by calibrate.

[section .text]

extern KPRF_ENTER                       ; called by __penter; resolved outside this file.
extern KPRF_LEAVE                       ; called by __pexit; resolved outside this file.

; The two hook entry points.
global __penter
global __pexit

; Internal labels of the return path and of the calibration code are exported
; too; the conditional that once guarded them is commented out.
; NOTE(review): presumably exported so they show up in symbol listings - confirm.
;ifdef  UNDEFINED
global common_return_path
global common_overhead
global common_no_overhead
global calibrate
global calib_inner_update_minimum
global calib_inner_next
global calib_outer_dec
global calib_outer_inc
global calib_done
global calib_nullproc
;endif


;;
; Function entry hook: takes a time stamp, reports the call to KPRF_ENTER and
; leaves via common_return_path, which restores ecx, eflags, edx and eax.
;
; On x86 the call to this function has been observed to be put before
; creating the stack frame, as the very first instruction in the function.
;
; Thus the stack layout is as follows:
;       24      return address of the calling function.
;       20      our return address - the address of the calling function + 5.
;       1c      eax
;       18      edx
;       14      eflags
;       10      ecx
;       c       tsc high       - param 3
;       8       tsc low
;       4       frame pointer  - param 2
;       0       function ptr   - param 1
;
; (Offsets are in hex and relative to esp at the time KPRF_ENTER is called.)
;
align 16
__penter:
        ; save volatile register and get the time stamp.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = time stamp, read before eflags and ecx are saved.
        pushfd
        push    ecx

        ; setting up the enter call frame (cdecl).
        sub     esp, 4 + 4 + 8          ; two dword parameters plus the 64-bit time stamp.
        mov     [esp + 0ch], edx        ; Param 3 - the timestamp
        mov     [esp + 08h], eax
        lea     edx, [esp + 24h]        ; Param 2 - frame pointer (pointer to the return address of the function calling us)
        mov     [esp + 04h], edx
        mov     eax, [esp + 20h]        ; Param 1 - The function address
        sub     eax, 5                  ; call instruction
        mov     [esp], eax

        call    KPRF_ENTER              ; its eax result is examined by common_return_path.
        jmp     common_return_path      ; shared tail: drops the frame, restores the registers and returns.


;;
; Function exit hook: takes a time stamp, reports the return to KPRF_LEAVE and
; leaves via common_return_path, which restores ecx, eflags, edx and eax.
;
; On x86 the call to this function has been observed to be put right before
; the return instruction. This fact matters since we have to calculate the
; same stack address as in __penter.
;
; Thus the stack layout is as follows:
;       24      return address of the calling function.
;       20      our return address - the address following the call to us inside the calling function.
;       1c      eax
;       18      edx
;       14      eflags
;       10      ecx
;       c       tsc high       - param 3
;       8       tsc low
;       4       frame pointer  - param 2
;       0       function ptr   - param 1
;
; (Offsets are in hex and relative to esp at the time KPRF_LEAVE is called.)
;
align 16
__pexit:
        ; save volatile register and get the time stamp.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = time stamp, read before eflags and ecx are saved.
        pushfd
        push    ecx

        ; setting up the leave call frame (cdecl).
        sub     esp, 4 + 4 + 8          ; two dword parameters plus the 64-bit time stamp.
        mov     [esp + 0ch], edx        ; Param 3 - the timestamp
        mov     [esp + 08h], eax
        lea     edx, [esp + 24h]        ; Param 2 - frame pointer (pointer to the return address of the function calling us)
        mov     [esp + 04h], edx
        mov     eax, [esp + 20h]        ; Param 1 - Some address in the function.
        sub     eax, 5                  ; call instruction
        mov     [esp], eax

        call    KPRF_LEAVE              ; its eax result is examined by common_return_path.
        jmp common_return_path          ; shared tail: drops the frame, restores the registers and returns.


;;
; This is the common return path for both the enter and exit hooks.
; It's kept common because we can then use the same overhead adjustment
; and save some calibration efforts. It also saves space :-)
;
; On entry:
;       eax             - value returned by KPRF_ENTER / KPRF_LEAVE. Zero skips
;                         the overhead accounting, anything else is used as a
;                         pointer to a 64-bit overhead counter.
;       [esp + 08h]     - the 64-bit time stamp taken at hook entry.
;       [esp + 10h]...  - saved ecx, eflags, edx, eax and our return address.
;
; calib_penter and calib_pexit jump straight to common_overhead with eax
; already pointing at a counter of their own.
align 16
common_return_path:
        ; Update overhead
        test    eax, eax
        jz      common_no_overhead      ; no counter returned - nothing to account.
        cmp     byte [g_fCalibrated], 0
        jnz     common_overhead
        call    calibrate               ; first time only; calibrate saves and restores all registers and eflags.
common_overhead:
        mov     ecx, eax                ; ecx <- pointer to overhead counter.
        mov     eax, [g_OverheadAdj]    ; apply the adjustment before reading tsc
        sub     [esp + 08h], eax        ; moving the entry time stamp back makes the elapsed time below larger.
        sbb     dword [esp + 0ch], 0

        rdtsc
        sub     eax, [esp + 08h]        ; edx:eax = now - (entry time stamp - adjustment)
        sbb     edx, [esp + 0ch]
        add     [ecx], eax              ; accumulate into the 64-bit counter.
        adc     [ecx + 4], edx
common_no_overhead:
        add     esp, 4 + 4 + 8          ; drop the parameter frame built by the hook.

        ; restore volatile registers.
        pop     ecx
        popfd
        pop     edx
        pop     eax
        ret                             ; back to the code that called the hook.

;;
; Scratch record used while calibrating. calibrate reserves it on its stack
; and keeps its address in esi for calib_penter and calib_pexit.
; Every value is 64 bits wide, stored as a low/high dword pair.
struc CALIBDATA
    .OverheadLo:    resd 1      ; overhead counter calib_penter hands to common_overhead.
    .OverheadHi:    resd 1
    .ProfiledLo:    resd 1      ; sum of (exit time stamp - enter time stamp), added up by calib_pexit.
    .ProfiledHi:    resd 1
    .EnterTSLo:     resd 1      ; time stamp stored by calib_penter; calib_pexit also passes this
    .EnterTSHi:     resd 1      ; address to common_overhead as its counter.
    .MinLo:         resd 1      ; smallest (Profiled - Overhead) seen by calibrate's inner loop.
    .MinHi:         resd 1
endstruc



align 16
;;
; Do necessary calibrations.
;
; Searches for a g_OverheadAdj value that makes calib_nullproc (16 enter/exit
; pairs) come out at 20h..30h ticks of 'profiled minus overhead' time:
;   - the outer loop makes at most 200h adjustment steps;
;   - the inner loop runs calib_nullproc 200h times and keeps the minimum;
;   - a minimum above 30h increments g_OverheadAdj, a minimum of 1fh or less
;     (or a negative sample) decrements it, anything in between ends the search.
;
; g_OverheadAdj is applied as an unsigned value by common_overhead, so it is
; never decremented below zero.
;
; In:     nothing.
; Out:    g_fCalibrated = 1, g_OverheadAdj tuned.
; Clobb:  nothing - all general registers and eflags are saved and restored.
;
calibrate:
        ; prolog
        push    ebp
        mov     ebp, esp
        pushfd
        pushad
        sub     esp, CALIBDATA_size
        mov     esi, esp                ; esi points to the CALIBDATA

        ;
        ; Set the calibrated flag up front, before the loops run, so that any
        ; later pass through common_return_path skips the call to calibrate.
        ; (xchg with a memory operand is an atomic, implicitly locked store.)
        ;
        mov     eax, 1
        xchg    dword [g_fCalibrated], eax

        ;
        ; The outer loop - find the right adjustment.
        ;
        mov     ebx, 200h               ; loop counter.
calib_outer_loop:

        ;
        ; The inner loop - calls the function number of times to establish a
        ;                  good minimum value
        ;
        mov     ecx, 200h               ; inner counter; the calibration stubs save and restore ecx.
        mov     dword [esi + CALIBDATA.MinLo], 0ffffffffh
        mov     dword [esi + CALIBDATA.MinHi], 07fffffffh
calib_inner_loop:

        ; zero the overhead and profiled times.
        xor     eax, eax
        mov     [esi + CALIBDATA.OverheadLo], eax
        mov     [esi + CALIBDATA.OverheadHi], eax
        mov     [esi + CALIBDATA.ProfiledLo], eax
        mov     [esi + CALIBDATA.ProfiledHi], eax
        call    calib_nullproc

        ; subtract the overhead
        mov     eax, [esi + CALIBDATA.ProfiledLo]
        mov     edx, [esi + CALIBDATA.ProfiledHi]
        sub     eax, [esi + CALIBDATA.OverheadLo]
        sbb     edx, [esi + CALIBDATA.OverheadHi]

        ; update the minimum value.
        test    edx, 080000000h
        jnz near calib_outer_dec        ; if negative, just simplify and shortcut
        cmp     edx, [esi + CALIBDATA.MinHi]    ; both high dwords are non-negative here.
        jg      calib_inner_next
        jl      calib_inner_update_minimum
        cmp     eax, [esi + CALIBDATA.MinLo]
        jae     calib_inner_next        ; the low dwords are unsigned - a signed test would
                                        ; treat 80000000h and up as 'smaller' than the minimum.
calib_inner_update_minimum:
        mov     [esi + CALIBDATA.MinLo], eax
        mov     [esi + CALIBDATA.MinHi], edx
calib_inner_next:
        loop    calib_inner_loop

        ; Is the minimum value acceptable?
        test    dword [esi + CALIBDATA.MinHi], 80000000h
        jnz     calib_outer_dec         ; simplify if negative.
        cmp     dword [esi + CALIBDATA.MinHi], 0
        jnz     calib_outer_inc         ; this shouldn't be possible
        cmp     dword [esi + CALIBDATA.MinLo], 1fh
        jbe     calib_outer_dec         ; too low - 2 ticks per pair is the minimum!
        cmp     dword [esi + CALIBDATA.MinLo], 30h
        jbe     calib_done              ; this is fine!
calib_outer_inc:
        inc     dword [g_OverheadAdj]
        jmp     calib_outer_next
calib_outer_dec:
        cmp     dword [g_OverheadAdj], 1
        jbe     calib_done              ; stop at 1 - and at 0, where a decrement would wrap
                                        ; to 0ffffffffh and be applied as a huge adjustment.
        dec     dword [g_OverheadAdj]
calib_outer_next:
        dec     ebx
        jnz     calib_outer_loop
calib_done:

        ; epilog
        add     esp, CALIBDATA_size
        popad
        popfd
        leave
        ret




;;
; The calibration __penter - this must be identical to the real thing except for the KPRF call.
;
; Expects esi to point to the CALIBDATA set up by calibrate. Records the entry
; time stamp there and then joins common_overhead with the CALIBDATA overhead
; counter in eax, so the return path is the one the real hooks use.
align 16
calib_penter:
        ; This part must be identical
        push    eax
        push    edx
        rdtsc
        pushfd
        push    ecx

        ; store the entry
        mov     [esi + CALIBDATA.EnterTSLo], eax
        mov     [esi + CALIBDATA.EnterTSHi], edx

        ; create the call frame
        push    edx                     ; time stamp high - ends up at [esp + 0ch]
        push    eax                     ; time stamp low  - ends up at [esp + 08h]
        push    0                       ; placeholder for the frame pointer parameter.
        push    0                       ; placeholder for the function pointer parameter.

        lea     eax, [esi + CALIBDATA.OverheadLo]   ; the counter common_overhead will add to.
        jmp     common_overhead         ; enters past the zero and g_fCalibrated checks.


;;
; The calibration __pexit - this must be identical to the real thing except for the KPRF call.
;
; Expects esi to point to the CALIBDATA set up by calibrate. Adds the time
; since the matching calib_penter to the Profiled total and then joins
; common_overhead, so the return path is the one the real hooks use.
align 16
calib_pexit:
        ; This part must be identical
        push    eax
        push    edx
        rdtsc
        pushfd
        push    ecx

        ; update the time
        push    eax                     ; keep the exit time stamp for the call frame below.
        push    edx
        sub     eax, [esi + CALIBDATA.EnterTSLo]    ; edx:eax = exit time stamp - enter time stamp
        sbb     edx, [esi + CALIBDATA.EnterTSHi]
        add     [esi + CALIBDATA.ProfiledLo], eax
        adc     [esi + CALIBDATA.ProfiledHi], edx
        pop     edx
        pop     eax

        ; create the call frame
        push    edx                     ; time stamp high - ends up at [esp + 0ch]
        push    eax                     ; time stamp low  - ends up at [esp + 08h]
        push    0                       ; placeholder for the frame pointer parameter.
        push    0                       ; placeholder for the function pointer parameter.

        ; The counter handed over is EnterTS, not Overhead: whatever common_overhead
        ; adds lands in EnterTS, which the next calib_penter overwrites.
        ; NOTE(review): looks deliberate (the exit-side overhead is not part of the
        ; enter-to-exit interval being measured) - confirm before changing.
        lea     eax, [esi + CALIBDATA.EnterTSLo]
        jmp     common_overhead         ; enters past the zero and g_fCalibrated checks.


;;
; Stand-in for a profiled function: nothing but hook calls.
; Each enter/exit pair is expected to cost roughly 2-10 ticks.
;
; The pairs are emitted sixteen times back to back rather than looped over;
; measuring a single pair was found to give the wrong result.
align 16
calib_nullproc:
        ; sixteen unrolled calib_penter / calib_pexit pairs (0..f).
%rep 16
        call    calib_penter
        call    calib_pexit

%endrep
        ret