summaryrefslogtreecommitdiffstats
path: root/src/lib/kStuff/kProfiler2/prfx86msc.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/kStuff/kProfiler2/prfx86msc.asm')
-rw-r--r--src/lib/kStuff/kProfiler2/prfx86msc.asm393
1 files changed, 393 insertions, 0 deletions
diff --git a/src/lib/kStuff/kProfiler2/prfx86msc.asm b/src/lib/kStuff/kProfiler2/prfx86msc.asm
new file mode 100644
index 0000000..c733958
--- /dev/null
+++ b/src/lib/kStuff/kProfiler2/prfx86msc.asm
@@ -0,0 +1,393 @@
+; $Id: prfx86msc.asm 29 2009-07-01 20:30:29Z bird $
+;; @file
+; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
+;
+
+;
+; Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
+;
+; Permission is hereby granted, free of charge, to any person
+; obtaining a copy of this software and associated documentation
+; files (the "Software"), to deal in the Software without
+; restriction, including without limitation the rights to use,
+; copy, modify, merge, publish, distribute, sublicense, and/or sell
+; copies of the Software, and to permit persons to whom the
+; Software is furnished to do so, subject to the following
+; conditions:
+;
+; The above copyright notice and this permission notice shall be
+; included in all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+; OTHER DEALINGS IN THE SOFTWARE.
+;
+
+[section .data]
+;
+g_fCalibrated:
+ dd 0
+g_OverheadAdj:
+ dd 0
+
+[section .text]
+
+extern KPRF_ENTER
+extern KPRF_LEAVE
+
+global __penter
+global __pexit
+
+;ifdef UNDEFINED
+global common_return_path
+global common_overhead
+global common_no_overhead
+global calibrate
+global calib_inner_update_minimum
+global calib_inner_next
+global calib_outer_dec
+global calib_outer_inc
+global calib_done
+global calib_nullproc
+;endif
+
+
+;;
+; On x86 the call to this function has been observed to be put before
+; creating the stack frame, as the very first instruction in the function.
+;
+; Thus the stack layout is as follows:
+; 24 return address of the calling function.
+; 20 our return address - the address of the calling function + 5.
+; 1c eax
+; 18 edx
+; 14 eflags
+; 10 ecx
+; c tsc high - param 3
+; 8 tsc low
+; 4 frame pointer - param 2
+; 0 function ptr - param 1
+;
+;
+align 16
+__penter:
+ ; save volatile register and get the time stamp.
+ push eax
+ push edx
+ rdtsc
+ pushfd
+ push ecx
+
+ ; setting up the enter call frame (cdecl).
+ sub esp, 4 + 4 + 8
+ mov [esp + 0ch], edx ; Param 3 - the timestamp
+ mov [esp + 08h], eax
+ lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
+ mov [esp + 04h], edx
+ mov eax, [esp + 20h] ; Param 1 - The function address
+ sub eax, 5 ; call instruction
+ mov [esp], eax
+
+ call KPRF_ENTER
+ jmp common_return_path
+
+
+;;
+; On x86 the call to this function has been observed to be put right before
+; return instruction. This fact matters since since we have to calc the same
+; stack address as in _penter.
+;
+; Thus the stack layout is as follows:
+; 24 return address of the calling function.
+; 20 our return address - the address of the calling function + 5.
+; 1c eax
+; 18 edx
+; 14 eflags
+; 10 ecx
+; c tsc high - param 3
+; 8 tsc low
+; 4 frame pointer - param 2
+; 0 function ptr - param 1
+;
+;
+align 16
+__pexit:
+ ; save volatile register and get the time stamp.
+ push eax
+ push edx
+ rdtsc
+ pushfd
+ push ecx
+
+ ; setting up the leave call frame (cdecl).
+ sub esp, 4 + 4 + 8
+ mov [esp + 0ch], edx ; Param 3 - the timestamp
+ mov [esp + 08h], eax
+ lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
+ mov [esp + 04h], edx
+ mov eax, [esp + 20h] ; Param 1 - Some address in the function.
+ sub eax, 5 ; call instruction
+ mov [esp], eax
+
+ call KPRF_LEAVE
+ jmp common_return_path
+
+
+;;
+; This is the common return path for both the enter and exit hooks.
+; It's kept common because we can then use the same overhead adjustment
+; and save some calibration efforts. It also saves space :-)
+align 16
+common_return_path:
+ ; Update overhead
+ test eax, eax
+ jz common_no_overhead
+ cmp byte [g_fCalibrated], 0
+ jnz common_overhead
+ call calibrate
+common_overhead:
+ mov ecx, eax ; ecx <- pointer to overhead counter.
+ mov eax, [g_OverheadAdj] ; apply the adjustment before reading tsc
+ sub [esp + 08h], eax
+ sbb dword [esp + 0ch], 0
+
+ rdtsc
+ sub eax, [esp + 08h]
+ sbb edx, [esp + 0ch]
+ add [ecx], eax
+ adc [ecx + 4], edx
+common_no_overhead:
+ add esp, 4 + 4 + 8
+
+ ; restore volatile registers.
+ pop ecx
+ popfd
+ pop edx
+ pop eax
+ ret
+
+;;
+; Data esi points to while we're calibrating.
+struc CALIBDATA
+ .OverheadLo resd 1
+ .OverheadHi resd 1
+ .ProfiledLo resd 1
+ .ProfiledHi resd 1
+ .EnterTSLo resd 1
+ .EnterTSHi resd 1
+ .MinLo resd 1
+ .MinHi resd 1
+endstruc
+
+
+
+align 16
+;;
+; Do necessary calibrations.
+;
+calibrate:
+ ; prolog
+ push ebp
+ mov ebp, esp
+ pushfd
+ pushad
+ sub esp, CALIBDATA_size
+ mov esi, esp ; esi points to the CALIBDATA
+
+ ;
+ ; Indicate that we have finished calibrating.
+ ;
+ mov eax, 1
+ xchg dword [g_fCalibrated], eax
+
+ ;
+ ; The outer loop - find the right adjustment.
+ ;
+ mov ebx, 200h ; loop counter.
+calib_outer_loop:
+
+ ;
+ ; The inner loop - calls the function number of times to establish a
+ ; good minimum value
+ ;
+ mov ecx, 200h
+ mov dword [esi + CALIBDATA.MinLo], 0ffffffffh
+ mov dword [esi + CALIBDATA.MinHi], 07fffffffh
+calib_inner_loop:
+
+ ; zero the overhead and profiled times.
+ xor eax, eax
+ mov [esi + CALIBDATA.OverheadLo], eax
+ mov [esi + CALIBDATA.OverheadHi], eax
+ mov [esi + CALIBDATA.ProfiledLo], eax
+ mov [esi + CALIBDATA.ProfiledHi], eax
+ call calib_nullproc
+
+ ; subtract the overhead
+ mov eax, [esi + CALIBDATA.ProfiledLo]
+ mov edx, [esi + CALIBDATA.ProfiledHi]
+ sub eax, [esi + CALIBDATA.OverheadLo]
+ sbb edx, [esi + CALIBDATA.OverheadHi]
+
+ ; update the minimum value.
+ test edx, 080000000h
+ jnz near calib_outer_dec ; if negative, just simplify and shortcut
+ cmp edx, [esi + CALIBDATA.MinHi]
+ jg calib_inner_next
+ jl calib_inner_update_minimum
+ cmp eax, [esi + CALIBDATA.MinLo]
+ jge calib_inner_next
+calib_inner_update_minimum:
+ mov [esi + CALIBDATA.MinLo], eax
+ mov [esi + CALIBDATA.MinHi], edx
+calib_inner_next:
+ loop calib_inner_loop
+
+ ; Is the minimum value acceptable?
+ test dword [esi + CALIBDATA.MinHi], 80000000h
+ jnz calib_outer_dec ; simplify if negative.
+ cmp dword [esi + CALIBDATA.MinHi], 0
+ jnz calib_outer_inc ; this shouldn't be possible
+ cmp dword [esi + CALIBDATA.MinLo], 1fh
+ jbe calib_outer_dec ; too low - 2 ticks per pair is the minimum!
+ cmp dword [esi + CALIBDATA.MinLo], 30h
+ jbe calib_done ; this is fine!
+calib_outer_inc:
+ inc dword [g_OverheadAdj]
+ jmp calib_outer_next
+calib_outer_dec:
+ cmp dword [g_OverheadAdj], 1
+ je calib_done
+ dec dword [g_OverheadAdj]
+calib_outer_next:
+ dec ebx
+ jnz calib_outer_loop
+calib_done:
+
+ ; epilog
+ add esp, CALIBDATA_size
+ popad
+ popfd
+ leave
+ ret
+
+
+
+
+;;
+; The calibration __penter - this must be identical to the real thing except for the KPRF call.
+align 16
+calib_penter:
+ ; This part must be identical
+ push eax
+ push edx
+ rdtsc
+ pushfd
+ push ecx
+
+ ; store the entry
+ mov [esi + CALIBDATA.EnterTSLo], eax
+ mov [esi + CALIBDATA.EnterTSHi], edx
+
+ ; create the call frame
+ push edx
+ push eax
+ push 0
+ push 0
+
+ lea eax, [esi + CALIBDATA.OverheadLo]
+ jmp common_overhead
+
+
+;;
+; The calibration __pexit - this must be identical to the real thing except for the KPRF call.
+align 16
+calib_pexit:
+ ; This part must be identical
+ push eax
+ push edx
+ rdtsc
+ pushfd
+ push ecx
+
+ ; update the time
+ push eax
+ push edx
+ sub eax, [esi + CALIBDATA.EnterTSLo]
+ sbb edx, [esi + CALIBDATA.EnterTSHi]
+ add [esi + CALIBDATA.ProfiledLo], eax
+ adc [esi + CALIBDATA.ProfiledHi], edx
+ pop edx
+ pop eax
+
+ ; create the call frame
+ push edx
+ push eax
+ push 0
+ push 0
+
+ lea eax, [esi + CALIBDATA.EnterTSLo]
+ jmp common_overhead
+
+
+;;
+; The 'function' we're profiling.
+; The general idea is that each pair should take something like 2-10 ticks.
+;
+; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
+align 16
+calib_nullproc:
+ call calib_penter ;0
+ call calib_pexit
+
+ call calib_penter ;1
+ call calib_pexit
+
+ call calib_penter ;2
+ call calib_pexit
+
+ call calib_penter ;3
+ call calib_pexit
+
+ call calib_penter ;4
+ call calib_pexit
+
+ call calib_penter ;5
+ call calib_pexit
+
+ call calib_penter ;6
+ call calib_pexit
+
+ call calib_penter ;7
+ call calib_pexit
+
+ call calib_penter ;8
+ call calib_pexit
+
+ call calib_penter ;9
+ call calib_pexit
+
+ call calib_penter ;a
+ call calib_pexit
+
+ call calib_penter ;b
+ call calib_pexit
+
+ call calib_penter ;c
+ call calib_pexit
+
+ call calib_penter ;d
+ call calib_pexit
+
+ call calib_penter ;e
+ call calib_pexit
+
+ call calib_penter ;f
+ call calib_pexit
+ ret
+