// tsc_amd64.s
#include "textflag.h"

// func GetInOrder() int64
TEXT ·GetInOrder(SB), NOSPLIT, $0
	LFENCE // Ensure all previous instructions have executed.
	RDTSC  // Counter: high 32 bits in DX, low 32 bits in AX.
	LFENCE // Ensure RDTSC is executed before any subsequent instruction.
	SALQ $32, DX
	ORQ  DX, AX // AX = (DX << 32) | AX, the full 64-bit counter.
	MOVQ AX, ret+0(FP)
	RET
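
// A minimal sketch of the Go-side declaration this stub pairs with (assumed
// to live in a companion .go file of this package, not shown here):
//
//	func GetInOrder() int64
//
// The LFENCE/RDTSC/LFENCE bracket keeps the counter read ordered with respect
// to surrounding instructions; the plain RDTSC variant below skips the fences
// and is cheaper, but may be reordered.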

// func RDTSC() int64
TEXT ·RDTSC(SB), NOSPLIT, $0
	RDTSC // No fences: fastest, but may be reordered with surrounding code.
	SALQ $32, DX
	ORQ  DX, AX
	MOVQ AX, ret+0(FP)
	RET

// func unixNanoTSC16B() int64
TEXT ·unixNanoTSC16B(SB), NOSPLIT, $0
	// Neither RDTSC nor RDTSCP is a serializing instruction.
	// They do not necessarily wait until all previous instructions
	// have been executed before reading the counter.
	//
	// That is fine here: plain RDTSC is good enough for just getting a timestamp.
	RDTSC // high 32 bits in DX, low 32 bits in AX (tsc).
	SALQ $32, DX
	ORQ  DX, AX           // -> [DX, tsc] (high, low)
	VCVTSI2SDQ AX, X0, X0 // ftsc = float64(tsc)
	MOVQ ·OffsetCoeffAddr(SB), BX
	VMOVDQA (BX), X3      // X3 = [offset, coeff] (high, low)
	VMULSD X3, X0, X0     // ns = coeff * ftsc
	VCVTTSD2SIQ X0, AX    // un = int64(ns)
	VMOVHLPS X3, X3, X3   // move offset into the low quadword
	VMOVQ X3, CX
	ADDQ CX, AX           // un += offset
	MOVQ AX, ret+0(FP)
	RET
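
// In plain Go, the conversion above is roughly (a sketch under the same
// calibration parameters, not code from this package):
//
//	tsc := RDTSC()
//	un := int64(float64(tsc)*coeff) + offset // offset int64, coeff float64
//
// storeOffsetCoeff below writes coeff into the low 8 bytes and offset into the
// high 8 bytes of the 16-byte block that ·OffsetCoeffAddr points to, which is
// why the offset is recovered here with VMOVHLPS.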

// func unixNanoTSCFMA() int64
TEXT ·unixNanoTSCFMA(SB), NOSPLIT, $0
	// Neither RDTSC nor RDTSCP is a serializing instruction.
	// They do not necessarily wait until all previous instructions
	// have been executed before reading the counter.
	//
	// That is fine here: plain RDTSC is good enough for just getting a timestamp.
	RDTSC // high 32 bits in DX, low 32 bits in AX (tsc).
	SALQ $32, DX
	ORQ  DX, AX            // -> [DX, tsc] (high, low)
	VCVTSI2SDQ AX, X0, X0  // ftsc = float64(tsc)
	MOVQ ·OffsetCoeffFAddr(SB), BX
	VMOVDQA (BX), X3       // get coeff
	VMOVHLPS X3, X3, X4    // get offset
	VFMADD132PD X0, X4, X3 // X3 = X3*X0 + X4 = coeff*ftsc + offset
	VCVTTSD2SIQ X3, AX
	MOVQ AX, ret+0(FP)
	RET
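
// Unlike unixNanoTSC16B, this variant reads its parameters from
// ·OffsetCoeffFAddr, where the offset is stored as a float64 (see
// storeOffsetFCoeff below), so ftsc*coeff+offset is evaluated in one fused
// multiply-add before a single float64-to-int64 conversion. A rough Go
// equivalent (a sketch, not code from this package):
//
//	un := int64(float64(tsc)*coeff + offsetF) // offsetF float64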

// func unixNanoTSC16Bfence() int64
TEXT ·unixNanoTSC16Bfence(SB), NOSPLIT, $0
	LFENCE
	RDTSC // high 32 bits in DX, low 32 bits in AX (tsc).
	LFENCE
	SALQ $32, DX
	ORQ  DX, AX           // -> [DX, tsc] (high, low)
	VCVTSI2SDQ AX, X0, X0 // ftsc = float64(tsc)
	MOVQ ·OffsetCoeffAddr(SB), BX
	VMOVDQA (BX), X3      // get coeff
	VMULSD X3, X0, X0     // ns = coeff * ftsc
	VCVTTSD2SIQ X0, AX    // un = int64(ns)
	VMOVHLPS X3, X3, X3
	VMOVQ X3, CX
	ADDQ CX, AX           // un += offset
	MOVQ AX, ret+0(FP)
	RET
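
// unixNanoTSC16Bfence is the same computation as unixNanoTSC16B with the
// counter read bracketed by LFENCE, trading a little latency for ordering
// guarantees (the same trade-off as GetInOrder versus RDTSC above).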

// func LoadOffsetCoeff(src *byte) (offset int64, coeff float64)
TEXT ·LoadOffsetCoeff(SB), NOSPLIT, $0
	MOVQ src+0(FP), AX
	VMOVDQA (AX), X0    // X0 = [offset, coeff] (high, low)
	VMOVQ X0, BX        // BX = coeff bits
	VMOVHLPS X0, X0, X0
	VMOVQ X0, CX        // CX = offset
	MOVQ CX, offset+8(FP)
	MOVQ BX, coeff+16(FP)
	RET

// func storeOffsetCoeff(dst *byte, offset int64, coeff float64)
TEXT ·storeOffsetCoeff(SB), NOSPLIT, $0
	MOVQ dst+0(FP), AX
	VMOVQ coeff+16(FP), X5       // low quadword = coeff
	VMOVHPS offset+8(FP), X5, X4 // high quadword = offset
	VMOVDQA X4, (AX)
	RET

// func storeOffsetFCoeff(dst *byte, offset, coeff float64)
TEXT ·storeOffsetFCoeff(SB), NOSPLIT, $0
	MOVQ dst+0(FP), AX
	VMOVQ coeff+16(FP), X5       // low quadword = coeff
	VMOVHPS offset+8(FP), X5, X4 // high quadword = offset (float64)
	VMOVDQA X4, (AX)
	RET
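
// Note: LoadOffsetCoeff and the store routines above access the 16-byte
// parameter block with VMOVDQA, which requires the src/dst pointer to be
// 16-byte aligned; an unaligned pointer would fault. The block layout they
// share is coeff (float64) in the low 8 bytes and offset in the high 8 bytes.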