# quantizations.yaml
---
# Quantization type names selected by this file.
# Every name here also has a matching `name` entry under
# `allowed_quantization_types`.
# NOTE(review): presumably the set a consumer iterates over when producing
# quantized outputs - confirm against the code that reads this key.
quantizations:
  - IQ2_S
  - IQ2_M
  - IQ3_M
  - IQ4_NL
  - IQ4_XS
  - Q3_K_L
  - Q3_K_M
  - Q4_K_M
  - Q4_K_S
  - Q5_K_M
  - Q5_K_S
  - Q6_K
  - Q8_0
# Catalogue of known quantization types, one mapping per list item.
#
# Keys that appear on an entry (none besides `name` is present everywhere):
#   name        - identifier of the type.
#   size        - either a size string such as 4.34G or a quoted
#                 "<n> bpw" string (presumably bits per weight - confirm).
#   ppl         - signed number; presumably the perplexity change measured
#                 on the model named in `details` - confirm.
#   details     - model label the size/ppl figures refer to.
#   type        - free-text kind (quantization, ternarization, ...).
#   alias       - `name` of another entry in this list that this one maps to.
#   description - free-text explanation (used by COPY only).
#
# NOTE(review): `ppl` values are plain scalars, so YAML loaders read them as
# floats; the leading `+` and trailing zeros are not preserved. Quote them if
# the consumer needs the literal text - confirm before changing.
allowed_quantization_types:
  - name: Q4_0
    size: 4.34G
    ppl: +0.4685
    details: Llama-3-8B
  - name: Q4_1
    size: 4.78G
    ppl: +0.4511
    details: Llama-3-8B
  - name: Q5_0
    size: 5.21G
    ppl: +0.1316
    details: Llama-3-8B
  - name: Q5_1
    size: 5.65G
    ppl: +0.1062
    details: Llama-3-8B
  - name: IQ2_XXS
    size: "2.06 bpw"
    type: quantization
  - name: IQ2_XS
    size: "2.31 bpw"
    type: quantization
  - name: IQ2_S
    size: "2.5 bpw"
    type: quantization
  - name: IQ2_M
    size: "2.7 bpw"
    type: quantization
  - name: IQ1_S
    size: "1.56 bpw"
    type: quantization
  - name: IQ1_M
    size: "1.75 bpw"
    type: quantization
  - name: TQ1_0
    size: "1.69 bpw"
    type: ternarization
  - name: TQ2_0
    size: "2.06 bpw"
    type: ternarization
  - name: Q2_K
    size: 2.96G
    ppl: +3.5199
    details: Llama-3-8B
  - name: Q2_K_S
    size: 2.96G
    ppl: +3.1836
    details: Llama-3-8B
  - name: IQ3_XXS
    size: "3.06 bpw"
    type: quantization
  - name: IQ3_S
    size: "3.44 bpw"
    type: quantization
  - name: IQ3_M
    size: "3.66 bpw"
    type: quantization mix
  # Alias-only entry: points at the Q3_K_M entry below.
  - name: Q3_K
    alias: Q3_K_M
  - name: IQ3_XS
    size: "3.3 bpw"
    type: quantization
  - name: Q3_K_S
    size: 3.41G
    ppl: +1.6321
    details: Llama-3-8B
  - name: Q3_K_M
    size: 3.74G
    ppl: +0.6569
    details: Llama-3-8B
  - name: Q3_K_L
    size: 4.03G
    ppl: +0.5562
    details: Llama-3-8B
  - name: IQ4_NL
    size: "4.50 bpw"
    type: non-linear quantization
  - name: IQ4_XS
    size: "4.25 bpw"
    type: non-linear quantization
  # Alias-only entry: points at the Q4_K_M entry below.
  - name: Q4_K
    alias: Q4_K_M
  - name: Q4_K_S
    size: 4.37G
    ppl: +0.2689
    details: Llama-3-8B
  - name: Q4_K_M
    size: 4.58G
    ppl: +0.1754
    details: Llama-3-8B
  # Alias-only entry: points at the Q5_K_M entry below.
  - name: Q5_K
    alias: Q5_K_M
  - name: Q5_K_S
    size: 5.21G
    ppl: +0.1049
    details: Llama-3-8B
  - name: Q5_K_M
    size: 5.33G
    ppl: +0.0569
    details: Llama-3-8B
  - name: Q6_K
    size: 6.14G
    ppl: +0.0217
    details: Llama-3-8B
  - name: Q8_0
    size: 7.96G
    ppl: +0.0026
    details: Llama-3-8B
  - name: F16
    size: 14.00G
    ppl: +0.0020
    details: Mistral-7B
  - name: BF16
    size: 14.00G
    ppl: -0.0050
    details: Mistral-7B
  # No ppl figure is given for F32.
  - name: F32
    size: 26.00G
    details: 7B
  - name: COPY
    description: Only copy tensors, no quantizing