Skip to content

Commit

Permalink
typo
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Gschwind committed Apr 10, 2024
1 parent 7c62a14 commit 62ec820
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,12 @@ def _int4_calc_padded_size(k, groupsize=1, innner_k_tiles=1):
def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, groupsize):
    """Run the int4 weight-packed matmul on ``x`` and restore its leading shape.

    The input is flattened to two dimensions, multiplied via
    ``torch.ops.aten._weight_int4pack_mm`` in bfloat16, cast back to the
    dtype of ``x``, and reshaped so that every leading dimension of ``x`` is
    kept and the last dimension becomes ``out_features``.

    Args:
        x: Input tensor; its last dimension is the reduction dimension.
        weight_int4pack: Packed int4 weight, forwarded to the kernel as-is.
        scales_and_zeros: Quantization scales/zero-points; converted to
            bfloat16 before being handed to the kernel.
        out_features: Size of the last dimension of the returned tensor.
        groupsize: Quantization group size, forwarded to the kernel as-is.

    Returns:
        A tensor with the dtype of ``x`` and shape
        ``x.size()[:-1] + (out_features,)``.
    """
    origin_x_size = x.size()
    # Collapse all leading dimensions so the kernel receives a 2-D input.
    x = x.reshape(-1, origin_x_size[-1])
    # The kernel is invoked with bfloat16 operands; the result is cast back to
    # the caller's dtype so the quantized path is transparent to callers.
    c = torch.ops.aten._weight_int4pack_mm(
        x.to(dtype=torch.bfloat16),
        weight_int4pack,
        groupsize,
        scales_and_zeros.to(dtype=torch.bfloat16),
    ).to(dtype=x.dtype)
    # Re-expand the leading dimensions that were flattened above.
    new_shape = origin_x_size[:-1] + (out_features,)
    return c.reshape(new_shape)
Expand Down

0 comments on commit 62ec820

Please sign in to comment.