optimize popcount implementation #348

sadlerap · 2023-10-06T00:28:08Z

In the current implementation, the gcc backend of rustc currently emits the
following for a function that implements popcount for a u32 (x86_64 targeting
AVX2, using standard unix calling convention):

popcount:
    mov     eax, edi
    and     edi, 1431655765
    shr     eax
    and     eax, 1431655765
    add     edi, eax
    mov     edx, edi
    and     edi, 858993459
    shr     edx, 2
    and     edx, 858993459
    add     edx, edi
    mov     eax, edx
    and     edx, 252645135
    shr     eax, 4
    and     eax, 252645135
    add     eax, edx
    mov     edx, eax
    and     eax, 16711935
    shr     edx, 8
    and     edx, 16711935
    add     edx, eax
    movzx   eax, dx
    shr     edx, 16
    add     eax, edx
    ret

Rather than using this implementation, gcc could be told to use Wenger's
algorithm. This would give the same function the following implementation:

popcount:
    xor eax, eax
    xor edx, edx
    popcnt eax, edi
    test edi, edi
    cmove eax, edx
    ret

This patch implements the popcount operation in terms of Wenger's algorithm in
all cases.

src/intrinsic/mod.rs

antoyo · 2023-10-08T12:48:06Z

Is the asm the same after your last fix?
Could you please also post the asm for popcount on a u64?

sadlerap · 2023-10-08T20:46:28Z

The asm should be the same. For completeness sake:

u8

Before:

popcount_u8:
        mov eax, edi
        and edi, 85
        shr al
        and eax, 85
        add edi, eax
        mov edx, edi
        and edi, 51
        shr dl, 2
        and edx, 51
        add edx, edi
        mov eax, edx
        shr dl, 4
        and eax, 15
        add eax, edx
        movzx eax, al
        ret

After:

popcount_u8:
        movzx eax, dil
        xor edx, edx
        popcnt eax, eax
        test dil, dil
        cmove eax, edx
        ret

u16

Before:

popcount_u16:
        mov eax, edi
        and di, 21845
        shr ax
        and ax, 21845
        add edi, eax
        mov eax, edi
        and di, 13107
        shr ax, 2
        and ax, 13107
        add eax, edi
        mov edx, eax
        and ax, 3855
        shr dx, 4
        and dx, 3855
        add edx, eax
        movzx eax, dl
        shr dx, 8
        add eax, edx
        movzx eax, ax
        ret

After:

popcount_u16:
        xor edx, edx
        popcnt ax, di
        test di, di
        movzx eax, ax
        cmove eax, edx
        ret

u32

Before:

popcount_u32:
        mov eax, edi
        and edi, 1431655765
        shr eax
        and eax, 1431655765
        add edi, eax
        mov edx, edi
        and edi, 858993459
        shr edx, 2
        and edx, 858993459
        add edx, edi
        mov eax, edx
        and edx, 252645135
        shr eax, 4
        and eax, 252645135
        add eax, edx
        mov edx, eax
        and eax, 16711935
        shr edx, 8
        and edx, 16711935
        add edx, eax
        movzx eax, dx
        shr edx, 16
        add eax, edx
        ret

After:

popcount_u32:
        xor eax, eax
        xor edx, edx
        popcnt eax, edi
        test edi, edi
        cmove eax, edx
        ret

u64

Before:

popcount_u64:
        movabs rdx, 6148914691236517205
        mov rax, rdi
        movabs rcx, 1085102592571150095
        shr rax
        and rdi, rdx
        and rax, rdx
        movabs rdx, 3689348814741910323
        add rdi, rax
        mov rax, rdi
        and rdi, rdx
        shr rax, 2
        and rax, rdx
        add rax, rdi
        mov rdx, rax
        and rax, rcx
        shr rdx, 4
        and rdx, rcx
        movabs rcx, 71777214294589695
        add rdx, rax
        mov rax, rdx
        and rdx, rcx
        shr rax, 8
        and rax, rcx
        movabs rcx, 281470681808895
        add rax, rdx
        mov rdx, rax
        and rax, rcx
        shr rdx, 16
        and rdx, rcx
        add rax, rdx
        mov rdx, rax
        shr rdx, 32
        add eax, edx
        ret

After:

popcount_u64:
        xor edx, edx
        xor eax, eax
        popcnt rdx, rdi
        test rdi, rdi
        cmovne eax, edx
        ret

u128

Before:

popcount_128:
        popcnt rsi, rsi
        popcnt rdi, rdi
        lea eax, [rdi+rsi]
        ret

After:

popcount_128:
        xor edx, edx
        xor eax, eax
        popcnt rdx, rsi
        test rsi, rsi
        cmovne eax, edx
        xor edx, edx
        popcnt rdx, rdi
        add edx, eax
        test rdi, rdi
        cmovne eax, edx
        ret

Looks like the 128-bit case might have worse codegen, but considering we're no longer using built-in functions, gcc shouldn't ever emit a function call here. Not sure which is better here, thoughts?

antoyo · 2023-10-09T13:38:46Z

I found out how to optimize this better.
It seems GCC requires the result type to be int (and not unsigned int):

So, by changing result_type to self.int_type, which requires to change value_type to value.get_type().to_unsigned(self.cx), we get an improvement for the cases of 64-bit and 128-bit.

I'm not sure why it doesn't work for the <= 32-bit cases: it should work as the GCC test cases above show. If you'd like to investigate, that's awesome; otherwise, we can just merge as is.

Btw, the case for u128 is broken (perhaps caused by my recent changes on the master branch). It works for i128, though. It generates:

0000000000014650 <_RNvCs45LWolUvLt3_9test_rust13popcount_u128.constprop.0>:
   14650:	31 d2                	xor    edx,edx
   14652:	31 c0                	xor    eax,eax
   14654:	f3 48 0f b8 d7       	popcnt rdx,rdi
   14659:	48 85 ff             	test   rdi,rdi
   1465c:	0f 45 c2             	cmovne eax,edx
   1465f:	c3                   	ret

(only one popcnt instead of the expected 2)

If you would like to take a look, that's good; otherwise I can fix it myself.

sadlerap · 2023-10-09T22:20:37Z

I'm not able to reproduce the u128 issue, but I've found codegen to be less verbose if we don't special case it.

One problem I ran into when developing this is assuming that the ctpop intrinsic always returns the same type, no matter the input type. From what I'm able to determine, the input type should be the same as the output, so I think we need to keep result_type as is. That said, there's no reason the counter can't be int, so I've implemented both this and the u128 fixes in 144fdba.

I'm now getting this:

popcount_u8:
        movzx eax, dil
        xor edx, edx
        popcnt eax, eax
        test dil, dil
        cmove eax, edx
        ret

popcount_u16:
        xor edx, edx
        popcnt ax, di
        test di, di
        movzx eax, ax
        cmove eax, edx
        ret

popcount_u32:
        xor eax, eax
        xor edx, edx
        popcnt eax, edi
        test edi, edi
        cmove eax, edx
        ret

popcount_u64:
        xor edx, edx
        xor eax, eax
        popcnt rdx, rdi
        test rdi, rdi
        cmovne eax, edx
        ret

popcount_128:
        mov rdx, rdi
        or rdx, rsi
        je .L68
        xor ecx, ecx
        popcnt rsi, rsi
        popcnt rcx, rdi
        lea eax, [rsi+rcx]
        ret
.L68:
        xor eax, eax
        ret

Sadly, it looks like we might be running into a gcc bug - there might not be a whole lot we can do here to get rid of the branching that gets generated until that bug gets fixed.

antoyo · 2023-10-09T22:29:14Z

I'm not able to reproduce the u128 issue, but I've found codegen to be less verbose if we don't special case it.

Did you rebase on master?

From what I'm able to determine, the input type should be the same as the output, so I think we need to keep result_type as is.

You're talking about the Rust intrinsic? If so, I was able to change the result_type to int and it seems to work. I'm wondering if there was a cast added elsewhere that made it work.
Did you end up having some errors when doing that?

sadlerap · 2023-10-10T03:12:46Z

Just rebased on master, still can't reproduce (though I am testing with my latest fixes). That said, it looks like CI complains with u128s, so I'll revert that change.

From what I'm able to determine, the input type should be the same as the output, so I think we need to keep result_type as is.

You're talking about the Rust intrinsic? If so, I was able to change the result_type to int and it seems to work. I'm wondering if there was a cast added elsewhere that made it work.
Did you end up having some errors when doing that?

Yes - the ui test tests/ui/intrinsics/intrinsics-integer.rs begins exhibiting compile-time errors for me.

antoyo · 2023-10-10T12:36:25Z

Ok, thanks for testing.

Is this ready to be merged?

sadlerap · 2023-10-10T13:39:40Z

At this point, there's not much more I think I can do. I want to revisit codegen for u128, but I'll punt that for a separate PR if anything comes of that.

antoyo · 2023-10-10T23:33:54Z

I checked out your branch directly and I still get the bad codegen for u128:

edit: I was using the wrong branch of gcc.
It seems now both i128 and u128 are wrong:

00000000000145f0 <_RNvCsC6pwSZXHbp_9test_rust13popcount_i128>:
   145f0:	31 d2                	xor    edx,edx
   145f2:	31 c0                	xor    eax,eax
   145f4:	f3 48 0f b8 d7       	popcnt rdx,rdi
   145f9:	48 85 ff             	test   rdi,rdi
   145fc:	0f 45 c2             	cmovne eax,edx
   145ff:	c3                   	ret

0000000000014600 <_RNvCsC6pwSZXHbp_9test_rust13popcount_u128>:
   14600:	31 d2                	xor    edx,edx
   14602:	31 c0                	xor    eax,eax
   14604:	f3 48 0f b8 d7       	popcnt rdx,rdi
   14609:	48 85 ff             	test   rdi,rdi
   1460c:	0f 45 c2             	cmovne eax,edx
   1460f:	c3                   	ret

edit: the following was with a bad gcc branch (forget about this):

0000000000014620 <_RNvCsC6pwSZXHbp_9test_rust13popcount_u128.constprop.0>:
   14620:	31 d2                	xor    edx,edx
   14622:	31 c0                	xor    eax,eax
   14624:	f3 48 0f b8 d7       	popcnt rdx,rdi
   14629:	48 85 ff             	test   rdi,rdi
   1462c:	0f 45 c2             	cmovne eax,edx
   1462f:	c3                   	ret

0000000000014630 <_RNvCsC6pwSZXHbp_9test_rust13popcount_i128.constprop.0>:
   14630:	31 c0                	xor    eax,eax
   14632:	f3 48 0f b8 c6       	popcnt rax,rsi
   14637:	48 85 f6             	test   rsi,rsi
   1463a:	48 0f 44 c6          	cmove  rax,rsi
   1463e:	31 c9                	xor    ecx,ecx
   14640:	f3 48 0f b8 cf       	popcnt rcx,rdi
   14645:	48 89 c2             	mov    rdx,rax
   14648:	48 01 c8             	add    rax,rcx
   1464b:	48 85 ff             	test   rdi,rdi
   1464e:	48 0f 44 c2          	cmove  rax,rdx
   14652:	c3                   	ret
   14653:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
   1465a:	00 00 00 
   1465d:	0f 1f 00             	nop    DWORD PTR [rax]

sadlerap · 2023-10-11T00:16:30Z

For the record: I've been testing against commit a033810a6062a1f8ebe51e224eaa4faa0c7e173c in your gcc fork. It doesn't look like changes since then should cause any difference, but I'll retest against the latest commit there and see if there's any change on my side.

sadlerap · 2023-10-11T00:26:05Z

hmmm, latest commit spits out a lot of warnings:

libgccjit.so: error: ‘always_inline’ function might not be inlinable [-Wattributes]

antoyo · 2023-10-11T00:36:02Z

Yeah, sorry about that. I merged a GCC PR, but not rust-lang/gccjit.rs#23 so this is causing some issue. I think it's best to wait that I merge it, so probably tomorrow.

sadlerap · 2023-10-11T00:38:49Z

No worries - take your time.

antoyo · 2023-10-17T12:42:45Z

This is fixed. You can rebase with master.

sadlerap · 2023-10-17T16:14:12Z

I've rebased, looks like CI passes. Can you make sure everything looks good from your side?

antoyo · 2023-10-17T16:15:11Z

Yes. I'll look at that tonight.

src/intrinsic/mod.rs

In the current implementation, the gcc backend of rustc currently emits the following for a function that implements popcount for a u32 (x86_64 targeting AVX2, using standard unix calling convention): popcount: mov eax, edi and edi, 1431655765 shr eax and eax, 1431655765 add edi, eax mov edx, edi and edi, 858993459 shr edx, 2 and edx, 858993459 add edx, edi mov eax, edx and edx, 252645135 shr eax, 4 and eax, 252645135 add eax, edx mov edx, eax and eax, 16711935 shr edx, 8 and edx, 16711935 add edx, eax movzx eax, dx shr edx, 16 add eax, edx ret Rather than using this implementation, gcc could be told to use Wenger's algorithm. This would give the same function the following implementation: popcount: xor eax, eax xor edx, edx popcnt eax, edi test edi, edi cmove eax, edx ret This patch implements the popcount operation in terms of Wenger's algorithm in all cases. Signed-off-by: Andy Sadler <[email protected]>

sadlerap · 2023-10-17T21:31:52Z

Applied your suggestions with 64abf58.

antoyo · 2023-10-17T22:12:36Z

I'm still getting the wrong asm for u128 and i128:

00000000000145f0 <_RNvCsC6pwSZXHbp_9test_rust13popcount_i128>:
   145f0:	31 d2                	xor    edx,edx
   145f2:	31 c0                	xor    eax,eax
   145f4:	f3 48 0f b8 d7       	popcnt rdx,rdi
   145f9:	48 85 ff             	test   rdi,rdi
   145fc:	0f 45 c2             	cmovne eax,edx
   145ff:	c3                   	ret

0000000000014600 <_RNvCsC6pwSZXHbp_9test_rust13popcount_u128>:
   14600:	31 d2                	xor    edx,edx
   14602:	31 c0                	xor    eax,eax
   14604:	f3 48 0f b8 d7       	popcnt rdx,rdi
   14609:	48 85 ff             	test   rdi,rdi
   1460c:	0f 45 c2             	cmovne eax,edx
   1460f:	c3                   	ret

It has only one popcnt instead of the expected two. Are you able to reproduce on your side?

sadlerap · 2023-10-18T00:20:34Z

I haven't been able to reproduce that behavior. Do you have the source for a test I can use locally?

antoyo · 2023-10-18T00:22:03Z

Yes, that's what I use:

#[inline(never)]
fn popcount_i128(val: i128) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_u128(val: u128) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_u64(val: u64) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_i64(val: i64) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_u32(val: u32) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_i32(val: i32) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_u16(val: u16) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_i16(val: i16) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_u8(val: u8) -> u32 {
    val.count_ones()
}

#[inline(never)]
fn popcount_i8(val: i8) -> u32 {
    val.count_ones()
}

fn main() {
    let value = std::env::args().count();
    println!("{}", popcount_i128(value as i128));
    println!("{}", popcount_u128(value as u128));
    println!("{}", popcount_i64(value as i64));
    println!("{}", popcount_u64(value as u64));
    println!("{}", popcount_i32(value as i32));
    println!("{}", popcount_u32(value as u32));
    //println!("{}", popcount_u16(value as u16));
    //println!("{}", popcount_i16(value as i16));
    //println!("{}", popcount_u8(value as u8));
    //println!("{}", popcount_i8(value as i8));
}

I could also try to add this to the int tests to see if the CI fails in the same way that it fails locally.

sadlerap · 2023-10-18T00:25:07Z

Thanks - I'll try to see if I can reproduce locally.

I could also try to add this to the int tests to see if the CI fails in the same way that it fails locally.

FWIW rustc already has some ui tests for popcount intrinsics under tests/ui/intrinsics/intrinsics-integer.rs, but it may not be a bad idea.

antoyo · 2023-10-18T00:28:32Z

Oh, I think I have been mistaken by some optimizations that are made since the value sent to popcount_u128 is of type usize and somewhere in the compiler, it will specialize the function so that it only works on 64-bit values.
Using the following:

    println!("{}", popcount_i128(std::hint::black_box(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128 as i128)));

generates the correct:

0000000000014620 <_RNvCsC6pwSZXHbp_9test_rust13popcount_u128>:
   14620:	31 c0                	xor    eax,eax
   14622:	f3 48 0f b8 c6       	popcnt rax,rsi
   14627:	48 85 f6             	test   rsi,rsi
   1462a:	48 0f 44 c6          	cmove  rax,rsi
   1462e:	31 c9                	xor    ecx,ecx
   14630:	f3 48 0f b8 cf       	popcnt rcx,rdi
   14635:	48 89 c2             	mov    rdx,rax
   14638:	48 01 c8             	add    rax,rcx
   1463b:	48 85 ff             	test   rdi,rdi
   1463e:	48 0f 44 c2          	cmove  rax,rdx
   14642:	c3                   	ret
   14643:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
   1464a:	00 00 00 
   1464d:	0f 1f 00             	nop    DWORD PTR [rax]

Does that make sense?

sadlerap · 2023-10-18T00:33:21Z

That looks to be what's happening, yes. If I replace value with a constant (e.g. 0xffff_ffff_ffff_ffffu128), all the functions get turned into functions that return constants.

Details

popcount_test::popcount_i8.constprop.0:

        mov eax, 8
        ret

popcount_test::popcount_u8.constprop.0:

        mov eax, 8
        ret

popcount_test::popcount_i16.constprop.0:

        mov eax, 16
        ret

popcount_test::popcount_u16.constprop.0:

        mov eax, 16
        ret

popcount_test::popcount_i32.constprop.0:

        mov eax, 32
        ret

popcount_test::popcount_u32.constprop.0:

        mov eax, 32
        ret

popcount_test::popcount_i64.constprop.0:

        mov eax, 64
        ret

popcount_test::popcount_u64.constprop.0:

        mov eax, 64
        ret

popcount_test::popcount_u128.constprop.0:

        mov eax, 64
        ret

popcount_test::popcount_i128.constprop.0:

        mov eax, 64
        ret

Looks like the optimizer was smarter than I realized!

antoyo · 2023-10-18T00:35:23Z

Your earlier comment shows a shorter asm for the 128-bit case (with a jump, though).
Did you change anything that you forgot to push or is this ready to be merged?

Edit: Oh, I think you reverted that change.
I'll still wait for your confirmation before merging, though.

sadlerap · 2023-10-18T00:43:30Z

I reverted that change since it was causing issues on builds where 128-bit integers weren't natively supported. I have a patch re-implementing and gating it behind 128-bit integers being natively supported by libgccjit. If you want, I can post it in this PR, otherwise I'll make another PR for it.

antoyo · 2023-10-18T00:47:36Z

Thanks a lot for your contribution!

You can open a new PR for this. I'd be curious to know what's causing the issues on non-native 128-bit integers.

sadlerap · 2023-10-18T00:51:49Z

AIUI it's because 128-bit integers aren't actually numbers when we emulate support for them, and that broke a few assumptions. You can find the logs for that CI run here.

antoyo · 2023-10-18T00:54:40Z

This errors seemed to be caused by the fact that we call this libgccjit function directly instead of the wrapper in the int module.
I'm afraid you'll have to revert some of the suggestions I made earlier in this PR to make this work.
I'm sorry.

sadlerap · 2023-10-18T01:00:28Z

Filed #354 as a followup. I think the easiest thing to do is to gate support there and fall back on splitting 128-bit integers into two 64-bit integers and combining the result. Once this bug gets fixed, I doubt there will be much of a difference in codegen anyway.

antoyo reviewed Oct 6, 2023

View reviewed changes

src/intrinsic/mod.rs Outdated Show resolved Hide resolved

sadlerap force-pushed the optimize-popcount branch from 1841ab7 to 5ae3d85 Compare October 6, 2023 03:52

sadlerap marked this pull request as draft October 6, 2023 04:19

sadlerap force-pushed the optimize-popcount branch from 5ae3d85 to 78c7ac3 Compare October 8, 2023 01:31

sadlerap marked this pull request as ready for review October 8, 2023 01:31

sadlerap force-pushed the optimize-popcount branch from 78c7ac3 to 144fdba Compare October 9, 2023 22:20

sadlerap force-pushed the optimize-popcount branch from 144fdba to 10e97a3 Compare October 10, 2023 03:12

antoyo mentioned this pull request Oct 10, 2023

Further optimize the popcount intrinsic #351

Open

sadlerap force-pushed the optimize-popcount branch from 10e97a3 to 15c20c0 Compare October 17, 2023 14:00

antoyo reviewed Oct 17, 2023

View reviewed changes

src/intrinsic/mod.rs Outdated Show resolved Hide resolved

src/intrinsic/mod.rs Outdated Show resolved Hide resolved

src/intrinsic/mod.rs Outdated Show resolved Hide resolved

src/intrinsic/mod.rs Outdated Show resolved Hide resolved

sadlerap force-pushed the optimize-popcount branch from 15c20c0 to 64abf58 Compare October 17, 2023 21:21

antoyo merged commit fabdc1a into rust-lang:master Oct 18, 2023
21 of 32 checks passed

sadlerap deleted the optimize-popcount branch October 18, 2023 00:49

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

optimize popcount implementation #348

optimize popcount implementation #348

sadlerap commented Oct 6, 2023 •

edited

Loading

antoyo commented Oct 8, 2023

sadlerap commented Oct 8, 2023

antoyo commented Oct 9, 2023 •

edited

Loading

sadlerap commented Oct 9, 2023

antoyo commented Oct 9, 2023

sadlerap commented Oct 10, 2023

antoyo commented Oct 10, 2023

sadlerap commented Oct 10, 2023

antoyo commented Oct 10, 2023 •

edited

Loading

sadlerap commented Oct 11, 2023

sadlerap commented Oct 11, 2023

antoyo commented Oct 11, 2023

sadlerap commented Oct 11, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 17, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 17, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 •

edited

Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 •

edited

Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 •

edited

Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023

sadlerap commented Oct 18, 2023

optimize popcount implementation #348

optimize popcount implementation #348

Conversation

sadlerap commented Oct 6, 2023 • edited Loading

antoyo commented Oct 8, 2023

sadlerap commented Oct 8, 2023

antoyo commented Oct 9, 2023 • edited Loading

sadlerap commented Oct 9, 2023

antoyo commented Oct 9, 2023

sadlerap commented Oct 10, 2023

antoyo commented Oct 10, 2023

sadlerap commented Oct 10, 2023

antoyo commented Oct 10, 2023 • edited Loading

sadlerap commented Oct 11, 2023

sadlerap commented Oct 11, 2023

antoyo commented Oct 11, 2023

sadlerap commented Oct 11, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 17, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 17, 2023

antoyo commented Oct 17, 2023

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 • edited Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 • edited Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023 • edited Loading

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023

sadlerap commented Oct 18, 2023

antoyo commented Oct 18, 2023

sadlerap commented Oct 18, 2023

sadlerap commented Oct 6, 2023 •

edited

Loading

antoyo commented Oct 9, 2023 •

edited

Loading

antoyo commented Oct 10, 2023 •

edited

Loading

antoyo commented Oct 18, 2023 •

edited

Loading

antoyo commented Oct 18, 2023 •

edited

Loading

antoyo commented Oct 18, 2023 •

edited

Loading