-
Notifications
You must be signed in to change notification settings - Fork 23
Memory usage profiling for benchmark fgemv
ZHG2017 edited this page Mar 28, 2019
·
1 revision
Command: ./benchmark-fgemv -m 4000 -k 4000 -b 100 -q 0 -i 1 -p 3 -t 4
Massif arguments: --pages-as-heap=yes
ms_print arguments: massif.out.10021
GB 3.559^ :: | ::: | : : | : : | : : | : : #::@: | : : #::@: | : : #::@: | : : #::@: | : : #::@: | : : #::@: | : : #::@: | : : #::@: | ::::::::: : #::@: | :::::::::::::::::::::::::::::::::::::::::::::::::: : : #::@: | ::: :: : : : #::@:@ | :::: :: : : : #::@:@ |:::::: :: : : : #::@:@ |:::::: :: : : : #::@:@ |:::::: :: : : : #::@:@ 0 +----------------------------------------------------------------------->Gi 0 65.75
Number of snapshots: 59
Detailed snapshots: [27 (peak), 45, 54]
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B)
0 0 270,336 270,336 0 0 1 8,922,817 544,104,448 544,104,448 0 0 2 512,217,100 678,322,176 678,322,176 0 0 3 512,217,131 611,213,312 611,213,312 0 0 4 1,015,499,988 745,431,040 745,431,040 0 0 5 1,015,500,011 678,322,176 678,322,176 0 0 6 1,518,681,162 745,431,040 745,431,040 0 0 7 2,021,946,495 879,648,768 879,648,768 0 0 8 2,021,946,526 812,539,904 812,539,904 0 0 9 2,525,127,680 946,757,632 946,757,632 0 0 10 2,525,127,703 879,648,768 879,648,768 0 0 11 3,028,466,669 946,757,632 946,757,632 0 0 12 3,531,647,834 1,080,975,360 1,080,975,360 0 0 13 3,531,647,857 1,013,866,496 1,013,866,496 0 0 14 4,311,257,612 1,080,975,360 1,080,975,360 0 0 15 5,871,332,106 1,215,193,088 1,215,193,088 0 0 16 5,871,332,129 1,148,084,224 1,148,084,224 0 0 17 32,376,198,024 1,181,638,656 1,181,638,656 0 0 18 32,594,099,365 1,272,090,624 1,272,090,624 0 0 19 32,742,347,725 1,305,645,056 1,305,645,056 0 0 20 32,972,813,408 1,255,337,984 1,255,337,984 0 0 21 33,264,149,557 1,230,376,960 1,230,376,960 0 0 22 33,477,142,854 1,322,577,920 1,322,577,920 0 0 23 53,061,797,896 1,356,132,352 1,356,132,352 0 0 24 61,050,287,709 2,892,136,448 2,892,136,448 0 0 25 61,050,289,627 3,788,140,544 3,788,140,544 0 0 26 62,860,160,492 3,821,694,976 3,821,694,976 0 0 27 65,441,799,051 2,925,694,976 2,925,694,976 0 0 100.00% (2,925,694,976B) (page allocation syscalls) mmap/mremap/brk, --alloc-fns, etc. 
->97.59% (2,855,231,488B) 0x7D726B9: mmap (mmap.c:34) | ->61.25% (1,792,012,288B) 0x7CF2AFD: sysmalloc (malloc.c:2323) | | ->61.25% (1,792,012,288B) 0x7CF3741: _int_malloc (malloc.c:3827) | | ->52.50% (1,536,008,192B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | | ->52.50% (1,536,008,192B) 0x7CF871B: posix_memalign (malloc.c:3120) | | | ->52.50% (1,536,008,192B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | | ->52.50% (1,536,004,096B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | | ->52.50% (1,536,004,096B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | | ->52.50% (1,536,004,096B) 0x4372F3: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (benchmark-fgemv.C:91) | | | | | ->52.50% (1,536,004,096B) 0x437E68: void 
benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | | | ->52.50% (1,536,004,096B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | | ->52.50% (1,536,004,096B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->52.50% (1,536,004,096B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | ->52.50% (1,536,004,096B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | | | ->00.00% (4,096B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | ->08.75% (256,004,096B) 0x7CF5182: malloc (malloc.c:2913) | | ->08.75% (256,004,096B) 0x72271E6: operator new(unsigned long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) | | ->08.75% (256,004,096B) 0x437A50: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | ->08.75% (256,004,096B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | ->08.75% (256,004,096B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->08.75% (256,004,096B) 0x82486B8: start_thread (pthread_create.c:333) | | ->08.75% (256,004,096B) 0x7D7841B: clone (clone.S:109) | | | ->18.35% (536,870,912B) 0x7CEE3CF: new_heap (arena.c:438) | | ->13.76% (402,653,184B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->13.76% (402,653,184B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->11.47% (335,544,320B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->11.47% (335,544,320B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->11.47% (335,544,320B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->11.47% (335,544,320B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->11.47% (335,544,320B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->11.47% (335,544,320B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->11.47% (335,544,320B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->11.47% (335,544,320B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->02.29% (67,108,864B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | | ->02.29% (67,108,864B) 0x7CF871B: posix_memalign (malloc.c:3120) | | | ->02.29% (67,108,864B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | | ->02.29% (67,108,864B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | ->02.29% (67,108,864B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->02.29% (67,108,864B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, 
Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->02.29% (67,108,864B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.29% (67,108,864B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->02.29% (67,108,864B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->02.29% (67,108,864B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->02.29% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.29% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->02.29% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->04.59% (134,217,728B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->04.59% (134,217,728B) 0x7CF5248: malloc (malloc.c:2911) | | ->04.59% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.29% (67,108,864B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | ->02.29% (67,108,864B) 0x408116: main._omp_fn.1 (zring.h:65) | | | ->02.29% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.29% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->02.29% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->02.29% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.29% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->02.29% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->02.29% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->02.29% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->08.03% (234,881,024B) 0x54022B4: ??? 
(in /usr/lib/libopenblasp-r0.2.18.so) | | ->08.03% (234,881,024B) 0x5402600: blas_memory_alloc (in /usr/lib/libopenblasp-r0.2.18.so) | | ->04.59% (134,217,728B) 0x51EDF40: cblas_dgemm (in /usr/lib/libopenblasp-r0.2.18.so) | | | ->03.44% (100,663,296B) 0x42563C: Givaro::ZRing<double>::Element_ptr FFLAS::fgemm<Givaro::ZRing<double>, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | | ->03.44% (100,663,296B) 0x426F64: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) (pfgemm_variants.inl:187) | | | | ->02.29% (67,108,864B) 0x428155: FFPACK::rns_double::init(unsigned long, unsigned long, double*, unsigned long, Givaro::Integer const*, unsigned long, unsigned long, bool) const (fflas_pfgemm.inl:66) | | | | | ->02.29% (67,108,864B) 0x428730: FFPACK::RNSInteger<FFPACK::rns_double>::init(FFPACK::rns_double_elt&, 
Givaro::Integer const&) const (rns-integer.h:115) | | | | | ->02.29% (67,108,864B) 0x432D9A: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (rns-integer.h:63) | | | | | ->02.29% (67,108,864B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | | | ->02.29% (67,108,864B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, 
FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | | | ->01.15% (33,554,432B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.15% (33,554,432B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.15% (33,554,432B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | | | | ->01.15% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.15% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | | ->01.15% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | | | ->01.15% (33,554,432B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->01.15% (33,554,432B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | | | ->01.15% (33,554,432B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | | | ->01.15% (33,554,432B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | | ->01.15% (33,554,432B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->01.15% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | ->01.15% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | ->01.15% (33,554,432B) 0x427E04: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) [clone ._omp_fn.26] (pfgemm_variants.inl:213) | | | | ->01.15% (33,554,432B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.15% (33,554,432B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.15% (33,554,432B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | | ->01.15% (33,554,432B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.15% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->01.15% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->01.15% (33,554,432B) 0x4156A9: void FFLAS::fgemm<Givaro::ModularBalanced<double> >(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | ->01.15% (33,554,432B) 0x41EFEB: Givaro::ModularBalanced<double>::Element_ptr FFLAS::fgemm<Givaro::ModularBalanced<double>, FFLAS::ModeCategories::LazyTag>(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_winograd.inl:400) | | | ->01.15% (33,554,432B) 0x431C19: Givaro::Modular<double, double, void>::Element_ptr FFLAS::Protected::fgemm_convert<Givaro::ModularBalanced<double>, Givaro::Modular<double, double, void>, FFLAS::ModeCategories::DelayedTag>(Givaro::Modular<double, double, void> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, 
Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::Modular<double, double, void>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DelayedTag, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:429) | | | ->01.15% (33,554,432B) 0x43474F: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:414) | | | ->01.15% (33,554,432B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->01.15% (33,554,432B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, 
Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->01.15% (33,554,432B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->01.15% (33,554,432B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->01.15% (33,554,432B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->01.15% (33,554,432B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->01.15% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->01.15% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->01.15% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | ->03.44% (100,663,296B) 0x5402CF8: ??? 
(in /usr/lib/libopenblasp-r0.2.18.so) | | ->03.44% (100,663,296B) 0x82486B8: start_thread (pthread_create.c:333) | | ->03.44% (100,663,296B) 0x7D7841B: clone (clone.S:109) | | | ->06.88% (201,326,592B) 0x7CEE35B: new_heap (arena.c:427) | | ->04.59% (134,217,728B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->04.59% (134,217,728B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->04.59% (134,217,728B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->04.59% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->04.59% (134,217,728B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->04.59% (134,217,728B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->04.59% (134,217,728B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->04.59% (134,217,728B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->04.59% (134,217,728B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->04.59% (134,217,728B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | ->02.29% (67,108,864B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->02.29% (67,108,864B) 0x7CF5248: malloc (malloc.c:2911) | | ->02.29% (67,108,864B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.29% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.29% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->02.29% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->02.29% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.29% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->02.29% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->01.72% (50,356,224B) 0x82491D4: pthread_create@@GLIBC_2.2.5 (allocatestack.c:513) | | ->01.72% (50,356,224B) in 2 places, all below massif's threshold (1.00%) | | | ->01.36% (39,784,448B) 0x7CF27AD: sysmalloc (malloc.c:2517) | ->01.36% (39,784,448B) 0x7CF3741: _int_malloc (malloc.c:3827) | ->01.36% (39,784,448B) 0x7CF3C08: _int_memalign (malloc.c:4421) | ->01.36% (39,784,448B) 0x7CF871B: posix_memalign (malloc.c:3120) | ->01.36% (39,784,448B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | ->01.36% (39,784,448B) in 2 places, all below massif's threshold (1.00%) | ->02.37% (69,361,664B) 0x401B3F9: mmap (mmap.c:34) | ->02.35% (68,857,856B) 0x40068CB: _dl_map_object_from_fd (dl-map-segments.h:56) | | ->02.35% (68,857,856B) 0x4008C25: _dl_map_object (dl-load.c:2498) | | ->02.28% (66,752,512B) 0x400DBB0: openaux (dl-deps.c:63) | | | ->02.28% (66,752,512B) 0x4010572: _dl_catch_error (dl-error.c:187) | | | ->02.28% (66,752,512B) 0x400E1F0: _dl_map_object_deps (dl-deps.c:254) | | | ->02.28% (66,752,512B) 0x4003A27: dl_main (rtld.c:1647) | | | ->02.28% (66,752,512B) 0x40198AB: _dl_sysdep_start (dl-sysdep.c:249) | | | ->02.28% (66,752,512B) 0x4001C28: _dl_start (rtld.c:323) | | | ->02.28% (66,752,512B) 0x4000C36: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so) | | | ->02.28% (66,752,512B) 0xD: ??? | | | ->02.28% (66,752,512B) 0xFFF00016D: ??? | | | ->02.28% (66,752,512B) 0xFFF00017F: ??? | | | ->02.28% (66,752,512B) 0xFFF000182: ??? | | | ->02.28% (66,752,512B) 0xFFF000187: ??? | | | ->02.28% (66,752,512B) 0xFFF00018A: ??? | | | ->02.28% (66,752,512B) 0xFFF00018F: ??? | | | ->02.28% (66,752,512B) 0xFFF000192: ??? | | | ->02.28% (66,752,512B) 0xFFF000196: ??? | | | ->02.28% (66,752,512B) 0xFFF000199: ??? | | | ->02.28% (66,752,512B) 0xFFF00019B: ??? 
| | | ->02.28% (66,752,512B) 0xFFF00019E: ??? | | | ->02.28% (66,752,512B) 0xFFF0001A0: ??? | | | ->02.28% (66,752,512B) 0xFFF0001A3: ??? | | | ->02.28% (66,752,512B) 0xFFF0001A5: ??? | | | ->02.28% (66,752,512B) 0xFFF0001A8: ??? | | | | | ->00.07% (2,105,344B) in 1+ places, all below ms_print's threshold (01.00%) | | | ->00.02% (503,808B) in 1+ places, all below ms_print's threshold (01.00%) | ->00.04% (1,101,824B) in 1+ places, all below ms_print's threshold (01.00%)
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B)
28 65,445,038,610 3,053,694,976 3,053,694,976 0 0 29 65,716,280,324 2,925,690,880 2,925,690,880 0 0 30 65,716,281,570 3,053,694,976 3,053,694,976 0 0 31 65,987,523,287 2,925,690,880 2,925,690,880 0 0 32 65,987,524,533 3,053,694,976 3,053,694,976 0 0 33 66,258,766,272 2,925,690,880 2,925,690,880 0 0 34 66,258,767,518 3,053,694,976 3,053,694,976 0 0 35 66,530,009,295 2,925,690,880 2,925,690,880 0 0 36 66,801,252,218 2,925,690,880 2,925,690,880 0 0 37 66,801,253,464 3,053,694,976 3,053,694,976 0 0 38 67,072,495,227 2,925,690,880 2,925,690,880 0 0 39 67,072,496,473 3,053,694,976 3,053,694,976 0 0 40 67,343,738,202 2,925,690,880 2,925,690,880 0 0 41 67,343,739,448 3,053,694,976 3,053,694,976 0 0 42 67,614,981,189 2,925,690,880 2,925,690,880 0 0 43 67,614,982,435 3,053,694,976 3,053,694,976 0 0 44 67,886,224,186 2,925,690,880 2,925,690,880 0 0 45 67,886,225,432 3,053,694,976 3,053,694,976 0 0 100.00% (3,053,694,976B) (page allocation syscalls) mmap/mremap/brk, --alloc-fns, etc. ->97.69% (2,983,231,488B) 0x7D726B9: mmap (mmap.c:34) | ->62.88% (1,920,012,288B) 0x7CF2AFD: sysmalloc (malloc.c:2323) | | ->62.88% (1,920,012,288B) 0x7CF3741: _int_malloc (malloc.c:3827) | | ->54.49% (1,664,008,192B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | | ->54.49% (1,664,008,192B) 0x7CF871B: posix_memalign (malloc.c:3120) | | | ->54.49% (1,664,008,192B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | | ->50.30% (1,536,004,096B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, 
FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | | ->50.30% (1,536,004,096B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | | ->50.30% (1,536,004,096B) 0x4372F3: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (benchmark-fgemv.C:91) | | | | | ->50.30% (1,536,004,096B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | | | ->50.30% (1,536,004,096B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | | ->50.30% (1,536,004,096B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->50.30% (1,536,004,096B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | ->50.30% (1,536,004,096B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | | | ->04.19% (128,004,096B) 0x4316FA: Givaro::Modular<double, double, void>::Element_ptr FFLAS::Protected::fgemm_convert<Givaro::ModularBalanced<double>, Givaro::Modular<double, double, void>, FFLAS::ModeCategories::DelayedTag>(Givaro::Modular<double, double, void> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::Modular<double, double, void>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DelayedTag, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:61) | | | | ->04.19% (128,004,096B) 0x43474F: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:414) | | | | ->04.19% (128,004,096B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, 
Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | | ->04.19% (128,004,096B) 0x4372F3: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (benchmark-fgemv.C:91) | | | | ->04.19% (128,004,096B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | | ->04.19% (128,004,096B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->04.19% (128,004,096B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->04.19% (128,004,096B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->04.19% (128,004,096B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | ->08.38% (256,004,096B) 0x7CF5182: malloc (malloc.c:2913) | | ->08.38% (256,004,096B) 0x72271E6: operator new(unsigned long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) | | ->08.38% (256,004,096B) 0x437A50: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | ->08.38% (256,004,096B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | ->08.38% (256,004,096B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->08.38% (256,004,096B) 0x82486B8: start_thread (pthread_create.c:333) | | ->08.38% (256,004,096B) 0x7D7841B: clone (clone.S:109) | | | ->17.58% (536,870,912B) 0x7CEE3CF: new_heap (arena.c:438) | | ->13.19% (402,653,184B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->13.19% (402,653,184B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->10.99% (335,544,320B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->10.99% (335,544,320B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->10.99% (335,544,320B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->10.99% (335,544,320B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->10.99% (335,544,320B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->10.99% (335,544,320B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->10.99% (335,544,320B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->10.99% (335,544,320B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->02.20% (67,108,864B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | | ->02.20% (67,108,864B) 0x7CF871B: posix_memalign (malloc.c:3120) | | | ->02.20% (67,108,864B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | | ->02.20% (67,108,864B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | ->02.20% (67,108,864B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->02.20% (67,108,864B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, 
Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->02.20% (67,108,864B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.20% (67,108,864B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->02.20% (67,108,864B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->02.20% (67,108,864B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->02.20% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.20% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->02.20% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->04.40% (134,217,728B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->04.40% (134,217,728B) 0x7CF5248: malloc (malloc.c:2911) | | ->04.40% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.20% (67,108,864B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | ->02.20% (67,108,864B) 0x408116: main._omp_fn.1 (zring.h:65) | | | ->02.20% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.20% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->02.20% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->02.20% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.20% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->02.20% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->02.20% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->02.20% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->07.69% (234,881,024B) 0x54022B4: ??? 
(in /usr/lib/libopenblasp-r0.2.18.so) | | ->07.69% (234,881,024B) 0x5402600: blas_memory_alloc (in /usr/lib/libopenblasp-r0.2.18.so) | | ->04.40% (134,217,728B) 0x51EDF40: cblas_dgemm (in /usr/lib/libopenblasp-r0.2.18.so) | | | ->03.30% (100,663,296B) 0x42563C: Givaro::ZRing<double>::Element_ptr FFLAS::fgemm<Givaro::ZRing<double>, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | | ->03.30% (100,663,296B) 0x426F64: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) (pfgemm_variants.inl:187) | | | | ->02.20% (67,108,864B) 0x428155: FFPACK::rns_double::init(unsigned long, unsigned long, double*, unsigned long, Givaro::Integer const*, unsigned long, unsigned long, bool) const (fflas_pfgemm.inl:66) | | | | | ->02.20% (67,108,864B) 0x428730: FFPACK::RNSInteger<FFPACK::rns_double>::init(FFPACK::rns_double_elt&, 
Givaro::Integer const&) const (rns-integer.h:115) | | | | | ->02.20% (67,108,864B) 0x432D9A: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (rns-integer.h:63) | | | | | ->02.20% (67,108,864B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | | | ->02.20% (67,108,864B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, 
FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | | | ->01.10% (33,554,432B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.10% (33,554,432B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.10% (33,554,432B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | | | | ->01.10% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | | ->01.10% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | | ->01.10% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | | | ->01.10% (33,554,432B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->01.10% (33,554,432B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | | | ->01.10% (33,554,432B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | | | ->01.10% (33,554,432B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | | ->01.10% (33,554,432B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->01.10% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | ->01.10% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | ->01.10% (33,554,432B) 0x427E04: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) [clone ._omp_fn.26] (pfgemm_variants.inl:213) | | | | ->01.10% (33,554,432B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.10% (33,554,432B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.10% (33,554,432B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | | ->01.10% (33,554,432B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->01.10% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->01.10% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->01.10% (33,554,432B) 0x4156A9: void FFLAS::fgemm<Givaro::ModularBalanced<double> >(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | ->01.10% (33,554,432B) 0x41EFEB: Givaro::ModularBalanced<double>::Element_ptr FFLAS::fgemm<Givaro::ModularBalanced<double>, FFLAS::ModeCategories::LazyTag>(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_winograd.inl:400) | | | ->01.10% (33,554,432B) 0x431C19: Givaro::Modular<double, double, void>::Element_ptr FFLAS::Protected::fgemm_convert<Givaro::ModularBalanced<double>, Givaro::Modular<double, double, void>, FFLAS::ModeCategories::DelayedTag>(Givaro::Modular<double, double, void> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, 
Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::Modular<double, double, void>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DelayedTag, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:429) | | | ->01.10% (33,554,432B) 0x43474F: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:414) | | | ->01.10% (33,554,432B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->01.10% (33,554,432B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, 
Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->01.10% (33,554,432B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->01.10% (33,554,432B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->01.10% (33,554,432B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->01.10% (33,554,432B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->01.10% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->01.10% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->01.10% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | ->03.30% (100,663,296B) 0x5402CF8: ??? 
(in /usr/lib/libopenblasp-r0.2.18.so) | | ->03.30% (100,663,296B) 0x82486B8: start_thread (pthread_create.c:333) | | ->03.30% (100,663,296B) 0x7D7841B: clone (clone.S:109) | | | ->06.59% (201,326,592B) 0x7CEE35B: new_heap (arena.c:427) | | ->04.40% (134,217,728B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->04.40% (134,217,728B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->04.40% (134,217,728B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->04.40% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->04.40% (134,217,728B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->04.40% (134,217,728B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->04.40% (134,217,728B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->04.40% (134,217,728B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->04.40% (134,217,728B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->04.40% (134,217,728B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | ->02.20% (67,108,864B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->02.20% (67,108,864B) 0x7CF5248: malloc (malloc.c:2911) | | ->02.20% (67,108,864B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.20% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->02.20% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->02.20% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->02.20% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.20% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->02.20% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->01.65% (50,356,224B) 0x82491D4: pthread_create@@GLIBC_2.2.5 (allocatestack.c:513) | | ->01.65% (50,356,224B) in 2 places, all below massif's threshold (1.00%) | | | ->01.30% (39,784,448B) 0x7CF27AD: sysmalloc (malloc.c:2517) | ->01.30% (39,784,448B) 0x7CF3741: _int_malloc (malloc.c:3827) | ->01.30% (39,784,448B) 0x7CF3C08: _int_memalign (malloc.c:4421) | ->01.30% (39,784,448B) 0x7CF871B: posix_memalign (malloc.c:3120) | ->01.30% (39,784,448B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | ->01.30% (39,784,448B) in 2 places, all below massif's threshold (1.00%) | ->02.27% (69,361,664B) 0x401B3F9: mmap (mmap.c:34) | ->02.25% (68,857,856B) 0x40068CB: _dl_map_object_from_fd (dl-map-segments.h:56) | | ->02.25% (68,857,856B) 0x4008C25: _dl_map_object (dl-load.c:2498) | | ->02.19% (66,752,512B) 0x400DBB0: openaux (dl-deps.c:63) | | | ->02.19% (66,752,512B) 0x4010572: _dl_catch_error (dl-error.c:187) | | | ->02.19% (66,752,512B) 0x400E1F0: _dl_map_object_deps (dl-deps.c:254) | | | ->02.19% (66,752,512B) 0x4003A27: dl_main (rtld.c:1647) | | | ->02.19% (66,752,512B) 0x40198AB: _dl_sysdep_start (dl-sysdep.c:249) | | | ->02.19% (66,752,512B) 0x4001C28: _dl_start (rtld.c:323) | | | ->02.19% (66,752,512B) 0x4000C36: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so) | | | ->02.19% (66,752,512B) 0xD: ??? | | | ->02.19% (66,752,512B) 0xFFF00016D: ??? | | | ->02.19% (66,752,512B) 0xFFF00017F: ??? | | | ->02.19% (66,752,512B) 0xFFF000182: ??? | | | ->02.19% (66,752,512B) 0xFFF000187: ??? | | | ->02.19% (66,752,512B) 0xFFF00018A: ??? | | | ->02.19% (66,752,512B) 0xFFF00018F: ??? | | | ->02.19% (66,752,512B) 0xFFF000192: ??? | | | ->02.19% (66,752,512B) 0xFFF000196: ??? | | | ->02.19% (66,752,512B) 0xFFF000199: ??? | | | ->02.19% (66,752,512B) 0xFFF00019B: ??? 
| | | ->02.19% (66,752,512B) 0xFFF00019E: ??? | | | ->02.19% (66,752,512B) 0xFFF0001A0: ??? | | | ->02.19% (66,752,512B) 0xFFF0001A3: ??? | | | ->02.19% (66,752,512B) 0xFFF0001A5: ??? | | | ->02.19% (66,752,512B) 0xFFF0001A8: ??? | | | | | ->00.07% (2,105,344B) in 1+ places, all below ms_print's threshold (01.00%) | | | ->00.02% (503,808B) in 1+ places, all below ms_print's threshold (01.00%) | ->00.04% (1,101,824B) in 1+ places, all below ms_print's threshold (01.00%)
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B)
46 68,157,467,145 2,925,690,880 2,925,690,880 0 0 47 68,157,468,391 3,053,694,976 3,053,694,976 0 0 48 68,428,710,128 2,925,690,880 2,925,690,880 0 0 49 68,699,953,057 2,925,690,880 2,925,690,880 0 0 50 68,709,058,621 1,389,686,784 1,389,686,784 0 0 51 70,598,292,752 1,133,682,688 1,133,682,688 0 0 52 70,599,418,454 1,100,128,256 1,100,128,256 0 0 53 70,599,418,494 1,033,019,392 1,033,019,392 0 0 54 70,599,418,534 965,910,528 965,910,528 0 0 100.00% (965,910,528B) (page allocation syscalls) mmap/mremap/brk, --alloc-fns, etc. ->92.70% (895,447,040B) 0x7D726B9: mmap (mmap.c:34) | ->55.58% (536,870,912B) 0x7CEE3CF: new_heap (arena.c:438) | | ->41.69% (402,653,184B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->41.69% (402,653,184B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->34.74% (335,544,320B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->34.74% (335,544,320B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->34.74% (335,544,320B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->34.74% (335,544,320B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->34.74% (335,544,320B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->34.74% (335,544,320B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->34.74% (335,544,320B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->34.74% (335,544,320B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->06.95% (67,108,864B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | | ->06.95% (67,108,864B) 0x7CF871B: posix_memalign (malloc.c:3120) | | | ->06.95% (67,108,864B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | | ->06.95% (67,108,864B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | ->06.95% (67,108,864B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->06.95% (67,108,864B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, 
Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->06.95% (67,108,864B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->06.95% (67,108,864B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->06.95% (67,108,864B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->06.95% (67,108,864B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->06.95% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->06.95% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->06.95% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->13.90% (134,217,728B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->13.90% (134,217,728B) 0x7CF5248: malloc (malloc.c:2911) | | ->13.90% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->06.95% (67,108,864B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | ->06.95% (67,108,864B) 0x408116: main._omp_fn.1 (zring.h:65) | | | ->06.95% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->06.95% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->06.95% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | | | ->06.95% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->06.95% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->06.95% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->06.95% (67,108,864B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->06.95% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->20.84% (201,326,592B) 0x7CEE35B: new_heap (arena.c:427) | | ->13.90% (134,217,728B) 0x7CF2476: sysmalloc (malloc.c:2416) | | | ->13.90% (134,217,728B) 0x7CF3741: _int_malloc (malloc.c:3827) | | | ->13.90% (134,217,728B) 0x7CF5182: malloc (malloc.c:2913) | | | | ->13.90% (134,217,728B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->13.90% (134,217,728B) 0x4E9ACCE: __gmpz_init_set_si (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | | | ->13.90% (134,217,728B) 0x437A78: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (fflas_memory.h:64) | | | | ->13.90% (134,217,728B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | | ->13.90% (134,217,728B) 0x783F9BC: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | ->13.90% (134,217,728B) 0x82486B8: start_thread (pthread_create.c:333) | | | | ->13.90% (134,217,728B) 0x7D7841B: clone (clone.S:109) | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | ->06.95% (67,108,864B) 0x7CEEC1F: arena_get2.part.3 (arena.c:646) | | ->06.95% (67,108,864B) 0x7CF5248: malloc (malloc.c:2911) | | ->06.95% (67,108,864B) 0x4E896F7: __gmp_default_allocate (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->06.95% (67,108,864B) 0x4E99856: __gmpz_init (in /usr/lib/x86_64-linux-gnu/libgmp.so.10.3.0) | | ->06.95% (67,108,864B) 0x40A260: void FFLAS::frand<Givaro::ZRing<Givaro::Integer>, Givaro::RandomIntegerIterator<false, false> >(Givaro::ZRing<Givaro::Integer> const&, Givaro::RandomIntegerIterator<false, false>&, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) [clone .isra.407] (gmpxx.h:1497) | | ->06.95% (67,108,864B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->06.95% (67,108,864B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->06.95% (67,108,864B) 0x82486B8: start_thread (pthread_create.c:333) | | ->06.95% (67,108,864B) 0x7D7841B: clone (clone.S:109) | | | ->06.95% (67,108,864B) 0x54022B4: ??? 
(in /usr/lib/libopenblasp-r0.2.18.so) | | ->06.95% (67,108,864B) 0x5402600: blas_memory_alloc (in /usr/lib/libopenblasp-r0.2.18.so) | | ->06.95% (67,108,864B) 0x51EDF40: cblas_dgemm (in /usr/lib/libopenblasp-r0.2.18.so) | | | ->03.47% (33,554,432B) 0x42563C: Givaro::ZRing<double>::Element_ptr FFLAS::fgemm<Givaro::ZRing<double>, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | | ->03.47% (33,554,432B) 0x426F64: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) (pfgemm_variants.inl:187) | | | | ->03.47% (33,554,432B) 0x427E04: Givaro::ZRing<double>::Element* FFLAS::pfgemm<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag>(Givaro::ZRing<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, 
Givaro::ZRing<double>::Element, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::ConstElement_ptr, unsigned long, Givaro::ZRing<double>::Element, Givaro::ZRing<double>::Element*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DefaultBoundedTag, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive, FFLAS::StrategyParameter::TwoDAdaptive> >&) [clone ._omp_fn.26] (pfgemm_variants.inl:213) | | | | | ->03.47% (33,554,432B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->03.47% (33,554,432B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->03.47% (33,554,432B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | | | ->03.47% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | | | ->03.47% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | | | ->03.47% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | | | | | ->03.47% (33,554,432B) 0x4156A9: void FFLAS::fgemm<Givaro::ModularBalanced<double> >(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_classical.inl:257) | | | ->03.47% (33,554,432B) 0x41EFEB: Givaro::ModularBalanced<double>::Element_ptr FFLAS::fgemm<Givaro::ModularBalanced<double>, FFLAS::ModeCategories::LazyTag>(Givaro::ModularBalanced<double> const&, FFLAS::FFLAS_TRANSPOSE, 
FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::ConstElement_ptr, unsigned long, Givaro::ModularBalanced<double>::Element, Givaro::ModularBalanced<double>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ModularBalanced<double>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::LazyTag, FFLAS::ParSeqHelper::Sequential>&) (fgemm_winograd.inl:400) | | | ->03.47% (33,554,432B) 0x431C19: Givaro::Modular<double, double, void>::Element_ptr FFLAS::Protected::fgemm_convert<Givaro::ModularBalanced<double>, Givaro::Modular<double, double, void>, FFLAS::ModeCategories::DelayedTag>(Givaro::Modular<double, double, void> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::ConstElement_ptr, unsigned long, Givaro::Modular<double, double, void>::Element, Givaro::Modular<double, double, void>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::Modular<double, double, void>, FFLAS::MMHelperAlgo::Winograd, FFLAS::ModeCategories::DelayedTag, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:429) | | | ->03.47% (33,554,432B) 0x43474F: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_fgemm.inl:414) | | | ->03.47% (33,554,432B) 0x435432: 
Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->03.47% (33,554,432B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->03.47% (33,554,432B) 0x783A1C6: GOMP_taskwait (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->03.47% (33,554,432B) 0x436B38: bool benchmark_with_timer<Givaro::ZRing<Givaro::Integer>, Givaro::Integer*, Givaro::Integer*>(Givaro::ZRing<Givaro::Integer>&, int, Givaro::Integer*&, Givaro::Integer*&, Givaro::Integer*&, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, int, double&, unsigned long) (pfgemv.inl:105) | | | ->03.47% (33,554,432B) 0x437E68: void benchmark_in_Field<Givaro::ZRing<Givaro::Integer>, 
Argument [11]>(Givaro::ZRing<Givaro::Integer>&, int, unsigned long, unsigned long, int, int, unsigned long, unsigned long, int, Argument (&) [11], unsigned long) (benchmark-fgemv.C:183) | | | ->03.47% (33,554,432B) 0x408166: main._omp_fn.1 (benchmark-fgemv.C:204) | | | ->03.47% (33,554,432B) 0x783F9BC: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->03.47% (33,554,432B) 0x82486B8: start_thread (pthread_create.c:333) | | | ->03.47% (33,554,432B) 0x7D7841B: clone (clone.S:109) | | | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | | | ->05.21% (50,356,224B) 0x82491D4: pthread_create@@GLIBC_2.2.5 (allocatestack.c:513) | | ->02.61% (25,178,112B) 0x5403016: blas_thread_init (in /usr/lib/libopenblasp-r0.2.18.so) | | | ->02.61% (25,178,112B) 0x51D807D: gotoblas_init (in /usr/lib/libopenblasp-r0.2.18.so) | | | ->02.61% (25,178,112B) 0x40106C8: call_init.part.0 (dl-init.c:72) | | | ->02.61% (25,178,112B) 0x40107D9: _dl_init (dl-init.c:30) | | | ->02.61% (25,178,112B) 0x4000C68: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so) | | | ->02.61% (25,178,112B) 0xD: ??? | | | ->02.61% (25,178,112B) 0xFFF00016D: ??? | | | ->02.61% (25,178,112B) 0xFFF00017F: ??? | | | ->02.61% (25,178,112B) 0xFFF000182: ??? | | | ->02.61% (25,178,112B) 0xFFF000187: ??? | | | ->02.61% (25,178,112B) 0xFFF00018A: ??? | | | ->02.61% (25,178,112B) 0xFFF00018F: ??? | | | ->02.61% (25,178,112B) 0xFFF000192: ??? | | | ->02.61% (25,178,112B) 0xFFF000196: ??? | | | ->02.61% (25,178,112B) 0xFFF000199: ??? | | | ->02.61% (25,178,112B) 0xFFF00019B: ??? | | | ->02.61% (25,178,112B) 0xFFF00019E: ??? | | | ->02.61% (25,178,112B) 0xFFF0001A0: ??? | | | ->02.61% (25,178,112B) 0xFFF0001A3: ??? | | | ->02.61% (25,178,112B) 0xFFF0001A5: ??? | | | ->02.61% (25,178,112B) 0xFFF0001A8: ??? | | | | | ->02.61% (25,178,112B) 0x783FF90: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.61% (25,178,112B) 0x7836F08: GOMP_parallel (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->02.61% (25,178,112B) 0x404136: main (benchmark-fgemv.C:225) | | | ->04.12% (39,784,448B) 0x7CF27AD: sysmalloc (malloc.c:2517) | | ->04.12% (39,784,448B) 0x7CF3741: _int_malloc (malloc.c:3827) | | ->04.12% (39,784,448B) 0x7CF3C08: _int_memalign (malloc.c:4421) | | ->04.12% (39,784,448B) 0x7CF871B: posix_memalign (malloc.c:3120) | | ->04.12% (39,784,448B) 0x40D223: double* malloc_align<double>(unsigned long, Alignment) (align-allocator.h:72) | | ->02.60% (25,092,096B) 0x432F29: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (fflas_memory.h:78) | | | ->02.60% (25,092,096B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | | ->02.60% (25,092,096B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> 
const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | | ->02.60% (25,092,096B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.60% (25,092,096B) 0x784214E: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.60% (25,092,096B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | | ->02.60% (25,092,096B) 0x7836F0D: GOMP_parallel (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | | ->02.60% (25,092,096B) 0x4044DA: main (benchmark-fgemv.C:251) | | | | | ->01.52% (14,692,352B) 0x427ED7: FFPACK::rns_double::init(unsigned long, unsigned long, double*, unsigned long, Givaro::Integer const*, unsigned long, unsigned long, bool) const (fflas_memory.h:78) | | ->01.52% (14,692,352B) 0x43313D: Givaro::Integer* FFLAS::fgemm<FFLAS::ParSeqHelper::Sequential>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, unsigned long, Givaro::Integer, Givaro::Integer const*, unsigned long, Givaro::Integer const*, unsigned long, Givaro::Integer, Givaro::Integer*, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Classic, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Sequential>&) (rns-integer.h:175) | | ->01.52% (14,692,352B) 0x435432: Givaro::ZRing<Givaro::Integer>::Element_ptr 
FFLAS::fgemv<Givaro::ZRing<Givaro::Integer> >(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long) (fflas_fgemv_mp.inl:97) | | ->01.52% (14,692,352B) 0x4362A5: Givaro::ZRing<Givaro::Integer>::Element_ptr FFLAS::pfgemv<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::StrategyParameter::Grain>(Givaro::ZRing<Givaro::Integer> const&, FFLAS::FFLAS_TRANSPOSE, unsigned long, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::ConstElement_ptr, unsigned long, Givaro::ZRing<Givaro::Integer>::Element, Givaro::ZRing<Givaro::Integer>::Element_ptr, unsigned long, FFLAS::MMHelper<Givaro::ZRing<Givaro::Integer>, FFLAS::MMHelperAlgo::Auto, FFLAS::ModeCategories::ConvertTo<FFLAS::ElementCategories::RNSElementTag>, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Grain> >&) [clone ._omp_fn.21] (in /home/zhg/soft/fflas-ffpack/benchmarks/benchmark-fgemv) | | ->01.52% (14,692,352B) 0x78398C7: ??? (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->01.52% (14,692,352B) 0x784214E: ??? 
(in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->01.52% (14,692,352B) 0x40807F: main._omp_fn.1 (benchmark-fgemv.C:251) | | ->01.52% (14,692,352B) 0x7836F0D: GOMP_parallel (in /usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0) | | ->01.52% (14,692,352B) 0x4044DA: main (benchmark-fgemv.C:251) | | | ->00.00% (0B) in 1+ places, all below ms_print's threshold (01.00%) | ->07.18% (69,361,664B) 0x401B3F9: mmap (mmap.c:34) | ->07.13% (68,857,856B) 0x40068CB: _dl_map_object_from_fd (dl-map-segments.h:56) | | ->07.13% (68,857,856B) 0x4008C25: _dl_map_object (dl-load.c:2498) | | ->06.91% (66,752,512B) 0x400DBB0: openaux (dl-deps.c:63) | | | ->06.91% (66,752,512B) 0x4010572: _dl_catch_error (dl-error.c:187) | | | ->06.91% (66,752,512B) 0x400E1F0: _dl_map_object_deps (dl-deps.c:254) | | | ->06.91% (66,752,512B) 0x4003A27: dl_main (rtld.c:1647) | | | ->06.91% (66,752,512B) 0x40198AB: _dl_sysdep_start (dl-sysdep.c:249) | | | ->06.91% (66,752,512B) 0x4001C28: _dl_start (rtld.c:323) | | | ->06.91% (66,752,512B) 0x4000C36: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so) | | | ->06.91% (66,752,512B) 0xD: ??? | | | ->06.91% (66,752,512B) 0xFFF00016D: ??? | | | ->06.91% (66,752,512B) 0xFFF00017F: ??? | | | ->06.91% (66,752,512B) 0xFFF000182: ??? | | | ->06.91% (66,752,512B) 0xFFF000187: ??? | | | ->06.91% (66,752,512B) 0xFFF00018A: ??? | | | ->06.91% (66,752,512B) 0xFFF00018F: ??? | | | ->06.91% (66,752,512B) 0xFFF000192: ??? | | | ->06.91% (66,752,512B) 0xFFF000196: ??? | | | ->06.91% (66,752,512B) 0xFFF000199: ??? | | | ->06.91% (66,752,512B) 0xFFF00019B: ??? | | | ->06.91% (66,752,512B) 0xFFF00019E: ??? | | | ->06.91% (66,752,512B) 0xFFF0001A0: ??? | | | ->06.91% (66,752,512B) 0xFFF0001A3: ??? | | | ->06.91% (66,752,512B) 0xFFF0001A5: ??? | | | ->06.91% (66,752,512B) 0xFFF0001A8: ??? 
| | | | | ->00.22% (2,105,344B) in 1+ places, all below ms_print's threshold (01.00%) | | | ->00.05% (503,808B) in 1+ places, all below ms_print's threshold (01.00%) | ->00.11% (1,101,824B) in 1+ places, all below ms_print's threshold (01.00%)
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B)
55 70,599,418,574 898,801,664 898,801,664 0 0 56 70,599,449,783 890,408,960 890,408,960 0 0 57 70,599,450,404 882,016,256 882,016,256 0 0 58 70,599,451,042 873,623,552 873,623,552 0 0