Diff of the two buildlogs: -- --- b1/build.log 2025-02-25 06:18:06.281087586 +0000 +++ b2/build.log 2025-02-25 06:30:08.388491020 +0000 @@ -1,6 +1,6 @@ I: pbuilder: network access will be disabled during build -I: Current time: Mon Feb 24 17:48:14 -12 2025 -I: pbuilder-time-stamp: 1740462494 +I: Current time: Tue Mar 31 02:41:06 +14 2026 +I: pbuilder-time-stamp: 1774874466 I: Building the build Environment I: extracting base tarball [/var/cache/pbuilder/trixie-reproducible-base.tgz] I: copying local configuration @@ -26,54 +26,86 @@ dpkg-source: info: applying auto-gitignore I: Not using root during the build. I: Installing the build-deps -I: user script /srv/workspace/pbuilder/5650/tmp/hooks/D02_print_environment starting +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/D01_modify_environment starting +debug: Running on ionos16-i386. +I: Changing host+domainname to test build reproducibility +I: Adding a custom variable just for the fun of it... +I: Changing /bin/sh to bash +'/bin/sh' -> '/bin/bash' +lrwxrwxrwx 1 root root 9 Mar 30 12:41 /bin/sh -> /bin/bash +I: Setting pbuilder2's login shell to /bin/bash +I: Setting pbuilder2's GECOS to second user,second room,second work-phone,second home-phone,second other +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/D01_modify_environment finished +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/D02_print_environment starting I: set - BUILDDIR='/build/reproducible-path' - BUILDUSERGECOS='first user,first room,first work-phone,first home-phone,first other' - BUILDUSERNAME='pbuilder1' - BUILD_ARCH='i386' - DEBIAN_FRONTEND='noninteractive' - DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=11 ' - DISTRIBUTION='trixie' - HOME='/root' - HOST_ARCH='i386' + BASH=/bin/sh + BASHOPTS=checkwinsize:cmdhist:complete_fullquote:extquote:force_fignore:globasciiranges:globskipdots:hostcomplete:interactive_comments:patsub_replacement:progcomp:promptvars:sourcepath + BASH_ALIASES=() + BASH_ARGC=() + BASH_ARGV=() + BASH_CMDS=() + BASH_LINENO=([0]="12" [1]="0") + BASH_LOADABLES_PATH=/usr/local/lib/bash:/usr/lib/bash:/opt/local/lib/bash:/usr/pkg/lib/bash:/opt/pkg/lib/bash:. + BASH_SOURCE=([0]="/tmp/hooks/D02_print_environment" [1]="/tmp/hooks/D02_print_environment") + BASH_VERSINFO=([0]="5" [1]="2" [2]="37" [3]="1" [4]="release" [5]="i686-pc-linux-gnu") + BASH_VERSION='5.2.37(1)-release' + BUILDDIR=/build/reproducible-path + BUILDUSERGECOS='second user,second room,second work-phone,second home-phone,second other' + BUILDUSERNAME=pbuilder2 + BUILD_ARCH=i386 + DEBIAN_FRONTEND=noninteractive + DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=21 ' + DIRSTACK=() + DISTRIBUTION=trixie + EUID=0 + FUNCNAME=([0]="Echo" [1]="main") + GROUPS=() + HOME=/root + HOSTNAME=i-capture-the-hostname + HOSTTYPE=i686 + HOST_ARCH=i386 IFS=' ' - INVOCATION_ID='9078cab414d84d35b872fd34ac2600ba' - LANG='C' - LANGUAGE='en_US:en' - LC_ALL='C' - LD_LIBRARY_PATH='/usr/lib/libeatmydata' - LD_PRELOAD='libeatmydata.so' - MAIL='/var/mail/root' - OPTIND='1' - PATH='/usr/sbin:/usr/bin:/sbin:/bin:/usr/games' - PBCURRENTCOMMANDLINEOPERATION='build' - PBUILDER_OPERATION='build' - PBUILDER_PKGDATADIR='/usr/share/pbuilder' - PBUILDER_PKGLIBDIR='/usr/lib/pbuilder' - PBUILDER_SYSCONFDIR='/etc' - PPID='5650' - PS1='# ' - PS2='> ' + INVOCATION_ID=294283d7587d4c78a12ebbd32468b2fe + LANG=C + LANGUAGE=de_CH:de + LC_ALL=C + LD_LIBRARY_PATH=/usr/lib/libeatmydata + LD_PRELOAD=libeatmydata.so + MACHTYPE=i686-pc-linux-gnu + MAIL=/var/mail/root + OPTERR=1 + OPTIND=1 + OSTYPE=linux-gnu + PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path + PBCURRENTCOMMANDLINEOPERATION=build + PBUILDER_OPERATION=build + PBUILDER_PKGDATADIR=/usr/share/pbuilder + PBUILDER_PKGLIBDIR=/usr/lib/pbuilder + PBUILDER_SYSCONFDIR=/etc + PIPESTATUS=([0]="0") + POSIXLY_CORRECT=y + PPID=60845 PS4='+ ' - PWD='/' - SHELL='/bin/bash' - SHLVL='2' - SUDO_COMMAND='/usr/bin/timeout -k 18.1h 18h /usr/bin/ionice -c 3 /usr/bin/nice /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.Wl3ryZft/pbuilderrc_zD2b --distribution trixie --hookdir /etc/pbuilder/first-build-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/trixie-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.Wl3ryZft/b1 --logfile b1/build.log librsb_1.3.0.2+dfsg-7.dsc' - SUDO_GID='112' - SUDO_UID='107' - SUDO_USER='jenkins' - TERM='unknown' - TZ='/usr/share/zoneinfo/Etc/GMT+12' - USER='root' - _='/usr/bin/systemd-run' - http_proxy='http://46.16.76.132:3128' + PWD=/ + SHELL=/bin/bash + SHELLOPTS=braceexpand:errexit:hashall:interactive-comments:posix + SHLVL=3 + SUDO_COMMAND='/usr/bin/timeout -k 24.1h 24h /usr/bin/ionice -c 3 /usr/bin/nice -n 11 /usr/bin/unshare --uts -- /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.Wl3ryZft/pbuilderrc_cw5q --distribution trixie --hookdir /etc/pbuilder/rebuild-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/trixie-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.Wl3ryZft/b2 --logfile b2/build.log librsb_1.3.0.2+dfsg-7.dsc' + SUDO_GID=112 + SUDO_UID=107 + SUDO_USER=jenkins + TERM=unknown + TZ=/usr/share/zoneinfo/Etc/GMT-14 + UID=0 + USER=root + _='I: set' + http_proxy=http://213.165.73.152:3128 I: uname -a - Linux ionos12-i386 6.1.0-31-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.128-1 (2025-02-07) x86_64 GNU/Linux + Linux i-capture-the-hostname 6.1.0-31-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.128-1 (2025-02-07) x86_64 GNU/Linux I: ls -l /bin - lrwxrwxrwx 1 root root 7 Nov 22 14:40 /bin -> usr/bin -I: user script /srv/workspace/pbuilder/5650/tmp/hooks/D02_print_environment finished + lrwxrwxrwx 1 root root 7 Nov 22 2024 /bin -> usr/bin +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/D02_print_environment finished -> Attempting to satisfy build-dependencies -> Creating pbuilder-satisfydepends-dummy package Package: pbuilder-satisfydepends-dummy @@ -299,7 +331,7 @@ Get: 178 http://deb.debian.org/debian trixie/main i386 libltdl-dev i386 2.5.4-3 [169 kB] Get: 179 http://deb.debian.org/debian trixie/main i386 libhwloc-dev i386 2.12.0-1 [273 kB] Get: 180 http://deb.debian.org/debian trixie/main i386 zlib1g-dev i386 1:1.3.dfsg+really1.3.1-1+b1 [916 kB] -Fetched 392 MB in 7s (52.8 MB/s) +Fetched 392 MB in 7s (58.0 MB/s) Preconfiguring packages ... Selecting previously unselected package liblocale-gettext-perl. (Reading database ... (Reading database ... 5% (Reading database ... 10% (Reading database ... 15% (Reading database ... 20% (Reading database ... 25% (Reading database ... 30% (Reading database ... 35% (Reading database ... 40% (Reading database ... 45% (Reading database ... 50% (Reading database ... 55% (Reading database ... 60% (Reading database ... 65% (Reading database ... 70% (Reading database ... 75% (Reading database ... 80% (Reading database ... 85% (Reading database ... 90% (Reading database ... 95% (Reading database ... 100% (Reading database ... 19788 files and directories currently installed.) @@ -897,8 +929,8 @@ Setting up tzdata (2025a-2) ... Current default time zone: 'Etc/UTC' -Local time is now: Tue Feb 25 05:50:55 UTC 2025. -Universal Time is now: Tue Feb 25 05:50:55 UTC 2025. +Local time is now: Mon Mar 30 12:41:54 UTC 2026. +Universal Time is now: Mon Mar 30 12:41:54 UTC 2026. Run 'dpkg-reconfigure tzdata' if you wish to change it. Setting up libfontenc1:i386 (1:1.1.8-1+b2) ... @@ -1065,7 +1097,11 @@ Building tag database... -> Finished parsing the build-deps I: Building the package -I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-7_source.changes +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/A99_set_merged_usr starting +Not re-configuring usrmerge for trixie +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/A99_set_merged_usr finished +hostname: Name or service not known +I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-7_source.changes dpkg-buildpackage: info: source package librsb dpkg-buildpackage: info: source version 1.3.0.2+dfsg-7 dpkg-buildpackage: info: source distribution unstable @@ -1085,62 +1121,62 @@ dh binary dh_update_autotools_config dh_autoreconf -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found libtoolize: putting auxiliary files in '.'. libtoolize: copying file './ltmain.sh' libtoolize: putting macros in AC_CONFIG_MACRO_DIRS, 'm4'. @@ -1182,90 +1218,90 @@ libtoolize: copying file 'm4/ltsugar.m4' libtoolize: copying file 'm4/ltversion.m4' libtoolize: copying file 'm4/lt~obsolete.m4' -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found configure.ac:66: installing './compile' configure.ac:65: installing './missing' Makefile.am: installing './depcomp' @@ -1451,7 +1487,7 @@ checking for m4... m4 checking for gmake... gmake checking for ggrep... /usr/bin/grep -checking for bash... /bin/bash +checking for bash... /bin/sh checking for gsed... /usr/bin/sed checking for cmp... cmp checking for basename... basename @@ -1568,7 +1604,7 @@ configure: It appears that Fortran programs can be linked without using the Fortran linker. configure: Using OPENMP_CFLAGS ok for linking an OpenMP program: adding it to LIBS. checking if your have a usable getrusage() ... 1 -checking for /bin/bash... /bin/bash +checking for /bin/sh... /bin/sh configure: Will not use Google Test. configure: You seem to not have GNU Octave or have disabled 'int' type. Part of the test suite will not be generated. If you want more testing capabilities, you should enable the 'int' type as well. checking that generated files are newer than configure... done @@ -1590,7 +1626,7 @@ config.status: executing depfiles commands config.status: executing libtool commands === configuring in librsbpp (/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1715,7 +1751,7 @@ configure: Will not use Google Test. checking whether you have std::thread... yes checking whether you have std::mutex... yes -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1724,7 +1760,7 @@ config.status: executing libtool commands configure: Successfully created a Makefile. === configuring in rsblib (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1836,7 +1872,7 @@ checking for filesystem... yes checking for main in -lstdc++fs... yes configure: Assuming you are yet to build librsb.la. (set LIBS= -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la) -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1846,7 +1882,7 @@ config.status: executing libtool commands configure: Created a Makefile. === configuring in rsbtest (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1960,7 +1996,7 @@ checking for filesystem... yes checking for rsb_lib_init... no checking for dd... yes -/bin/bash +/bin/sh /usr/bin/timeout checking that generated files are newer than configure... done configure: creating ./config.status @@ -1992,7 +2028,7 @@ ARFLAGS : cru M4 : m4 MAKE : gmake - BASH : /bin/bash + BASH : /bin/sh OCTAVE : false DOXYGEN : doxygen HELP2MAN : help2man @@ -2011,7 +2047,7 @@ Supported I/O functionality level : "7" vs "7" Interface Error Verbosity : "0" vs "0" Internals Error Verbosity : "0" vs "0" - Memory hierarchy info, detected : "L2:16/64/512K,L1:2/64/64K" + Memory hierarchy info, detected : "L2:16/64/4096K,L1:8/64/32K" Memory hierarchy info, selected : "" Maximum of supported threads : "128" Build Fortran examples : "yes" vs "yes" @@ -2041,28 +2077,28 @@ blhc: ignore-line-regexp: ^.*CC.*: gcc blhc: ignore-line-regexp: ^.*LD.*: /usr/bin/ld.* dh_auto_build - make -j11 + make -j21 make[2]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake all-recursive gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' Making all in librsbpp gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp.o rsbpp.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbtt.o rsbtt.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -c -o rsbct.o rsbct.c libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -fPIC -DPIC -o .libs/rsbpp_coo.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -fPIC -DPIC -o .libs/rsbpp_csr.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -o rsbpp_coo.o >/dev/null 2>&1 libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -o rsbpp_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs libtool: link: ar cr .libs/librsbpp.a .libs/rsbpp_coo.o .libs/rsbpp_csr.o libtool: link: ranlib .libs/librsbpp.a libtool: link: ( cd ".libs" && rm -f "librsbpp.la" && ln -s "../librsbpp.la" "librsbpp.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbpp rsbpp.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbtt rsbtt.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbct rsbct.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp @@ -2083,303 +2119,303 @@ gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsbench-rsb_libspblas_tests.o `test -f 'rsb_libspblas_tests.c' || echo './'`rsb_libspblas_tests.c g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o rsb_dummy.o rsb_dummy.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o ch2icfb-ch2icfb.o `test -f 'ch2icfb.c' || echo './'`ch2icfb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_stropts.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_unroll.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_stropts.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_bench.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_mergesort.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spmv.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_merge.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_ompio.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_merge.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_util.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -o librsb_nounroll_la-rsb_ompio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_is.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mio.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_op.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -o librsb_base_la-rsb_op.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_bio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_get.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c -rsb_test_matops.c: In function 'rsb__main_block_partitioned_spmv_sxsa': -rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] - 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", - | ^~ ~~~ -In file included from /usr/include/stdio.h:970, - from rsb_common.h:49, - from rsb_test_matops.h:79, - from rsb_test_matops.c:74: -In function 'sprintf', - inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, - inlined from 'rsb__main_block_partitioned_spmv_sxsa' at rsb_test_matops.c:1757:4: -/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 - 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 31 | __glibc_objsize (__s), __fmt, - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 32 | __va_arg_pack ()); - | ~~~~~~~~~~~~~~~~~ libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_set.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_check.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_symm.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_idx.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srt.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srtp.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_src.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_test_accuracy.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_clone.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_render.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_render.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -o librsb_base_la-rsb_test_accuracy.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_eps.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_msort_up.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c -rsb_test_matops.c: In function 'rsb__main_block_partitioned_spsv_sxsx': -rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] - 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", - | ^~ ~~~ -In function 'sprintf', - inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, - inlined from 'rsb__main_block_partitioned_spsv_sxsx' at rsb_test_matops.c:6327:4: -/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 - 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 31 | __glibc_objsize (__s), __fmt, - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 32 | __va_arg_pack ()); - | ~~~~~~~~~~~~~~~~~ -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_sys.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_blas_stuff.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -o librsb_base_la-rsb_blas_stuff.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_gen.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_perf.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -o librsb_base_la-rsb_blas_stuff.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -o librsb_base_la-rsb_err.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_internals.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_garbage.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mmio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_partition.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mbw.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_limiter.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_fpb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o -rsb_test_matops.c: In function 'rsb__main_block_partitioned_mat_stats': -rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] - 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", - | ^~ ~~~ -In function 'sprintf', - inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, - inlined from 'rsb__main_block_partitioned_mat_stats' at rsb_test_matops.c:10768:4: -/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 - 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 31 | __glibc_objsize (__s), __fmt, - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 32 | __va_arg_pack ()); - | ~~~~~~~~~~~~~~~~~ -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsum.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsv.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_lock.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_swt.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_init.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -o librsb_base_la-rsb_swt.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_dump.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_cpmv.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_asm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo2rec.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -o librsb_base_la-rsb_cpmv.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2coo.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2csr.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr2coo.o -libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c rsb_blas_sparse.F90 -fPIC -o .libs/rsb_blas_sparse.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c +libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c rsb_blas_sparse.F90 -fPIC -o .libs/rsb_blas_sparse.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -fPIC -DPIC -o .libs/rsb_libspblas_handle.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb_blas_sparse.o rsb_blas_sparse.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -fPIC -DPIC -o .libs/rsb_libspblas.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 libtool: warning: '-version-info' is ignored for programs libtool: link: gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -Wl,-z -Wl,relro -o ch2icfb ch2icfb-ch2icfb.o -fopenmp -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp if test -f ./rsb_types.h -a ! -f ./rsb_types.h ; then cp -pv ./rsb_types.h ./rsb_types.h ; fi # out-of-dir behaviour varies between installations -if test -f ch2icfb ; then if SED=/usr/bin/sed GREP=/usr/bin/grep /bin/bash ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb.lo rsb.F90 +if test -f ch2icfb ; then if SED=/usr/bin/sed GREP=/usr/bin/grep /bin/sh ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb.lo rsb.F90 +gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb.o rsb.F90 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c rsb.F90 -fPIC -o .libs/rsb.o libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c rsb.F90 -o rsb.o >/dev/null 2>&1 -gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o rsb.o rsb.F90 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 +rsb_test_matops.c: In function 'rsb__main_block_partitioned_spmv_sxsa': +rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] + 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", + | ^~ ~~~ +In file included from /usr/include/stdio.h:970, + from rsb_common.h:49, + from rsb_test_matops.h:79, + from rsb_test_matops.c:74: +In function 'sprintf', + inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, + inlined from 'rsb__main_block_partitioned_spmv_sxsa' at rsb_test_matops.c:1757:4: +/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 + 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 31 | __glibc_objsize (__s), __fmt, + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 32 | __va_arg_pack ()); + | ~~~~~~~~~~~~~~~~~ libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -o rsb_libspblas.o >/dev/null 2>&1 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c rsb_blas_sparse.F90 -o rsb_blas_sparse.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_spblas.a .libs/rsb_libspblas_handle.o .libs/rsb_libspblas.o libtool: link: ranlib .libs/librsb_spblas.a libtool: link: ( cd ".libs" && rm -f "librsb_spblas.la" && ln -s "../librsb_spblas.la" "librsb_spblas.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_base.a .libs/librsb_base_la-rsb_is.o .libs/librsb_base_la-rsb_mio.o .libs/librsb_base_la-rsb_op.o .libs/librsb_base_la-rsb_bio.o .libs/librsb_base_la-rsb_get.o .libs/librsb_base_la-rsb_set.o .libs/librsb_base_la-rsb_coo.o .libs/librsb_base_la-rsb_csr.o .libs/librsb_base_la-rsb_coo_check.o .libs/librsb_base_la-rsb_coo_symm.o .libs/librsb_base_la-rsb_idx.o .libs/librsb_base_la-rsb_srt.o .libs/librsb_base_la-rsb_srtp.o .libs/librsb_base_la-rsb_src.o .libs/librsb_base_la-rsb_test_accuracy.o .libs/librsb_base_la-rsb_clone.o .libs/librsb_base_la-rsb_rec.o .libs/librsb_base_la-rsb_render.o .libs/librsb_base_la-rsb_eps.o .libs/librsb_base_la-rsb_msort_up.o .libs/librsb_base_la-rsb_sys.o .libs/librsb_base_la-rsb_blas_stuff.o .libs/librsb_base_la-rsb_gen.o .libs/librsb_base_la-rsb_perf.o .libs/librsb_base_la-rsb_rsb.o .libs/librsb_base_la-rsb_err.o .libs/librsb_base_la-rsb_tune.o .libs/librsb_base_la-rsb_do.o .libs/librsb_base_la-rsb_internals.o .libs/librsb_base_la-rsb_garbage.o .libs/librsb_base_la-rsb_mmio.o .libs/librsb_base_la-rsb_partition.o .libs/librsb_base_la-rsb_mbw.o .libs/librsb_base_la-rsb_limiter.o .libs/librsb_base_la-rsb_fpb.o .libs/librsb_base_la-rsb_spgemm.o .libs/librsb_base_la-rsb_spsum.o .libs/librsb_base_la-rsb_spsv.o .libs/librsb_base_la-rsb_lock.o .libs/librsb_base_la-rsb_swt.o .libs/librsb_base_la-rsb_init.o .libs/librsb_base_la-rsb_dump.o .libs/librsb_base_la-rsb_cpmv.o .libs/librsb_base_la-rsb_asm.o .libs/librsb_base_la-rsb_user.o .libs/librsb_base_la-rsb_coo2rec.o .libs/librsb_base_la-rsb_rec2coo.o .libs/librsb_base_la-rsb_rec2csr.o .libs/librsb_base_la-rsb_csr2coo.o .libs/rsb_blas_sparse.o libtool: link: ranlib .libs/librsb_base.a libtool: link: ( cd ".libs" && rm -f "librsb_base.la" && ln -s "../librsb_base.la" "librsb_base.la" ) +rsb_test_matops.c: In function 'rsb__main_block_partitioned_spsv_sxsx': +rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] + 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", + | ^~ ~~~ +In function 'sprintf', + inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, + inlined from 'rsb__main_block_partitioned_spsv_sxsx' at rsb_test_matops.c:6327:4: +/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 + 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 31 | __glibc_objsize (__s), __fmt, + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 32 | __va_arg_pack ()); + | ~~~~~~~~~~~~~~~~~ +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 +rsb_test_matops.c: In function 'rsb__main_block_partitioned_mat_stats': +rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] + 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", + | ^~ ~~~ +In function 'sprintf', + inlined from 'rsb__impcdstr' at rsb_test_matops.c:214:2, + inlined from 'rsb__main_block_partitioned_mat_stats' at rsb_test_matops.c:10768:4: +/usr/include/i386-linux-gnu/bits/stdio2.h:30:10: note: '__builtin___sprintf_chk' output 19 or more bytes (assuming 2070) into a destination of size 1025 + 30 | return __builtin___sprintf_chk (__s, __USE_FORTIFY_LEVEL - 1, + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 31 | __glibc_objsize (__s), __fmt, + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 32 | __va_arg_pack ()); + | ~~~~~~~~~~~~~~~~~ libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_nounroll.a .libs/librsb_nounroll_la-rsb_stropts.o .libs/librsb_nounroll_la-rsb_strmif.o .libs/librsb_nounroll_la-rsb_unroll.o .libs/librsb_nounroll_la-rsb_krnl_vb.o .libs/librsb_nounroll_la-rsb_krnl_lb.o .libs/librsb_nounroll_la-rsb_krnl.o .libs/librsb_nounroll_la-rsb_bench.o .libs/librsb_nounroll_la-rsb_mergesort.o .libs/librsb_nounroll_la-rsb_permute.o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss.o .libs/librsb_nounroll_la-rsb_spmv.o .libs/librsb_nounroll_la-rsb_merge.o .libs/librsb_nounroll_la-rsb_ompio.o .libs/librsb_nounroll_la-rsb_util.o .libs/librsb_nounroll_la-rsb_spgemm_csr.o .libs/librsb_nounroll_la-rsb_spsum_misc.o .libs/librsb_nounroll_la-rsb_prec.o libtool: link: ranlib .libs/librsb_nounroll.a libtool: link: ( cd ".libs" && rm -f "librsb_nounroll.la" && ln -s "../librsb_nounroll.la" "librsb_nounroll.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/i386-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/i386-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: gcc -shared -fPIC -DPIC .libs/rsb.o -Wl,--whole-archive ./.libs/librsb_nounroll.a ./.libs/librsb_base.a ./.libs/librsb_spblas.a /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/.libs/librsbpp.a -Wl,--no-whole-archive -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lpthread -lstdc++fs -lstdc++ -lhwloc -lz -lgfortran -lm -lquadmath -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Werror=format-security -O3 -Wl,-z -Wl,relro -fopenmp -fopenmp -Wl,-soname -Wl,librsb.so.0 -o .libs/librsb.so.0.0.0 libtool: link: (cd ".libs" && rm -f "librsb.so.0" && ln -s "librsb.so.0.0.0" "librsb.so.0") libtool: link: (cd ".libs" && rm -f "librsb.so" && ln -s "librsb.so.0.0.0" "librsb.so") @@ -2391,7 +2427,7 @@ libtool: link: ranlib .libs/librsb.a libtool: link: rm -fr .libs/librsb.lax libtool: link: ( cd ".libs" && rm -f "librsb.la" && ln -s "../librsb.la" "librsb.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o -fopenmp ./.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -2408,27 +2444,27 @@ gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o fortran.o fortran.F90 gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -pipe -fopenmp -c -o fortran_rsb_fi.o fortran_rsb_fi.F90 g++ -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o cplusplus.o cplusplus.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/io-spblas io-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello hello.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/backsolve backsolve.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/power power.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/autotune autotune.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran_rsb_fi fortran_rsb_fi.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran fortran.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/snippets snippets.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/cplusplus cplusplus.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/14 -L/usr/lib/gcc/i686-linux-gnu/14/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/14/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' Making all in scripts @@ -2448,9 +2484,9 @@ /usr/bin/mkdir -p man /usr/bin/mkdir -p man gmake makedox -SOURCE_DATE_EPOCH=1735736461 \ +SOURCE_DATE_EPOCH=1767178861 \ help2man --name="benchmark and test for librsb" --no-info ../rsbench | /usr/bin/sed 's/January //g' > man/rsbench.1 -SOURCE_DATE_EPOCH=1735736461 \ +SOURCE_DATE_EPOCH=1767178861 \ help2man --name="provide configuration information for librsb" --no-info /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb-config | /usr/bin/sed 's/January //g' > man/librsb-config.1 gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/doc' DOXYGEN_PROJECT_NUMBER=1.3.0.2 doxygen Doxyfile || echo "are you sure you have doxygen installed ?" @@ -3162,13 +3198,13 @@ g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_ne-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_rv-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp libtool: link: g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp libtool: link: g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' Making all in examples @@ -3183,25 +3219,25 @@ g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o mtx2bin.o mtx2bin.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o render.o render.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o span.o span.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib/examples' gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' @@ -3211,7 +3247,7 @@ gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' g++ -DHAVE_CONFIG_H -I. -I/build/reproducible-path/librsb-1.3.0.2+dfsg -Wdate-time -D_FORTIFY_SOURCE=2 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rsbtest.o rsbtest.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lstdc++fs -fopenmp gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' @@ -3287,7 +3323,7 @@ gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake mtests -C . gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash -ex ./scripts/readme-tests.sh +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh -ex ./scripts/readme-tests.sh + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + ./rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # --bench option implies -qH -R --write-performance-record --want-mkl-autotune --mkl-benchmark --types : --split-experimental 6 --merge-experimental 6 --also-transpose --sort-filenames-list --want-memory-benchmark @@ -3297,15 +3333,15 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740464014 +# beginning run at 1774875110 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 5.32e-07 s -# Will write a final performance record to file rsbench_pr__1740464014_gcc-14.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1740464014_gcc-14.2-1,4th.rpr.tmp +# average timer granularity: 5.08e-08 s +# Will write a final performance record to file rsbench_pr__1774875110_gcc-14.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1774875110_gcc-14.2-1,4th.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -3344,61 +3380,61 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos12-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.698s +# Memory benchmark took 6.676s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 8 samples (2016 bytes). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 5.720s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 6.679s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type Z... -# file input of A.mtx took 0.00 s (6 nnz, 6067 nnz/s ) (0.19 MB/s ) -#pre-sorting (6 elements) took 0.014075 s -#weeding duplicates (to 6 elements) took 1.90735e-06 s (and check, 1.90735e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 58254 nnz/s ) (1.79 MB/s ) +#pre-sorting (6 elements) took 0.0016551 s +#weeding duplicates (to 6 elements) took 2.14577e-06 s (and check, 2.86102e-06 s ) # multi-nrhs benchmarking (1,2) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.088s): (3 x 3)[0x581c9460]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.000s): (3 x 3)[0x58324390]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.04799s; avg 0.016s ( +/- 0.02/ 0.01 %); best 0.016s; worst 0.016s; std dev. 1.911e-06 (taking best). -Reference operation time is 0.015995 s (0.006002 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 2.694e-05s, ~9.06e-06s of computing time (of which 2.146e-06s sorting, 4.053e-06s analysis) -3 iterations (1 th.) took 0.001042s; avg 0.0003474s ( +/- 99.45/198.83 %); best 1.907e-06s; worst 0.001038s; std dev. 0.0004884 (taking best). -Reference operation time is 1.90735e-06 s (50.33 Mflops) with 1 threads. -After merge step 1: tpop: 1.907e-06 s ~Mflops: 50.332 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 8386.000x: 0.016s -> 1.907e-06s, so taking this instance. +3 iterations (1 th.) took 8.798e-05s; avg 2.933e-05s ( +/- 76.42/149.59 %); best 6.914e-06s; worst 7.319e-05s; std dev. 3.102e-05 (taking best). +Reference operation time is 6.91414e-06 s (13.88 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 6.914e-06 Mflops: 13.885) +Merge (3 -> 1 leaves) took w.c.t. of 2.003e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 3.815e-06s analysis) +3 iterations (1 th.) took 3.386e-05s; avg 1.129e-05s ( +/- 99.54/191.55 %); best 5.211e-08s; worst 3.29e-05s; std dev. 1.529e-05 (taking best). +Reference operation time is 5.21064e-08 s (1842 Mflops) with 1 threads. +After merge step 1: tpop: 5.211e-08 s ~Mflops: 1842.385 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 132.693x: 6.914e-06s -> 5.211e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05602s (of which 3.195e-05s partitioning, 0s I/O); computing times: 9.06e-06s in par. loops, 2.146e-06s sorting, 4.053e-06s analyzing) -Total merge + benchmarking process took 0.05602s, equivalent to 29371.8/3.5 new/old ops (0.1017s for 2 clones -- as 53300.8/6.4 ops, or 26650.4/3.2 ops per clone), SPEEDUP of 8386.000x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 8386.000x (0.016s -> 1.907e-06s), will amortize in 3.5 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.15s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 838500.0% (8386x) (from 0.006002 to 50.33 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 9.68e-05s (of which 2.503e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 3.815e-06s analyzing) +Total merge + benchmarking process took 9.68e-05s, equivalent to 1857.7/14.0 new/old ops (4.22e-05s for 2 clones -- as 809.9/6.1 ops, or 404.9/3.1 ops per clone), SPEEDUP of 132.693x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 132.693x (6.914e-06s -> 5.211e-08s), will amortize in 14.1 ops by saving 6.862e-06s per op. +In 1 tuning rounds (tot. 0.00028s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 13169.3% (132.7x) (from 13.88 to 1842 Mflops). #pr: updating sample at index 1 (0^th of 8), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.152044 s (1.600e-02 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000327826 s (6.914e-06 s -> 5.211e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.197576 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000526905 s and estimated a speedup of 1.000000 x (5.211e-08 s -> 5.211e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.023854 0.031974 0.055828 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.055828 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.023854 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.031974 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.000031 0.000016 0.000047 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000047 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.000031 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.000016 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.055828 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000047 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3413,47 +3449,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.012s): (3 x 3)[0x581ccba0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.021s): (3 x 3)[0x58327a80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 9.584e-05s; avg 3.195e-05s ( +/- 21.64/ 31.34 %); best 2.503e-05s; worst 4.196e-05s; std dev. 7.25e-06 (taking best). -Reference operation time is 2.5034e-05 s (3.835 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 2.503e-05 Mflops: 3.835) -Merge (3 -> 1 leaves) took w.c.t. of 2.313e-05s, ~9.06e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (4 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 52.00/ 44.00 %); best 9.537e-07s; worst 2.861e-06s; std dev. 7.867e-07 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 4 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 26.250x: 2.503e-05s -> 9.537e-07s, so taking this instance. +3 iterations (4 th.) took 8.512e-05s; avg 2.837e-05s ( +/- 64.71/108.40 %); best 1.001e-05s; worst 5.913e-05s; std dev. 2.188e-05 (taking best). +Reference operation time is 1.00136e-05 s (9.587 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.001e-05 Mflops: 9.587) +Merge (3 -> 1 leaves) took w.c.t. of 1.097e-05s, ~5.007e-06s of computing time (of which 1.907e-06s sorting, 9.537e-07s analysis) +3 iterations (4 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 94.54/100.00 %); best 5.211e-08s; worst 1.907e-06s; std dev. 7.787e-07 (taking best). +Reference operation time is 5.21064e-08 s (1842 Mflops) with 4 threads. +After merge step 1: tpop: 5.211e-08 s ~Mflops: 1842.385 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 192.176x: 1.001e-05s -> 5.211e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 8.488e-05s (of which 2.909e-05s partitioning, 0s I/O); computing times: 9.06e-06s in par. loops, 1.907e-06s sorting, 2.146e-06s analyzing) -Total merge + benchmarking process took 8.488e-05s, equivalent to 89.0/3.4 new/old ops (0.0002871s for 2 clones -- as 301.0/11.5 ops, or 150.5/5.7 ops per clone), SPEEDUP of 26.250x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 26.250x (2.503e-05s -> 9.537e-07s), will amortize in 3.5 ops by saving 2.408e-05s per op. -In 1 tuning rounds (tot. 0.00048s, 0.00029s for constructor, 2 clones) obtained a SPEEDUP of 2525.0% (26.25x) (from 3.835 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 4.983e-05s (of which 1.383e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.907e-06s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 4.983e-05s, equivalent to 956.3/5.0 new/old ops (3.505e-05s for 2 clones -- as 672.6/3.5 ops, or 336.3/1.8 ops per clone), SPEEDUP of 192.176x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 192.176x (1.001e-05s -> 5.211e-08s), will amortize in 5.0 ops by saving 9.961e-06s per op. +In 1 tuning rounds (tot. 0.00019s, 3.5e-05s for constructor, 2 clones) obtained a SPEEDUP of 19117.6% (192.2x) (from 9.587 to 1842 Mflops). #pr: updating sample at index 5 (1^th of 8), 0^th touch for (0,1,0,0,0,0,0). -First run of RSB Autotuner took 0.000497818 s (2.503e-05 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000201941 s (1.001e-05 s -> 5.211e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.00116587 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000708103 s and estimated a speedup of 1.000000 x (5.211e-08 s -> 5.211e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.000029 0.011850 0.011879 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.011879 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.000029 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.011850 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.007837 0.008508 0.016345 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.016345 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.007837 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.008508 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.011879 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.016345 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 4.70 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 4.70 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 820.09 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 2.70 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 820.09 2.70 4.70 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.00 0.00 0.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3462,50 +3498,50 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.0878298 1e+09 1e+09 +%operation:A.mtx 7.79629e-05 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.023854 0 0.0319741 +%constructor:A.mtx 0 3.09944e-05 0 1.5974e-05 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,2) -- now using nrhs 2. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x581ccba0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.023s): (3 x 3)[0x58327a80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.001362s; avg 0.0004539s ( +/- 96.90/193.17 %); best 1.407e-05s; worst 0.001331s; std dev. 0.0006201 (taking best). -Reference operation time is 1.40667e-05 s (13.65 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.407e-05 Mflops: 13.649) -Merge (3 -> 1 leaves) took w.c.t. of 1.502e-05s, ~5.007e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) -3 iterations (1 th.) took 0.00056s; avg 0.0001867s ( +/- 98.98/197.83 %); best 1.907e-06s; worst 0.000556s; std dev. 0.0002611 (taking best). -Reference operation time is 1.90735e-06 s (100.7 Mflops) with 1 threads. -After merge step 1: tpop: 1.907e-06 s ~Mflops: 100.663 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 7.375x: 1.407e-05s -> 1.907e-06s, so taking this instance. +3 iterations (1 th.) took 0.01311s; avg 0.00437s ( +/- 0.54/ 0.66 %); best 0.004347s; worst 0.004399s; std dev. 2.165e-05 (taking best). +Reference operation time is 0.00434685 s (0.04417 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.004347 Mflops: 0.044) +Merge (3 -> 1 leaves) took w.c.t. of 1.001e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 2.146e-06s analysis) +3 iterations (1 th.) took 2.313e-05s; avg 7.709e-06s ( +/- 87.63/172.16 %); best 9.537e-07s; worst 2.098e-05s; std dev. 9.385e-06 (taking best). +Reference operation time is 9.53674e-07 s (201.3 Mflops) with 1 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 201.327 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 4558.000x: 0.004347s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.002748s (of which 2.003e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.192e-06s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.002748s, equivalent to 1440.8/195.4 new/old ops (0.002174s for 2 clones -- as 1139.8/154.5 ops, or 569.9/77.3 ops per clone), SPEEDUP of 7.375x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 7.375x (1.407e-05s -> 1.907e-06s), will amortize in 226.0 ops by saving 1.216e-05s per op. -In 1 tuning rounds (tot. 0.0042s, 0.0022s for constructor, 2 clones) obtained a SPEEDUP of 637.5% (7.375x) (from 13.65 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01301s (of which 1.407e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.01301s, equivalent to 13646.2/3.0 new/old ops (0.02595s for 2 clones -- as 27211.2/6.0 ops, or 13605.6/3.0 ops per clone), SPEEDUP of 4558.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 4558.000x (0.004347s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.004346s per op. +In 1 tuning rounds (tot. 0.039s, 0.026s for constructor, 2 clones) obtained a SPEEDUP of 455700.0% (4558x) (from 0.04417 to 201.3 Mflops). #pr: updating sample at index 3 (2^th of 8), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.00419092 s (1.407e-05 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0391669 s (4.347e-03 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000741005 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.217786 s and estimated a speedup of 1.000000 x (5.211e-08 s -> 5.211e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.000155 0.000024 0.000179 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000179 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.000155 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.000024 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.005736 0.008569 0.014305 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.014305 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.005736 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.008569 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000179 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.014305 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3520,47 +3556,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.046s): (3 x 3)[0x581ccba0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.025s): (3 x 3)[0x58327a80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 0.000104s; avg 3.465e-05s ( +/- 13.30/ 23.85 %); best 3.004e-05s; worst 4.292e-05s; std dev. 5.857e-06 (taking best). -Reference operation time is 3.00407e-05 s (6.391 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 3.004e-05 Mflops: 6.391) -Merge (3 -> 1 leaves) took w.c.t. of 1.788e-05s, ~5.96e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (4 th.) took 6.914e-06s; avg 2.305e-06s ( +/- 48.28/ 65.52 %); best 1.192e-06s; worst 3.815e-06s; std dev. 1.107e-06 (taking best). -Reference operation time is 1.19209e-06 s (161.1 Mflops) with 4 threads. -After merge step 1: tpop: 1.192e-06 s ~Mflops: 161.061 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 25.200x: 3.004e-05s -> 1.192e-06s, so taking this instance. +3 iterations (4 th.) took 0.01305s; avg 0.00435s ( +/- 1.54/ 0.78 %); best 0.004283s; worst 0.004384s; std dev. 4.726e-05 (taking best). +Reference operation time is 0.00428319 s (0.04483 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.004283 Mflops: 0.045) +Merge (3 -> 1 leaves) took w.c.t. of 1.311e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (4 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.88/142.86 %); best 5.211e-08s; worst 4.053e-06s; std dev. 1.73e-06 (taking best). +Reference operation time is 5.21064e-08 s (3685 Mflops) with 4 threads. +After merge step 1: tpop: 5.211e-08 s ~Mflops: 3684.769 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 82200.869x: 0.004283s -> 5.211e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 8.488e-05s (of which 2.193e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 1.907e-06s sorting, 2.146e-06s analyzing) -Total merge + benchmarking process took 8.488e-05s, equivalent to 71.2/2.8 new/old ops (6.89e-05s for 2 clones -- as 57.8/2.3 ops, or 28.9/1.1 ops per clone), SPEEDUP of 25.200x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 25.200x (3.004e-05s -> 1.192e-06s), will amortize in 2.9 ops by saving 2.885e-05s per op. -In 1 tuning rounds (tot. 0.00026s, 6.9e-05s for constructor, 2 clones) obtained a SPEEDUP of 2420.0% (25.2x) (from 6.391 to 161.1 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01293s (of which 1.621e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01293s, equivalent to 248126.3/3.0 new/old ops (0.02594s for 2 clones -- as 497881.5/6.1 ops, or 248940.7/3.0 ops per clone), SPEEDUP of 82200.869x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 82200.869x (0.004283s -> 5.211e-08s), will amortize in 3.0 ops by saving 0.004283s per op. +In 1 tuning rounds (tot. 0.039s, 0.026s for constructor, 2 clones) obtained a SPEEDUP of 8219986.9% (8.22e+04x) (from 0.04483 to 3685 Mflops). #pr: updating sample at index 7 (3^th of 8), 0^th touch for (0,1,0,0,1,0,0). -First run of RSB Autotuner took 0.000277042 s (3.004e-05 s -> 1.192e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0390859 s (4.283e-03 s -> 5.211e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.0011971 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.217154 s and estimated a speedup of 1.000000 x (5.211e-08 s -> 5.211e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000001 0.000144 0.030825 0.030969 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.030970 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.000144 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.030825 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 0.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.030969 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.007899 0.010612 0.018511 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.018511 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.007899 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.010612 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.018511 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.01 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.01 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 1.08 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 0.00 1.08 0.00 0.01 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.77 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.77 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.73 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.81 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.73 0.81 0.77 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3569,185 +3605,185 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.000209808 1e+09 1e+09 +%operation:A.mtx 0.022927 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.000154972 0 2.40803e-05 +%constructor:A.mtx 0 0.00573587 0 0.008569 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 6.314s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.358s/0.000s . +# so far, program took 7.427s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.515s/0.000s . getrusage() stats: -ru_maxrss: 6 (maximum resident set size -- MB) -ru_stime : 0.06395s (system CPU time used) -ru_utime : 12.43s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.2464s (system CPU time used) +ru_utime : 21.77s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 50.33 1.600e-02 0.000e+00 1.907e-06 0.000e+00 1.520e-01 1.41e-01 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 2.503e-05 0.000e+00 9.537e-07 0.000e+00 4.978e-04 2.81e-01 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1842.38 6.914e-06 0.000e+00 5.211e-08 0.000e+00 3.278e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1842.38 1.001e-05 0.000e+00 5.211e-08 0.000e+00 2.019e-04 5.14e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 420512.5 % faster, avg. sp. ratio 4206.125x, max sp. ratio 8386.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 40118.4/522.0/79714.8/80236.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 14.7/ 9.5/ 19.9/ 29.4 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 15.1, min. 9.5, max. 20.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 16143.4 % faster, avg. sp. ratio 162.434x, max sp. ratio 192.176x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5083.5/3875.5/6291.5/10167.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 33.8/ 20.2/ 47.4/ 67.6 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 34.0, min. 20.3, max. 47.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.173/ 0.115/ 0.231,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.422/ 0.141/ 0.281,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.222/ 4.222/ 4.222,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.287/ 5.143/ 5.143,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.08 s, min 0.00 s, max 0.15 s, tot 0.15 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.08 s, min 0.00 s, max 0.15 s, tot 0.15 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 7.550e+01, min 5.033e+01, max 1.007e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.920e+00, min 6.002e-03, max 3.835e+00 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 2.861e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.010e-03s, min 2.503e-05s, max 1.600e-02s, tot 1.602e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.515e+01 7.031e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.842e+03, min 1.842e+03, max 1.842e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.174e+01, min 9.587e+00, max 1.388e+01 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.211e-08s, min 5.211e-08s, max 5.211e-08s, tot 1.042e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 8.464e-06s, min 6.914e-06s, max 1.001e-05s, tot 1.693e-05s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.584e+00 5.584e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 #pr: ======== Limiting to nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 1.907e-06 0.000e+00 4.191e-03 2.16e-01 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 161.06 3.004e-05 0.000e+00 1.192e-06 0.000e+00 2.770e-04 3.46e-01 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 4.347e-03 0.000e+00 9.537e-07 0.000e+00 3.917e-02 4.32e-01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3684.77 4.283e-03 0.000e+00 5.211e-08 0.000e+00 3.909e-02 7.91e+00 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1528.8 % faster, avg. sp. ratio 16.288x, max sp. ratio 25.200x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1214.8/232.4/2197.2/2429.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 153.6/ 9.2/297.9/307.2 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 177.1, min. 9.6, max. 344.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 4337843.5 % faster, avg. sp. ratio 43379.435x, max sp. ratio 82200.869x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 395593.1/41069.5/750116.7/791186.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 9.0/ 9.1/ 18.1 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 9.0, max. 9.1 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.215/ 0.166/ 0.265,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.562/ 0.216/ 0.346,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.198/ 0.331/ 6.065,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.339/ 0.432/ 7.907,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.309e+02, min 1.007e+02, max 1.611e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.002e+01, min 6.391e+00, max 1.365e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.550e-06s, min 1.192e-06s, max 1.907e-06s, tot 3.099e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 2.205e-05s, min 1.407e-05s, max 3.004e-05s, tot 4.411e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.059e+01 4.895e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.800e+00 x, min 1.600e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.04 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.04 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.943e+03, min 2.013e+02, max 3.685e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.450e-02, min 4.417e-02, max 4.483e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.029e-07s, min 5.211e-08s, max 9.537e-07s, tot 1.006e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 4.315e-03s, min 4.283e-03s, max 4.347e-03s, tot 8.630e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.888e+00 7.115e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.055e+00 x, min 1.093e-01 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 50.33 1.600e-02 0.000e+00 1.907e-06 0.000e+00 1.520e-01 1.41e-01 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 1.907e-06 0.000e+00 4.191e-03 2.16e-01 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 2.503e-05 0.000e+00 9.537e-07 0.000e+00 4.978e-04 2.81e-01 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 161.06 3.004e-05 0.000e+00 1.192e-06 0.000e+00 2.770e-04 3.46e-01 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1842.38 6.914e-06 0.000e+00 5.211e-08 0.000e+00 3.278e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 4.347e-03 0.000e+00 9.537e-07 0.000e+00 3.917e-02 4.32e-01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1842.38 1.001e-05 0.000e+00 5.211e-08 0.000e+00 2.019e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3684.77 4.283e-03 0.000e+00 5.211e-08 0.000e+00 3.909e-02 7.91e+00 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 211020.6 % faster, avg. sp. ratio 2111.206x, max sp. ratio 8386.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 20666.6/232.4/79714.8/82666.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 84.1/ 9.2/297.9/336.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 96.1, min. 9.5, max. 344.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 2176993.4 % faster, avg. sp. ratio 21770.934x, max sp. ratio 82200.869x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 200338.3/3875.5/750116.7/801353.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 21.4/ 9.0/ 47.4/ 85.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 21.5, min. 9.0, max. 47.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.194/ 0.115/ 0.265,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.983/ 0.141/ 0.346,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.710/ 0.331/ 6.065,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 18.626/ 0.432/ 7.907,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.00 s, max 0.15 s, tot 0.16 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.00 s, max 0.15 s, tot 0.16 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.032e+02, min 5.033e+01, max 1.611e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 5.970e+00, min 6.002e-03, max 1.365e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.490e-06s, min 9.537e-07s, max 1.907e-06s, tot 5.960e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 4.016e-03s, min 1.407e-05s, max 1.600e-02s, tot 1.606e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.059e+01 7.031e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.800e+00 x, min 1.600e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.08 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.08 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.893e+03, min 2.013e+02, max 3.685e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.890e+00, min 4.417e-02, max 1.388e+01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.775e-07s, min 5.211e-08s, max 9.537e-07s, tot 1.110e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 2.162e-03s, min 6.914e-06s, max 4.347e-03s, tot 8.647e-03s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.888e+00 7.115e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.055e+00 x, min 1.093e-01 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 50.33 1.600e-02 0.000e+00 1.907e-06 0.000e+00 1.520e-01 1.41e-01 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 2.503e-05 0.000e+00 9.537e-07 0.000e+00 4.978e-04 2.81e-01 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1842.38 6.914e-06 0.000e+00 5.211e-08 0.000e+00 3.278e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1842.38 1.001e-05 0.000e+00 5.211e-08 0.000e+00 2.019e-04 5.14e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 420512.5 % faster, avg. sp. ratio 4206.125x, max sp. ratio 8386.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 40118.4/522.0/79714.8/80236.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 14.7/ 9.5/ 19.9/ 29.4 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 15.1, min. 9.5, max. 20.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 16143.4 % faster, avg. sp. ratio 162.434x, max sp. ratio 192.176x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5083.5/3875.5/6291.5/10167.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 33.8/ 20.2/ 47.4/ 67.6 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 34.0, min. 20.3, max. 47.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.173/ 0.115/ 0.231,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.422/ 0.141/ 0.281,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.222/ 4.222/ 4.222,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.287/ 5.143/ 5.143,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.08 s, min 0.00 s, max 0.15 s, tot 0.15 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.08 s, min 0.00 s, max 0.15 s, tot 0.15 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 7.550e+01, min 5.033e+01, max 1.007e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.920e+00, min 6.002e-03, max 3.835e+00 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 2.861e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.010e-03s, min 2.503e-05s, max 1.600e-02s, tot 1.602e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.515e+01 7.031e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.842e+03, min 1.842e+03, max 1.842e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.174e+01, min 9.587e+00, max 1.388e+01 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.211e-08s, min 5.211e-08s, max 5.211e-08s, tot 1.042e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 8.464e-06s, min 6.914e-06s, max 1.001e-05s, tot 1.693e-05s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.584e+00 5.584e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 #pr: ======== Limiting to both transA=N and nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 1.907e-06 0.000e+00 4.191e-03 2.16e-01 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 161.06 3.004e-05 0.000e+00 1.192e-06 0.000e+00 2.770e-04 3.46e-01 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 4.347e-03 0.000e+00 9.537e-07 0.000e+00 3.917e-02 4.32e-01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3684.77 4.283e-03 0.000e+00 5.211e-08 0.000e+00 3.909e-02 7.91e+00 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1528.8 % faster, avg. sp. ratio 16.288x, max sp. ratio 25.200x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1214.8/232.4/2197.2/2429.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 153.6/ 9.2/297.9/307.2 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 177.1, min. 9.6, max. 344.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 4337843.5 % faster, avg. sp. ratio 43379.435x, max sp. ratio 82200.869x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 395593.1/41069.5/750116.7/791186.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 9.0/ 9.1/ 18.1 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 9.0, max. 9.1 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.215/ 0.166/ 0.265,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.562/ 0.216/ 0.346,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.198/ 0.331/ 6.065,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.339/ 0.432/ 7.907,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.309e+02, min 1.007e+02, max 1.611e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.002e+01, min 6.391e+00, max 1.365e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.550e-06s, min 1.192e-06s, max 1.907e-06s, tot 3.099e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 2.205e-05s, min 1.407e-05s, max 3.004e-05s, tot 4.411e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.059e+01 4.895e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.800e+00 x, min 1.600e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.04 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.04 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.943e+03, min 2.013e+02, max 3.685e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.450e-02, min 4.417e-02, max 4.483e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.029e-07s, min 5.211e-08s, max 9.537e-07s, tot 1.006e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 4.315e-03s, min 4.283e-03s, max 4.347e-03s, tot 8.630e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.888e+00 7.115e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.055e+00 x, min 1.093e-01 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 4) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -3757,44 +3793,44 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 50.33 1.600e-02 0.000e+00 1.907e-06 0.000e+00 1.520e-01 1.41e-01 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 1.907e-06 0.000e+00 4.191e-03 2.16e-01 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 2.503e-05 0.000e+00 9.537e-07 0.000e+00 4.978e-04 2.81e-01 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 161.06 3.004e-05 0.000e+00 1.192e-06 0.000e+00 2.770e-04 3.46e-01 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1842.38 6.914e-06 0.000e+00 5.211e-08 0.000e+00 3.278e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 4.347e-03 0.000e+00 9.537e-07 0.000e+00 3.917e-02 4.32e-01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1842.38 1.001e-05 0.000e+00 5.211e-08 0.000e+00 2.019e-04 5.14e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3684.77 4.283e-03 0.000e+00 5.211e-08 0.000e+00 3.909e-02 7.91e+00 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 211020.6 % faster, avg. sp. ratio 2111.206x, max sp. ratio 8386.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 20666.6/232.4/79714.8/82666.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 84.1/ 9.2/297.9/336.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 96.1, min. 9.5, max. 344.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 2176993.4 % faster, avg. sp. ratio 21770.934x, max sp. ratio 82200.869x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 200338.3/3875.5/750116.7/801353.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 21.4/ 9.0/ 47.4/ 85.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 21.5, min. 9.0, max. 47.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.194/ 0.115/ 0.265,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.983/ 0.141/ 0.346,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.710/ 0.331/ 6.065,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 18.626/ 0.432/ 7.907,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.00 s, max 0.15 s, tot 0.16 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.00 s, max 0.15 s, tot 0.16 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.032e+02, min 5.033e+01, max 1.611e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 5.970e+00, min 6.002e-03, max 1.365e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.490e-06s, min 9.537e-07s, max 1.907e-06s, tot 5.960e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 4.016e-03s, min 1.407e-05s, max 1.600e-02s, tot 1.606e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.059e+01 7.031e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.835e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.800e+00 x, min 1.600e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) -#pr: Record collection took 0.57 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.08 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.08 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.893e+03, min 2.013e+02, max 3.685e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.890e+00, min 4.417e-02, max 1.388e+01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.775e-07s, min 5.211e-08s, max 9.537e-07s, tot 1.110e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 2.162e-03s, min 6.914e-06s, max 4.347e-03s, tot 8.647e-03s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.888e+00 7.115e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.694e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.055e+00 x, min 1.093e-01 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) +#pr: Record collection took 0.49 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 100 environment variables in 4726 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 100 environment variables in 4779 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1740464014_gcc-14.2-1,4th.rpr -# Removing the temporary record file rsbench_pr__1740464014_gcc-14.2-1,4th.rpr.tmp. -# terminating run at 1740464020 (after 6.3s of w.c.t.) +#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1774875110_gcc-14.2-1,4th.rpr +# Removing the temporary record file rsbench_pr__1774875110_gcc-14.2-1,4th.rpr.tmp. +# terminating run at 1774875117 (after 7.4s of w.c.t.) + ./rsbench -oa -Ob --help /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench is a swiss army knife for testing the library functionality and performance. You can use it to perform sparse matrix - unitary vector multiplication, specifying the blocking parameters, the times to perform multiplication. @@ -4048,38 +4084,49 @@ Written by michelemartone_AT_users_DOT_sourceforge_DOT_net. + ./rsbench -I -cache block size : 40329 -hwloc size of cache level 1: 65536 -hwloc size of cache level 2: 524288 -detected max available cores/threads : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 -detected max OpenMP procs : 13 +cache block size : 174762 +hwloc size of cache level 1: 32768 +hwloc size of cache level 2: 4194304 +detected max available cores/threads : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 +detected max OpenMP procs : 24 detected 2 levels of cache -L1 size: 65536 -L2 size: 524288 +L1 size: 32768 +L2 size: 4194304 sysconf() : 4096 bytes per pagesize -sysconf() : 2028579 physical pages -sysconf() : 4014092288 bytes (3828 MB) of physical memory -sysconf() : 600892 available (free) physical pages -sysconf() : 2461253632 available (free) physical memory -sysconf() , processors : 64 -sysconf() , processors online : 13 -sysconf() : level 1 cache size 65536 -sysconf() : level 1 cache associativity 2 +sysconf() : 2024092 physical pages +sysconf() : 3995713536 bytes (3810 MB) of physical memory +sysconf() : 659182 available (free) physical pages +sysconf() : 2700009472 available (free) physical memory +sysconf() , processors : 128 +sysconf() , processors online : 24 +sysconf() : level 1 cache size 32768 +sysconf() : level 1 cache associativity 8 sysconf() : level 1 cache line size 64 -sysconf() : level 2 cache size 524288 -sysconf() : level 2 cache associativity 16 +sysconf() : level 2 cache size 2097152 +sysconf() : level 2 cache associativity 8 sysconf() : level 2 cache line size 64 sysconf() : no level 3 cache sysconf() : no level 4 cache @@ -4101,30 +4148,33 @@ RSB_SUBM_IDX_MARKER : 2147483647 RSB_MAX_ALLOCATABLE_MEMORY_CHUNK: 4294967295 timing min delta (if negative, don't complain with us) : 0 s -timing granularity : 5.31399e-07 s +timing granularity : 5.08547e-08 s CFLAGS : -g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 CXXFLAGS : -g -O2 -fstack-protector-strong -Wformat -Werror=format-security -fopenmp CC : gcc -memhinfo : L2:16/64/512K,L1:2/64/64K -detected free memory : -1833713664 -detected total memory : -280875008 -for array sized 524288 elems, took 0.000688076 s for linear search and 0 s for binary search for element 524287, in 139 tries, for a total of 0.100562 s (ignore this:145751786) -for array sized 524288 elems, took 0.000320911 s for linear search and 0 s for binary search for element 262143, in 282 tries, for a total of 0.100070 s (ignore this:293600438) -for array sized 524288 elems, took 0.000146866 s for linear search and 0 s for binary search for element 131071, in 563 tries, for a total of 0.100018 s (ignore this:441186384) -for array sized 524288 elems, took 5.79357e-05 s for linear search and 0 s for binary search for element 65535, in 1120 tries, for a total of 0.100074 s (ignore this:587984784) -for array sized 524288 elems, took 1.28746e-05 s for linear search and 0 s for binary search for element 32767, in 2202 tries, for a total of 0.100027 s (ignore this:732290652) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 16383, in 4284 tries, for a total of 0.100028 s (ignore this:872660196) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 8191, in 7799 tries, for a total of 0.100001 s (ignore this:1000423414) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 4095, in 13840 tries, for a total of 0.100004 s (ignore this:1113773014) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 2047, in 25145 tries, for a total of 0.100017 s (ignore this:1216716644) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 1023, in 39390 tries, for a total of 0.100003 s (ignore this:1297308584) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 511, in 54611 tries, for a total of 0.100002 s (ignore this:1353121026) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 255, in 66931 tries, for a total of 0.100001 s (ignore this:1387255836) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 127, in 75331 tries, for a total of 0.100000 s (ignore this:1406389910) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 63, in 81561 tries, for a total of 0.100001 s (ignore this:1416666596) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 31, in 82871 tries, for a total of 0.100001 s (ignore this:1421804598) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 15, in 84842 tries, for a total of 0.100000 s (ignore this:1424349858) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 7, in 86692 tries, for a total of 0.100001 s (ignore this:1425563546) +memhinfo : L2:16/64/4M,L1:8/64/32K +detected free memory : -1594957824 +detected total memory : -299253760 +for array sized 4194304 elems, took 0.00146294 s for linear search and 0 s for binary search for element 4194303, in 66 tries, for a total of 0.100041 s (ignore this:553647996) +for array sized 4194304 elems, took 0.000726938 s for linear search and 0 s for binary search for element 2097151, in 137 tries, for a total of 0.100698 s (ignore this:1128267370) +for array sized 4194304 elems, took 0.000362873 s for linear search and 0 s for binary search for element 1048575, in 265 tries, for a total of 0.100159 s (ignore this:1684012120) +for array sized 4194304 elems, took 0.00018096 s for linear search and 0 s for binary search for element 524287, in 527 tries, for a total of 0.100153 s (ignore this:-2058356678) +for array sized 4194304 elems, took 9.08375e-05 s for linear search and 0 s for binary search for element 262143, in 1080 tries, for a total of 0.100022 s (ignore this:-1492127798) +for array sized 4194304 elems, took 4.48227e-05 s for linear search and 0 s for binary search for element 131071, in 2175 tries, for a total of 0.100037 s (ignore this:-921968948) +for array sized 4194304 elems, took 2.19345e-05 s for linear search and 0 s for binary search for element 65535, in 4334 tries, for a total of 0.100012 s (ignore this:-353911568) +for array sized 4194304 elems, took 1.09673e-05 s for linear search and 0 s for binary search for element 32767, in 8626 tries, for a total of 0.100010 s (ignore this:211384716) +for array sized 4194304 elems, took 4.76837e-06 s for linear search and 0 s for binary search for element 16383, in 17033 tries, for a total of 0.100004 s (ignore this:769487994) +for array sized 4194304 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 8191, in 33138 tries, for a total of 0.100003 s (ignore this:1312354710) +for array sized 4194304 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 4095, in 63304 tries, for a total of 0.100002 s (ignore this:1830814470) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 2047, in 115368 tries, for a total of 0.100001 s (ignore this:-1991836234) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 1023, in 196664 tries, for a total of 0.100000 s (ignore this:-1589461690) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 511, in 300710 tries, for a total of 0.100001 s (ignore this:-1282136070) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 255, in 416390 tries, for a total of 0.100001 s (ignore this:-1069777170) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 127, in 514536 tries, for a total of 0.100001 s (ignore this:-939085026) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 63, in 584152 tries, for a total of 0.100000 s (ignore this:-865481874) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 31, in 686091 tries, for a total of 0.100001 s (ignore this:-822944232) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 15, in 737007 tries, for a total of 0.100000 s (ignore this:-800834022) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 7, in 756654 tries, for a total of 0.100000 s (ignore this:-790240866) + ./rsbench -C /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench version: 1.3.0 format switches:br @@ -4151,7 +4201,7 @@ RSB_CONST_MAX_SUPPORTED_CORES:128 RSB_BLAS_MATRICES_MAX:2147482623 RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH:2 -RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/512K,L1:2/64/64K +RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/4096K,L1:8/64/32K RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t):65535 RSB_IOLEVEL:7 LIBRSBPP support: on. @@ -4171,17 +4221,17 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740464023 +# beginning run at 1774875120 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx --verbose --nrhs 1,4 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# User did not specify threads; assuming 1. Environment provides max 13 threads; this build supports max 128. -# User did not specify threads; assuming 1. Environment provides max 13 threads; this build supports max 128. -# average timer granularity: 5.31e-07 s -# Will write a final performance record to file rsbench_pr__1740464023_gcc-14.2.rpr and periodic checkpoints to rsbench_pr__1740464023_gcc-14.2.rpr.tmp +# User did not specify threads; assuming 1. Environment provides max 24 threads; this build supports max 128. +# User did not specify threads; assuming 1. Environment provides max 24 threads; this build supports max 128. +# average timer granularity: 5.08e-08 s +# Will write a final performance record to file rsbench_pr__1774875120_gcc-14.2.rpr and periodic checkpoints to rsbench_pr__1774875120_gcc-14.2.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -4220,835 +4270,835 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos12-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.486s +# Memory benchmark took 6.751s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 16 samples (4032 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 5.508s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 6.753s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type D... -# file input of A.mtx took 0.00 s (6 nnz, 36367 nnz/s ) (1.12 MB/s ) -#pre-sorting (6 elements) took 0.0139871 s -#weeding duplicates (to 6 elements) took 2.86102e-06 s (and check, 2.14577e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 68947 nnz/s ) (2.11 MB/s ) +#pre-sorting (6 elements) took 0.00436306 s +#weeding duplicates (to 6 elements) took 1.90735e-06 s (and check, 9.53674e-07 s ) # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.103s): (3 x 3)[0x57553c70]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.007s): (3 x 3)[0x581e3b10]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.05597s; avg 0.01866s ( +/- 14.93/ 29.18 %); best 0.01587s; worst 0.0241s; std dev. 0.00385 (taking best). -Reference operation time is 0.015871 s (0.001512 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01587 Mflops: 0.002) -Merge (3 -> 1 leaves) took w.c.t. of 3.099e-05s, ~1.001e-05s of computing time (of which 2.146e-06s sorting, 5.007e-06s analysis) -3 iterations (13 th.) took 2.789e-05s; avg 9.298e-06s ( +/- 89.74/169.23 %); best 9.537e-07s; worst 2.503e-05s; std dev. 1.113e-05 (taking best). -Reference operation time is 9.53674e-07 s (25.17 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 25.166 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 16642.000x: 0.01587s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 0.000129s; avg 4.299e-05s ( +/- 72.27/ 76.89 %); best 1.192e-05s; worst 7.606e-05s; std dev. 2.622e-05 (taking best). +Reference operation time is 1.19209e-05 s (2.013 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.192e-05 Mflops: 2.013) +Merge (3 -> 1 leaves) took w.c.t. of 1.383e-05s, ~6.199e-06s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 2.408e-05s; avg 8.027e-06s ( +/- 99.37/188.12 %); best 5.081e-08s; worst 2.313e-05s; std dev. 1.068e-05 (taking best). +Reference operation time is 5.0807e-08 s (472.4 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 472.376 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 234.632x: 1.192e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 3.91e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 2.146e-06s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.04798s, equivalent to 50312.8/3.0 new/old ops (0.09583s for 2 clones -- as 100489.2/6.0 ops, or 50244.6/3.0 ops per clone), SPEEDUP of 16642.000x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 16642.000x (0.01587s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01587s per op. -In 1 tuning rounds (tot. 0.15s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1664100.0% (1.664e+04x) (from 0.001512 to 25.17 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 7.701e-05s (of which 1.884e-05s partitioning, 0s I/O); computing times: 6.199e-06s in par. loops, 1.907e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 7.701e-05s, equivalent to 1515.7/6.5 new/old ops (4.196e-05s for 2 clones -- as 825.9/3.5 ops, or 413.0/1.8 ops per clone), SPEEDUP of 234.632x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 234.632x (1.192e-05s -> 5.081e-08s), will amortize in 6.5 ops by saving 1.187e-05s per op. +In 1 tuning rounds (tot. 0.00027s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 23363.2% (234.6x) (from 2.013 to 472.4 Mflops). #pr: updating sample at index 1 (0^th of 16), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.152023 s (1.587e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000292063 s (1.192e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.507962 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.00048995 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000001 0.039468 0.032001 0.071469 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.071470 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.039468 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.032001 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.071469 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.006800 0.000016 0.006816 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.006816 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.006800 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000016 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.006816 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.103483 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 9.53674e-07 0.0394681 0 0.032001 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 0.00684309 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 0.00679994 0 1.5974e-05 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.051s): (3 x 3)[0x57553c70]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.000s): (3 x 3)[0x581e3b10]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.024s; avg 0.008s ( +/- 0.41/ 0.41 %); best 0.007967s; worst 0.008033s; std dev. 2.677e-05 (taking best). -Reference operation time is 0.00796723 s (0.01205 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.007967 Mflops: 0.012) -Merge (3 -> 1 leaves) took w.c.t. of 1.907e-05s, ~6.914e-06s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) -3 iterations (13 th.) took 6.914e-06s; avg 2.305e-06s ( +/- 58.62/ 65.52 %); best 9.537e-07s; worst 3.815e-06s; std dev. 1.173e-06 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 8354.250x: 0.007967s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 5.507e-05s; avg 1.836e-05s ( +/- 33.77/ 46.75 %); best 1.216e-05s; worst 2.694e-05s; std dev. 6.266e-06 (taking best). +Reference operation time is 1.21593e-05 s (7.895 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.216e-05 Mflops: 7.895) +Merge (3 -> 1 leaves) took w.c.t. of 9.06e-06s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 98.32/168.42 %); best 5.081e-08s; worst 8.106e-06s; std dev. 3.618e-06 (taking best). +Reference operation time is 5.0807e-08 s (1890 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 1889.503 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 239.324x: 1.216e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.02399s (of which 2.384e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 1.907e-06s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.02399s, equivalent to 25155.5/3.0 new/old ops (0.0479s for 2 clones -- as 50225.5/6.0 ops, or 25112.8/3.0 ops per clone), SPEEDUP of 8354.250x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 8354.250x (0.007967s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.007966s per op. -In 1 tuning rounds (tot. 0.072s, 0.048s for constructor, 2 clones) obtained a SPEEDUP of 835325.0% (8354x) (from 0.01205 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.698e-05s (of which 1.216e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 1.192e-06s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 5.698e-05s, equivalent to 1121.5/4.7 new/old ops (3.719e-05s for 2 clones -- as 732.1/3.1 ops, or 366.0/1.5 ops per clone), SPEEDUP of 239.324x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 239.324x (1.216e-05s -> 5.081e-08s), will amortize in 4.7 ops by saving 1.211e-05s per op. +In 1 tuning rounds (tot. 0.00016s, 3.7e-05s for constructor, 2 clones) obtained a SPEEDUP of 23832.4% (239.3x) (from 7.895 to 1890 Mflops). #pr: updating sample at index 9 (1^th of 16), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.0720181 s (7.967e-03 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000174999 s (1.216e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.388569 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000550032 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.019365 0.015987 0.035352 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.035352 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.019365 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.015987 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.035352 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.000026 0.000012 0.000038 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000038 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.000026 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000012 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000038 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.0513601 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.0193651 0 0.0159869 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 5.50747e-05 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 2.59876e-05 0 1.21593e-05 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 6.915s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.121s/0.000s . +# so far, program took 6.835s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.002s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.09192s (system CPU time used) -ru_utime : 20.93s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1s (system CPU time used) +ru_utime : 7.878s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 6.915s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.121s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 6.835s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.002s/0.000s . # Reusing type converted (D->S) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.076s): (3 x 3)[0x57553c70]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.001s): (3 x 3)[0x581e3b10]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.028s; avg 0.009333s ( +/- 14.31/ 28.53 %); best 0.007997s; worst 0.012s; std dev. 0.001883 (taking best). -Reference operation time is 0.00799704 s (0.003001 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.007997 Mflops: 0.003) -Merge (3 -> 1 leaves) took w.c.t. of 2.217e-05s, ~1.121e-05s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (13 th.) took 1.287e-05s; avg 4.292e-06s ( +/- 77.78/155.56 %); best 9.537e-07s; worst 1.097e-05s; std dev. 4.72e-06 (taking best). -Reference operation time is 9.53674e-07 s (25.17 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 25.166 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 8385.500x: 0.007997s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 7.01e-05s; avg 2.337e-05s ( +/- 31.63/ 45.92 %); best 1.597e-05s; worst 3.409e-05s; std dev. 7.765e-06 (taking best). +Reference operation time is 1.5974e-05 s (1.502 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.597e-05 Mflops: 1.502) +Merge (3 -> 1 leaves) took w.c.t. of 9.06e-06s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 2.289e-05s; avg 7.629e-06s ( +/- 99.33/200.00 %); best 5.081e-08s; worst 2.289e-05s; std dev. 1.079e-05 (taking best). +Reference operation time is 5.0807e-08 s (472.4 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 472.376 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 314.406x: 1.597e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.03199s (of which 2.813e-05s partitioning, 0s I/O); computing times: 1.121e-05s in par. loops, 1.907e-06s sorting, 2.146e-06s analyzing) -Total merge + benchmarking process took 0.03199s, equivalent to 33540.8/4.0 new/old ops (0.07988s for 2 clones -- as 83759.2/10.0 ops, or 41879.6/5.0 ops per clone), SPEEDUP of 8385.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 8385.500x (0.007997s -> 9.537e-07s), will amortize in 4.0 ops by saving 0.007996s per op. -In 1 tuning rounds (tot. 0.11s, 0.08s for constructor, 2 clones) obtained a SPEEDUP of 838450.0% (8386x) (from 0.003001 to 25.17 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.914e-05s (of which 1.216e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 6.914e-05s, equivalent to 1360.9/4.3 new/old ops (3.624e-05s for 2 clones -- as 713.3/2.3 ops, or 356.6/1.1 ops per clone), SPEEDUP of 314.406x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 314.406x (1.597e-05s -> 5.081e-08s), will amortize in 4.3 ops by saving 1.592e-05s per op. +In 1 tuning rounds (tot. 0.00019s, 3.6e-05s for constructor, 2 clones) obtained a SPEEDUP of 31340.6% (314.4x) (from 1.502 to 472.4 Mflops). #pr: updating sample at index 3 (2^th of 16), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.1082 s (7.997e-03 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.00020504 s (1.597e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.52376 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000735998 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.027567 0.020017 0.047584 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.047584 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.027567 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.020017 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.047584 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.000840 0.000012 0.000852 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000852 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.000840 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000012 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000852 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.0755961 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.0275669 0 0.0200171 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 0.000868797 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 0.000840187 0 1.19209e-05 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.084s): (3 x 3)[0x57553f30]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.000s): (3 x 3)[0x581e3b10]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.04799s; avg 0.016s ( +/- 0.21/ 0.19 %); best 0.01596s; worst 0.01603s; std dev. 2.629e-05 (taking best). -Reference operation time is 0.0159628 s (0.006014 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01596 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 2.599e-05s, ~1.311e-05s of computing time (of which 2.146e-06s sorting, 1.907e-06s analysis) -3 iterations (13 th.) took 8.106e-06s; avg 2.702e-06s ( +/- 64.71/ 85.29 %); best 9.537e-07s; worst 5.007e-06s; std dev. 1.701e-06 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 16738.250x: 0.01596s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 9.99e-05s; avg 3.33e-05s ( +/- 57.76/ 34.61 %); best 1.407e-05s; worst 4.482e-05s; std dev. 1.369e-05 (taking best). +Reference operation time is 1.40667e-05 s (6.825 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.407e-05 Mflops: 6.825) +Merge (3 -> 1 leaves) took w.c.t. of 6.914e-06s, ~3.099e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 98.32/160.53 %); best 5.081e-08s; worst 7.868e-06s; std dev. 3.462e-06 (taking best). +Reference operation time is 5.0807e-08 s (1890 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 1889.503 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 276.865x: 1.407e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04824s (of which 3.099e-05s partitioning, 0s I/O); computing times: 1.311e-05s in par. loops, 2.146e-06s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.04824s, equivalent to 50584.2/3.0 new/old ops (0.0959s for 2 clones -- as 100561.5/6.0 ops, or 50280.8/3.0 ops per clone), SPEEDUP of 16738.250x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 16738.250x (0.01596s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01596s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1673725.0% (1.674e+04x) (from 0.006014 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.485e-05s (of which 1.001e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 0s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 6.485e-05s, equivalent to 1276.4/4.6 new/old ops (3.481e-05s for 2 clones -- as 685.1/2.5 ops, or 342.6/1.2 ops per clone), SPEEDUP of 276.865x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 276.865x (1.407e-05s -> 5.081e-08s), will amortize in 4.6 ops by saving 1.402e-05s per op. +In 1 tuning rounds (tot. 0.00021s, 3.5e-05s for constructor, 2 clones) obtained a SPEEDUP of 27586.5% (276.9x) (from 6.825 to 1890 Mflops). #pr: updating sample at index 11 (3^th of 16), 0^th touch for (0,0,0,0,1,1,0). -First run of RSB Autotuner took 0.144303 s (1.596e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000224829 s (1.407e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.391889 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000478983 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.027713 0.023986 0.051699 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.051699 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.027713 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.023986 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.051699 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.000028 0.000012 0.000040 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000040 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.000028 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000012 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000040 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.083694 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.0277128 0 0.0239859 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 5.98431e-05 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 2.81334e-05 0 1.19209e-05 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 8.334s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.289s/0.000s . +# so far, program took 6.911s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.003s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.1118s (system CPU time used) -ru_utime : 37.15s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1554s (system CPU time used) +ru_utime : 8.905s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was S). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 8.335s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.289s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 6.911s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.003s/0.000s . # Reusing type converted (S->C) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.088s): (3 x 3)[0x57556500]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.000s): (3 x 3)[0x581e63a0]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.02934s; avg 0.00978s ( +/- 18.18/ 36.28 %); best 0.008002s; worst 0.01333s; std dev. 0.002509 (taking best). -Reference operation time is 0.0080018 s (0.012 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.008002 Mflops: 0.012) -Merge (3 -> 1 leaves) took w.c.t. of 3.91e-05s, ~1.502e-05s of computing time (of which 3.815e-06s sorting, 2.861e-06s analysis) -3 iterations (13 th.) took 0.001004s; avg 0.0003347s ( +/- 99.43/198.79 %); best 1.907e-06s; worst 0.0009999s; std dev. 0.0004704 (taking best). -Reference operation time is 1.90735e-06 s (50.33 Mflops) with 13 threads. -After merge step 1: tpop: 1.907e-06 s ~Mflops: 50.332 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 4195.250x: 0.008002s -> 1.907e-06s, so taking this instance. +3 iterations (24 th.) took 0.000149s; avg 4.967e-05s ( +/- 77.92/111.20 %); best 1.097e-05s; worst 0.0001049s; std dev. 4.009e-05 (taking best). +Reference operation time is 1.09673e-05 s (8.753 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.097e-05 Mflops: 8.753) +Merge (3 -> 1 leaves) took w.c.t. of 7.868e-06s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 1.907e-05s; avg 6.358e-06s ( +/- 99.20/181.25 %); best 5.081e-08s; worst 1.788e-05s; std dev. 8.163e-06 (taking best). +Reference operation time is 5.0807e-08 s (1890 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 1889.503 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 215.861x: 1.097e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.02399s (of which 4.506e-05s partitioning, 0s I/O); computing times: 1.502e-05s in par. loops, 3.815e-06s sorting, 2.861e-06s analyzing) -Total merge + benchmarking process took 0.02399s, equivalent to 12578.2/3.0 new/old ops (0.06551s for 2 clones -- as 34347.1/8.2 ops, or 17173.6/4.1 ops per clone), SPEEDUP of 4195.250x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 4195.250x (0.008002s -> 1.907e-06s), will amortize in 3.0 ops by saving 0.008s per op. -In 1 tuning rounds (tot. 0.096s, 0.066s for constructor, 2 clones) obtained a SPEEDUP of 419425.0% (4195x) (from 0.012 to 50.33 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.39e-05s (of which 1.097e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 1.192e-06s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 6.39e-05s, equivalent to 1257.6/5.8 new/old ops (3.695e-05s for 2 clones -- as 727.4/3.4 ops, or 363.7/1.7 ops per clone), SPEEDUP of 215.861x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 215.861x (1.097e-05s -> 5.081e-08s), will amortize in 5.9 ops by saving 1.092e-05s per op. +In 1 tuning rounds (tot. 0.00026s, 3.7e-05s for constructor, 2 clones) obtained a SPEEDUP of 21486.1% (215.9x) (from 8.753 to 1890 Mflops). #pr: updating sample at index 5 (4^th of 16), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.09604 s (8.002e-03 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000276804 s (1.097e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.663928 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000723839 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.031621 0.028120 0.059741 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.059741 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.031621 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.028120 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.059741 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.000026 0.000012 0.000038 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000038 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.000026 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000012 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000038 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.087635 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.031621 0 0.02812 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 5.60284e-05 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 2.59876e-05 0 1.21593e-05 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.096s): (3 x 3)[0x57556500]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.028s): (3 x 3)[0x581e63a0]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.04799s; avg 0.016s ( +/- 0.07/ 0.14 %); best 0.01599s; worst 0.01602s; std dev. 1.585e-05 (taking best). -Reference operation time is 0.015986 s (0.02402 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.024) -Merge (3 -> 1 leaves) took w.c.t. of 2.098e-05s, ~8.106e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) -3 iterations (13 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 36.84/ 65.79 %); best 1.907e-06s; worst 5.007e-06s; std dev. 1.408e-06 (taking best). -Reference operation time is 1.90735e-06 s (201.3 Mflops) with 13 threads. -After merge step 1: tpop: 1.907e-06 s ~Mflops: 201.327 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 8381.250x: 0.01599s -> 1.907e-06s, so taking this instance. +3 iterations (24 th.) took 0.01103s; avg 0.003676s ( +/- 38.62/ 19.48 %); best 0.002256s; worst 0.004392s; std dev. 0.001004 (taking best). +Reference operation time is 0.00225616 s (0.1702 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.002256 Mflops: 0.170) +Merge (3 -> 1 leaves) took w.c.t. of 1.097e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 1.383e-05s; avg 4.609e-06s ( +/- 98.90/179.31 %); best 5.081e-08s; worst 1.287e-05s; std dev. 5.857e-06 (taking best). +Reference operation time is 5.0807e-08 s (7558 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 7558.014 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 44406.382x: 0.002256s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 2.694e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) -Total merge + benchmarking process took 0.04798s, equivalent to 25153.8/3.0 new/old ops (0.0919s for 2 clones -- as 48181.0/5.7 ops, or 24090.5/2.9 ops per clone), SPEEDUP of 8381.250x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 8381.250x (0.01599s -> 1.907e-06s), will amortize in 3.0 ops by saving 0.01598s per op. -In 1 tuning rounds (tot. 0.14s, 0.092s for constructor, 2 clones) obtained a SPEEDUP of 838025.0% (8381x) (from 0.02402 to 201.3 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.0111s (of which 1.478e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 0.0111s, equivalent to 218550.0/4.9 new/old ops (0.02622s for 2 clones -- as 516072.3/11.6 ops, or 258036.1/5.8 ops per clone), SPEEDUP of 44406.382x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 44406.382x (0.002256s -> 5.081e-08s), will amortize in 4.9 ops by saving 0.002256s per op. +In 1 tuning rounds (tot. 0.037s, 0.026s for constructor, 2 clones) obtained a SPEEDUP of 4440538.2% (4.441e+04x) (from 0.1702 to 7558 Mflops). #pr: updating sample at index 13 (5^th of 16), 0^th touch for (0,0,0,0,1,2,0). -First run of RSB Autotuner took 0.140022 s (1.599e-02 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0373549 s (2.256e-03 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.779954 s and estimated a speedup of 1.000000 x (1.907e-06 s -> 1.907e-06 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.1126 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.031568 0.032001 0.063569 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.063569 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.031568 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.032001 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.063569 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000001 0.010023 0.010814 0.020837 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.020838 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.010023 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.010814 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.020837 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.0955789 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.0315681 0 0.032001 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 0.027545 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 9.53674e-07 0.0100229 0 0.010814 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 10.359s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.969s/0.000s . +# so far, program took 7.171s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.154s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.1478s (system CPU time used) -ru_utime : 60.16s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.2112s (system CPU time used) +ru_utime : 14.09s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was C). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 10.359s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.969s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 7.171s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.154s/0.000s . # Reusing type converted (C->Z) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.015s): (3 x 3)[0x57556500]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.003s): (3 x 3)[0x581e63a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 6.008e-05s; avg 2.003e-05s ( +/- 29.76/ 34.52 %); best 1.407e-05s; worst 2.694e-05s; std dev. 5.299e-06 (taking best). -Reference operation time is 1.40667e-05 s (6.825 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.407e-05 Mflops: 6.825) -Merge (3 -> 1 leaves) took w.c.t. of 1.717e-05s, ~7.153e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (13 th.) took 2.384e-05s; avg 7.947e-06s ( +/- 88.00/176.00 %); best 9.537e-07s; worst 2.193e-05s; std dev. 9.89e-06 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 14.750x: 1.407e-05s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 0.000129s; avg 4.299e-05s ( +/- 62.85/ 67.47 %); best 1.597e-05s; worst 7.2e-05s; std dev. 2.292e-05 (taking best). +Reference operation time is 1.5974e-05 s (6.01 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.597e-05 Mflops: 6.010) +Merge (3 -> 1 leaves) took w.c.t. of 9.06e-06s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 1.383e-05s; avg 4.609e-06s ( +/- 98.90/179.31 %); best 5.081e-08s; worst 1.287e-05s; std dev. 5.857e-06 (taking best). +Reference operation time is 5.0807e-08 s (1890 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 1889.503 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 314.406x: 1.597e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.0001061s (of which 2.217e-05s partitioning, 0s I/O); computing times: 7.153e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.0001061s, equivalent to 111.2/7.5 new/old ops (7.606e-05s for 2 clones -- as 79.8/5.4 ops, or 39.9/2.7 ops per clone), SPEEDUP of 14.750x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 14.750x (1.407e-05s -> 9.537e-07s), will amortize in 8.1 ops by saving 1.311e-05s per op. -In 1 tuning rounds (tot. 0.00024s, 7.6e-05s for constructor, 2 clones) obtained a SPEEDUP of 1375.0% (14.75x) (from 6.825 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.199e-05s (of which 1.287e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 6.199e-05s, equivalent to 1220.1/3.9 new/old ops (3.719e-05s for 2 clones -- as 732.1/2.3 ops, or 366.0/1.2 ops per clone), SPEEDUP of 314.406x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 314.406x (1.597e-05s -> 5.081e-08s), will amortize in 3.9 ops by saving 1.592e-05s per op. +In 1 tuning rounds (tot. 0.00024s, 3.7e-05s for constructor, 2 clones) obtained a SPEEDUP of 31340.6% (314.4x) (from 6.01 to 1890 Mflops). #pr: updating sample at index 7 (6^th of 16), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.000261068 s (1.407e-05 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000257015 s (1.597e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.00250697 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.00111508 s and estimated a speedup of 1.000000 x (5.081e-08 s -> 5.081e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000001 0.015018 0.000029 0.015047 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.015048 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.015018 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.000029 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.015047 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.002640 0.000014 0.002654 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.002654 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.002640 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000014 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.002654 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.015084 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 9.53674e-07 0.015018 0 2.88486e-05 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 0.00267291 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 0.00264001 0 1.38283e-05 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 13 threads -# Constructed matrix (took 0.103s): (3 x 3)[0x57556500]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 24 threads +# Constructed matrix (took 0.000s): (3 x 3)[0x581e63a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (13 th.) took 0.04798s; avg 0.01599s ( +/- 0.08/ 0.05 %); best 0.01598s; worst 0.016s; std dev. 8.978e-06 (taking best). -Reference operation time is 0.0159791 s (0.02403 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01598 Mflops: 0.024) -Merge (3 -> 1 leaves) took w.c.t. of 2.003e-05s, ~5.96e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) -3 iterations (13 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 36.84/ 65.79 %); best 1.907e-06s; worst 5.007e-06s; std dev. 1.408e-06 (taking best). -Reference operation time is 1.90735e-06 s (201.3 Mflops) with 13 threads. -After merge step 1: tpop: 1.907e-06 s ~Mflops: 201.327 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 8377.625x: 0.01598s -> 1.907e-06s, so taking this instance. +3 iterations (24 th.) took 0.0001261s; avg 4.204e-05s ( +/- 68.81/ 66.73 %); best 1.311e-05s; worst 7.01e-05s; std dev. 2.327e-05 (taking best). +Reference operation time is 1.3113e-05 s (29.28 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.311e-05 Mflops: 29.284) +Merge (3 -> 1 leaves) took w.c.t. of 7.868e-06s, ~3.099e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 1.097e-05s; avg 3.656e-06s ( +/- 98.61/173.91 %); best 5.081e-08s; worst 1.001e-05s; std dev. 4.512e-06 (taking best). +Reference operation time is 5.0807e-08 s (7558 Mflops) with 24 threads. +After merge step 1: tpop: 5.081e-08 s ~Mflops: 7558.014 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 258.095x: 1.311e-05s -> 5.081e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 2.503e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) -Total merge + benchmarking process took 0.04799s, equivalent to 25159.5/3.0 new/old ops (0.09592s for 2 clones -- as 50287.6/6.0 ops, or 25143.8/3.0 ops per clone), SPEEDUP of 8377.625x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 8377.625x (0.01598s -> 1.907e-06s), will amortize in 3.0 ops by saving 0.01598s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 837662.5% (8378x) (from 0.02403 to 201.3 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.794e-05s (of which 1.192e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 9.537e-07s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 5.794e-05s, equivalent to 1140.3/4.4 new/old ops (3.91e-05s for 2 clones -- as 769.6/3.0 ops, or 384.8/1.5 ops per clone), SPEEDUP of 258.095x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 258.095x (1.311e-05s -> 5.081e-08s), will amortize in 4.4 ops by saving 1.306e-05s per op. +In 1 tuning rounds (tot. 0.00024s, 3.9e-05s for constructor, 2 clones) obtained a SPEEDUP of 25709.5% (258.1x) (from 29.28 to 7558 Mflops). #pr: updating sample at index 15 (7^th of 16), 0^th touch for (0,0,0,0,1,3,0). -First run of RSB Autotuner took 0.144011 s (1.598e-02 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.000260115 s (1.311e-05 s -> 5.081e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.608567 s and estimated a speedup of 1.000000 x (1.907e-06 s -> 1.907e-06 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.000759125 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 13 3 3 6 0.000000 0.039395 0.032007 0.071402 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.071402 -%:RSB_SUBDIVISION_TIME:A.mtx S N 13 3 3 6 0.039395 -%:RSB_SHUFFLE_TIME:A.mtx S N 13 3 3 6 0.032007 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 13 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 13 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 13 3 3 6 0.071402 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 13 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 13 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 13 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 13 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 24 3 3 6 0.000000 0.000027 0.000013 0.000040 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000040 +%:RSB_SUBDIVISION_TIME:A.mtx S N 24 3 3 6 0.000027 +%:RSB_SHUFFLE_TIME:A.mtx S N 24 3 3 6 0.000013 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 24 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 24 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 24 3 3 6 0.000040 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 24 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 24 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 24 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 24 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 13 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 13 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 13 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 13 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 13 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 24 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 24 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 24 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 24 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 24 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[13] SPMV[13] SPMV[13] -%operation:A.mtx 0.103398 1e+09 1e+09 -%constructor:matrix SORT[13] SCAN[13] SHUFFLE[13] INSERT[13] -%constructor:A.mtx 0 0.0393949 0 0.032007 +%operation:matrix CONSTRUCTOR[24] SPMV[24] SPMV[24] +%operation:A.mtx 5.79357e-05 1e+09 1e+09 +%constructor:matrix SORT[24] SCAN[24] SHUFFLE[24] INSERT[24] +%constructor:A.mtx 0 2.69413e-05 0 1.3113e-05 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 11.321s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.724s/0.000s . +# so far, program took 7.248s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.156s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.1917s (system CPU time used) -ru_utime : 70.91s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.2634s (system CPU time used) +ru_utime : 15.18s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to type D: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 13 13 0 4.0000 4.6667 3 1 25.17 1.587e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 1.55e-01 5.17e+00 1 2.40e-05 -pr: 9:R_R A 3 3 6 4 D S N 13 13 0 4.0000 4.6667 3 1 100.66 7.967e-03 0.000e+00 9.537e-07 0.000e+00 7.202e-02 3.82e-01 2.79e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 24 24 0 4.0000 4.6667 3 1 472.38 1.192e-05 0.000e+00 5.081e-08 0.000e+00 2.921e-04 2.91e+00 5.17e+00 1 2.40e-05 +pr: 9:R_R A 3 3 6 4 D S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.216e-05 0.000e+00 5.081e-08 0.000e+00 1.750e-04 7.16e+00 2.79e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1249712.5 % faster, avg. sp. ratio 12498.125x, max sp. ratio 16642.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 117462.1/75516.5/159407.8/234924.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 9.6/ 18.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 9.6 ops) +#pr: (in succ. cases rsb autotuning gave avg. 23597.8 % faster, avg. sp. ratio 236.978x, max sp. ratio 239.324x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 4596.4/3444.4/5748.5/9192.9 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 19.4/ 14.4/ 24.5/ 38.9 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 19.5, min. 14.5, max. 24.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.206/ 0.130/ 0.281,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.537/ 0.155/ 0.382,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.858/ 2.441/ 5.275,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.077/ 2.913/ 7.164,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 3.979/ 2.792/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.11 s, min 0.07 s, max 0.15 s, tot 0.22 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.11 s, min 0.07 s, max 0.15 s, tot 0.22 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.781e-03, min 1.512e-03, max 1.205e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.192e-02s, min 7.967e-03s, max 1.587e-02s, tot 2.384e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.576e+01 5.567e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.181e+03, min 4.724e+02, max 1.890e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.954e+00, min 2.013e+00, max 7.895e+00 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 1.016e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.204e-05s, min 1.192e-05s, max 1.216e-05s, tot 2.408e-05s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.585e+00 9.910e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type S: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 1 S S N 13 13 0 4.0000 4.6667 3 1 25.17 7.997e-03 0.000e+00 9.537e-07 0.000e+00 1.082e-01 9.23e-02 3.17e+00 1 2.40e-05 -pr: 11:R_R A 3 3 6 4 S S N 13 13 0 4.0000 4.6667 3 1 100.66 1.596e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 1 S S N 24 24 0 4.0000 4.6667 3 1 472.38 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.050e-04 1.73e+00 3.17e+00 1 2.40e-05 +pr: 11:R_R A 3 3 6 4 S S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.407e-05 0.000e+00 5.081e-08 0.000e+00 2.248e-04 3.86e+00 1.54e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1256087.5 % faster, avg. sp. ratio 12561.875x, max sp. ratio 16738.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 132384.2/113455.8/151312.8/264768.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.3/ 9.0/ 13.5/ 22.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.3, min. 9.0, max. 13.5 ops) +#pr: (in succ. cases rsb autotuning gave avg. 29463.6 % faster, avg. sp. ratio 295.636x, max sp. ratio 314.406x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 4230.4/4035.7/4425.2/8460.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 14.4/ 12.8/ 16.0/ 28.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 14.5, min. 12.9, max. 16.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 8/ 8/ 8) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 24/ 24/ 24) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.117/ 0.080/ 0.155,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.298/ 0.092/ 0.206,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.204/ 1.496/ 2.913,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 5.590/ 1.732/ 3.858,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.354/ 1.542/ 3.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.13 s, min 0.11 s, max 0.14 s, tot 0.25 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.13 s, min 0.11 s, max 0.14 s, tot 0.25 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 4.508e-03, min 3.001e-03, max 6.014e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.198e-02s, min 7.997e-03s, max 1.596e-02s, tot 2.396e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.664e+01 9.083e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.181e+03, min 4.724e+02, max 1.890e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.164e+00, min 1.502e+00, max 6.825e+00 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 1.016e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.502e-05s, min 1.407e-05s, max 1.597e-05s, tot 3.004e-05s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 8.303e+00 1.617e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type C: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 5:R_R A 3 3 6 1 C S N 13 13 0 4.0000 4.6667 3 1 50.33 8.002e-03 0.000e+00 1.907e-06 0.000e+00 9.604e-02 7.76e-02 1.29e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 13 13 0 4.0000 4.6667 3 1 201.33 1.599e-02 0.000e+00 1.907e-06 0.000e+00 1.400e-01 1.91e-01 6.98e-01 1 3.84e-04 +pr: 5:R_R A 3 3 6 1 C S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.097e-05 0.000e+00 5.081e-08 0.000e+00 2.768e-04 2.91e+00 1.29e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 24 24 0 4.0000 4.6667 3 1 7558.01 2.256e-03 0.000e+00 5.081e-08 0.000e+00 3.735e-02 7.16e+00 6.98e-01 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 628725.0 % faster, avg. sp. ratio 6288.250x, max sp. ratio 8381.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 61882.2/50352.6/73411.9/123764.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.4/ 8.8/ 12.0/ 20.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.4, min. 8.8, max. 12.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 2231012.2 % faster, avg. sp. ratio 22311.122x, max sp. ratio 44406.382x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 370340.2/5448.1/735232.3/740680.4 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 20.9/ 16.6/ 25.2/ 41.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 21.0, min. 16.6, max. 25.4 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.103/ 0.065/ 0.141,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.268/ 0.078/ 0.191,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.858/ 2.441/ 5.275,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.077/ 2.913/ 7.164,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 0.995/ 0.698/ 1.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.12 s, min 0.10 s, max 0.14 s, tot 0.24 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.12 s, min 0.10 s, max 0.14 s, tot 0.24 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.258e+02, min 5.033e+01, max 2.013e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.801e-02, min 1.200e-02, max 2.402e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.907e-06s, min 1.907e-06s, max 1.907e-06s, tot 3.815e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.199e-02s, min 8.002e-03s, max 1.599e-02s, tot 2.399e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.151e+01 1.113e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.04 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.02 s, min 0.00 s, max 0.04 s, tot 0.04 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.724e+03, min 1.890e+03, max 7.558e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.462e+00, min 1.702e-01, max 8.753e+00 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 1.016e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.134e-03s, min 1.097e-05s, max 2.256e-03s, tot 2.267e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.585e+00 9.910e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type Z: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 7:R_R A 3 3 6 1 Z S N 13 13 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 9.537e-07 0.000e+00 2.611e-04 2.81e-01 2.29e+00 1 9.60e-05 -pr: 15:R_R A 3 3 6 4 Z S N 13 13 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.440e-01 3.67e-01 1.32e+00 1 3.84e-04 +pr: 7:R_R A 3 3 6 1 Z S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.570e-04 5.27e+00 2.29e+00 1 9.60e-05 +pr: 15:R_R A 3 3 6 4 Z S N 24 24 0 4.0000 4.6667 3 1 7558.01 1.311e-05 0.000e+00 5.081e-08 0.000e+00 2.601e-04 1.38e+01 1.32e+00 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 419518.8 % faster, avg. sp. ratio 4196.188x, max sp. ratio 8377.625x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 37888.4/273.8/75503.1/75776.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.8/ 9.0/ 18.6/ 27.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 14.5, min. 9.0, max. 19.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 28525.1 % faster, avg. sp. ratio 286.251x, max sp. ratio 314.406x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5089.2/5058.7/5119.7/10178.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 18.0/ 16.1/ 19.8/ 35.9 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 18.0, min. 16.1, max. 19.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.249/ 0.231/ 0.266,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.648/ 0.281/ 0.367,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 7.164/ 4.330/ 9.999,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 19.052/ 5.275/ 13.778,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.807/ 1.323/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.07 s, min 0.00 s, max 0.14 s, tot 0.14 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.07 s, min 0.00 s, max 0.14 s, tot 0.14 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.424e+00, min 2.403e-02, max 6.825e+00 (2 samples) -#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 2.861e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 7.997e-03s, min 1.407e-05s, max 1.598e-02s, tot 1.599e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.718e+01 3.138e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.724e+03, min 1.890e+03, max 7.558e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.765e+01, min 6.010e+00, max 2.928e+01 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 1.016e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.454e-05s, min 1.311e-05s, max 1.597e-05s, tot 2.909e-05s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.419e+00 5.585e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 13 13 0 4.0000 4.6667 3 1 25.17 1.587e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 1.55e-01 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 13 13 0 4.0000 4.6667 3 1 25.17 7.997e-03 0.000e+00 9.537e-07 0.000e+00 1.082e-01 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 13 13 0 4.0000 4.6667 3 1 50.33 8.002e-03 0.000e+00 1.907e-06 0.000e+00 9.604e-02 7.76e-02 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 13 13 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 9.537e-07 0.000e+00 2.611e-04 2.81e-01 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 24 24 0 4.0000 4.6667 3 1 472.38 1.192e-05 0.000e+00 5.081e-08 0.000e+00 2.921e-04 2.91e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 24 24 0 4.0000 4.6667 3 1 472.38 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.050e-04 1.73e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.097e-05 0.000e+00 5.081e-08 0.000e+00 2.768e-04 2.91e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.570e-04 5.27e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 730837.5 % faster, avg. sp. ratio 7309.375x, max sp. ratio 16642.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 80872.5/273.8/159407.8/323489.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.4/ 9.6/ 18.6/ 53.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 13.8, min. 9.6, max. 19.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26882.6 % faster, avg. sp. ratio 269.826x, max sp. ratio 314.406x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5072.7/4035.7/5748.5/20290.9 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 19.7/ 12.8/ 25.2/ 78.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 19.7, min. 12.9, max. 25.4 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.126/ 0.065/ 0.231,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.606/ 0.078/ 0.281,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.677/ 1.496/ 4.330,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 12.833/ 1.732/ 5.275,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.09 s, min 0.00 s, max 0.15 s, tot 0.36 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.09 s, min 0.00 s, max 0.15 s, tot 0.36 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 5.033e+01, min 2.517e+01, max 1.007e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.710e+00, min 1.512e-03, max 6.825e+00 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.192e-06s, min 9.537e-07s, max 1.907e-06s, tot 4.768e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 7.971e-03s, min 1.407e-05s, max 1.587e-02s, tot 3.188e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.138e+01 1.113e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.181e+03, min 4.724e+02, max 1.890e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.570e+00, min 1.502e+00, max 8.753e+00 (4 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 2.032e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.371e-05s, min 1.097e-05s, max 1.597e-05s, tot 5.484e-05s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.585e+00 1.617e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 #pr: ======== Limiting to nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 13 13 0 4.0000 4.6667 3 1 100.66 7.967e-03 0.000e+00 9.537e-07 0.000e+00 7.202e-02 3.82e-01 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 13 13 0 4.0000 4.6667 3 1 100.66 1.596e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 13 13 0 4.0000 4.6667 3 1 201.33 1.599e-02 0.000e+00 1.907e-06 0.000e+00 1.400e-01 1.91e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 13 13 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.440e-01 3.67e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.216e-05 0.000e+00 5.081e-08 0.000e+00 1.750e-04 7.16e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.407e-05 0.000e+00 5.081e-08 0.000e+00 2.248e-04 3.86e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 24 24 0 4.0000 4.6667 3 1 7558.01 2.256e-03 0.000e+00 5.081e-08 0.000e+00 3.735e-02 7.16e+00 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 24 24 0 4.0000 4.6667 3 1 7558.01 1.311e-05 0.000e+00 5.081e-08 0.000e+00 2.601e-04 1.38e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1046184.4 % faster, avg. sp. ratio 10462.844x, max sp. ratio 16738.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 93936.1/73411.9/151312.8/375744.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 8.8/ 9.0/ 35.9 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 8.8, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1129416.7 % faster, avg. sp. ratio 11295.167x, max sp. ratio 44406.382x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 187055.4/3444.4/735232.3/748221.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 16.7/ 14.4/ 19.8/ 66.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 16.7, min. 14.5, max. 19.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.211/ 0.141/ 0.281,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.145/ 0.191/ 0.382,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.865/ 2.913/ 9.999,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 31.964/ 3.858/ 13.778,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.13 s, min 0.07 s, max 0.14 s, tot 0.50 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.13 s, min 0.07 s, max 0.14 s, tot 0.50 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.653e-02, min 6.014e-03, max 2.403e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 5.722e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.397e-02s, min 7.967e-03s, max 1.599e-02s, tot 5.590e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.576e+01 5.151e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.01 s, min 0.00 s, max 0.04 s, tot 0.04 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.01 s, min 0.00 s, max 0.04 s, tot 0.04 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.724e+03, min 1.890e+03, max 7.558e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.104e+01, min 1.702e-01, max 2.928e+01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 2.032e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 5.739e-04s, min 1.216e-05s, max 2.256e-03s, tot 2.295e-03s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.419e+00 8.303e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 13 13 0 4.0000 4.6667 3 1 25.17 1.587e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 1.55e-01 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 13 13 0 4.0000 4.6667 3 1 25.17 7.997e-03 0.000e+00 9.537e-07 0.000e+00 1.082e-01 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 13 13 0 4.0000 4.6667 3 1 50.33 8.002e-03 0.000e+00 1.907e-06 0.000e+00 9.604e-02 7.76e-02 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 13 13 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 9.537e-07 0.000e+00 2.611e-04 2.81e-01 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 13 13 0 4.0000 4.6667 3 1 100.66 7.967e-03 0.000e+00 9.537e-07 0.000e+00 7.202e-02 3.82e-01 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 13 13 0 4.0000 4.6667 3 1 100.66 1.596e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 13 13 0 4.0000 4.6667 3 1 201.33 1.599e-02 0.000e+00 1.907e-06 0.000e+00 1.400e-01 1.91e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 13 13 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.440e-01 3.67e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 24 24 0 4.0000 4.6667 3 1 472.38 1.192e-05 0.000e+00 5.081e-08 0.000e+00 2.921e-04 2.91e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 24 24 0 4.0000 4.6667 3 1 472.38 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.050e-04 1.73e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.097e-05 0.000e+00 5.081e-08 0.000e+00 2.768e-04 2.91e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.570e-04 5.27e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.216e-05 0.000e+00 5.081e-08 0.000e+00 1.750e-04 7.16e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.407e-05 0.000e+00 5.081e-08 0.000e+00 2.248e-04 3.86e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 24 24 0 4.0000 4.6667 3 1 7558.01 2.256e-03 0.000e+00 5.081e-08 0.000e+00 3.735e-02 7.16e+00 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 24 24 0 4.0000 4.6667 3 1 7558.01 1.311e-05 0.000e+00 5.081e-08 0.000e+00 2.601e-04 1.38e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 888510.9 % faster, avg. sp. ratio 8886.109x, max sp. ratio 16738.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 87404.3/273.8/159407.8/699234.1 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.2/ 8.8/ 18.6/ 89.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.4, min. 8.8, max. 19.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 578149.6 % faster, avg. sp. ratio 5782.496x, max sp. ratio 44406.382x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 96064.1/3444.4/735232.3/768512.4 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 18.2/ 12.8/ 25.2/145.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 18.2, min. 12.9, max. 25.4 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.169/ 0.065/ 0.281,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.751/ 0.078/ 0.382,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.271/ 1.496/ 9.999,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 44.797/ 1.732/ 13.778,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.11 s, min 0.00 s, max 0.15 s, tot 0.86 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.11 s, min 0.00 s, max 0.15 s, tot 0.86 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.007e+02, min 2.517e+01, max 2.013e+02 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 8.634e-01, min 1.512e-03, max 6.825e+00 (8 samples) -#pr: best tun. rsb operation time was: on avg. 1.311e-06s, min 9.537e-07s, max 1.907e-06s, tot 1.049e-05s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.097e-02s, min 1.407e-05s, max 1.599e-02s, tot 8.778e-02s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.576e+01 1.113e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.04 s, tot 0.04 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.04 s, tot 0.04 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.952e+03, min 4.724e+02, max 7.558e+03 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 7.807e+00, min 1.702e-01, max 2.928e+01 (8 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 4.065e-07s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 2.938e-04s, min 1.097e-05s, max 2.256e-03s, tot 2.350e-03s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.419e+00 1.617e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 13 13 0 4.0000 4.6667 3 1 25.17 1.587e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 1.55e-01 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 13 13 0 4.0000 4.6667 3 1 25.17 7.997e-03 0.000e+00 9.537e-07 0.000e+00 1.082e-01 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 13 13 0 4.0000 4.6667 3 1 50.33 8.002e-03 0.000e+00 1.907e-06 0.000e+00 9.604e-02 7.76e-02 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 13 13 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 9.537e-07 0.000e+00 2.611e-04 2.81e-01 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 24 24 0 4.0000 4.6667 3 1 472.38 1.192e-05 0.000e+00 5.081e-08 0.000e+00 2.921e-04 2.91e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 24 24 0 4.0000 4.6667 3 1 472.38 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.050e-04 1.73e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.097e-05 0.000e+00 5.081e-08 0.000e+00 2.768e-04 2.91e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.570e-04 5.27e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 730837.5 % faster, avg. sp. ratio 7309.375x, max sp. ratio 16642.000x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 80872.5/273.8/159407.8/323489.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.4/ 9.6/ 18.6/ 53.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 13.8, min. 9.6, max. 19.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26882.6 % faster, avg. sp. ratio 269.826x, max sp. ratio 314.406x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5072.7/4035.7/5748.5/20290.9 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 19.7/ 12.8/ 25.2/ 78.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 19.7, min. 12.9, max. 25.4 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.126/ 0.065/ 0.231,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.606/ 0.078/ 0.281,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.677/ 1.496/ 4.330,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 12.833/ 1.732/ 5.275,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.09 s, min 0.00 s, max 0.15 s, tot 0.36 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.09 s, min 0.00 s, max 0.15 s, tot 0.36 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 5.033e+01, min 2.517e+01, max 1.007e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.710e+00, min 1.512e-03, max 6.825e+00 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.192e-06s, min 9.537e-07s, max 1.907e-06s, tot 4.768e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 7.971e-03s, min 1.407e-05s, max 1.587e-02s, tot 3.188e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.138e+01 1.113e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.181e+03, min 4.724e+02, max 1.890e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.570e+00, min 1.502e+00, max 8.753e+00 (4 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 2.032e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.371e-05s, min 1.097e-05s, max 1.597e-05s, tot 5.484e-05s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.585e+00 1.617e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 #pr: ======== Limiting to both transA=N and nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 13 13 0 4.0000 4.6667 3 1 100.66 7.967e-03 0.000e+00 9.537e-07 0.000e+00 7.202e-02 3.82e-01 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 13 13 0 4.0000 4.6667 3 1 100.66 1.596e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 13 13 0 4.0000 4.6667 3 1 201.33 1.599e-02 0.000e+00 1.907e-06 0.000e+00 1.400e-01 1.91e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 13 13 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.440e-01 3.67e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.216e-05 0.000e+00 5.081e-08 0.000e+00 1.750e-04 7.16e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.407e-05 0.000e+00 5.081e-08 0.000e+00 2.248e-04 3.86e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 24 24 0 4.0000 4.6667 3 1 7558.01 2.256e-03 0.000e+00 5.081e-08 0.000e+00 3.735e-02 7.16e+00 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 24 24 0 4.0000 4.6667 3 1 7558.01 1.311e-05 0.000e+00 5.081e-08 0.000e+00 2.601e-04 1.38e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 1046184.4 % faster, avg. sp. ratio 10462.844x, max sp. ratio 16738.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 93936.1/73411.9/151312.8/375744.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 8.8/ 9.0/ 35.9 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 8.8, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1129416.7 % faster, avg. sp. ratio 11295.167x, max sp. ratio 44406.382x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 187055.4/3444.4/735232.3/748221.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 16.7/ 14.4/ 19.8/ 66.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 16.7, min. 14.5, max. 19.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.211/ 0.141/ 0.281,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.145/ 0.191/ 0.382,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.865/ 2.913/ 9.999,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 31.964/ 3.858/ 13.778,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.13 s, min 0.07 s, max 0.14 s, tot 0.50 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.13 s, min 0.07 s, max 0.14 s, tot 0.50 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.653e-02, min 6.014e-03, max 2.403e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 5.722e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.397e-02s, min 7.967e-03s, max 1.599e-02s, tot 5.590e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.576e+01 5.151e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.01 s, min 0.00 s, max 0.04 s, tot 0.04 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.01 s, min 0.00 s, max 0.04 s, tot 0.04 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.724e+03, min 1.890e+03, max 7.558e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.104e+01, min 1.702e-01, max 2.928e+01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 2.032e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 5.739e-04s, min 1.216e-05s, max 2.256e-03s, tot 2.295e-03s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.419e+00 8.303e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 8) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -5058,53 +5108,53 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 13 13 0 4.0000 4.6667 3 1 25.17 1.587e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 1.55e-01 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 13 13 0 4.0000 4.6667 3 1 25.17 7.997e-03 0.000e+00 9.537e-07 0.000e+00 1.082e-01 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 13 13 0 4.0000 4.6667 3 1 50.33 8.002e-03 0.000e+00 1.907e-06 0.000e+00 9.604e-02 7.76e-02 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 13 13 0 4.0000 4.6667 3 1 100.66 1.407e-05 0.000e+00 9.537e-07 0.000e+00 2.611e-04 2.81e-01 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 13 13 0 4.0000 4.6667 3 1 100.66 7.967e-03 0.000e+00 9.537e-07 0.000e+00 7.202e-02 3.82e-01 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 13 13 0 4.0000 4.6667 3 1 100.66 1.596e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 13 13 0 4.0000 4.6667 3 1 201.33 1.599e-02 0.000e+00 1.907e-06 0.000e+00 1.400e-01 1.91e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 13 13 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.440e-01 3.67e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 24 24 0 4.0000 4.6667 3 1 472.38 1.192e-05 0.000e+00 5.081e-08 0.000e+00 2.921e-04 2.91e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 24 24 0 4.0000 4.6667 3 1 472.38 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.050e-04 1.73e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.097e-05 0.000e+00 5.081e-08 0.000e+00 2.768e-04 2.91e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.597e-05 0.000e+00 5.081e-08 0.000e+00 2.570e-04 5.27e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.216e-05 0.000e+00 5.081e-08 0.000e+00 1.750e-04 7.16e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 24 24 0 4.0000 4.6667 3 1 1889.50 1.407e-05 0.000e+00 5.081e-08 0.000e+00 2.248e-04 3.86e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 24 24 0 4.0000 4.6667 3 1 7558.01 2.256e-03 0.000e+00 5.081e-08 0.000e+00 3.735e-02 7.16e+00 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 24 24 0 4.0000 4.6667 3 1 7558.01 1.311e-05 0.000e+00 5.081e-08 0.000e+00 2.601e-04 1.38e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 888510.9 % faster, avg. sp. ratio 8886.109x, max sp. ratio 16738.250x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 87404.3/273.8/159407.8/699234.1 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.2/ 8.8/ 18.6/ 89.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.4, min. 8.8, max. 19.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 578149.6 % faster, avg. sp. ratio 5782.496x, max sp. ratio 44406.382x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 96064.1/3444.4/735232.3/768512.4 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 18.2/ 12.8/ 25.2/145.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 18.2, min. 12.9, max. 25.4 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.169/ 0.065/ 0.281,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.751/ 0.078/ 0.382,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.271/ 1.496/ 9.999,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 44.797/ 1.732/ 13.778,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.11 s, min 0.00 s, max 0.15 s, tot 0.86 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.11 s, min 0.00 s, max 0.15 s, tot 0.86 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.007e+02, min 2.517e+01, max 2.013e+02 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 8.634e-01, min 1.512e-03, max 6.825e+00 (8 samples) -#pr: best tun. rsb operation time was: on avg. 1.311e-06s, min 9.537e-07s, max 1.907e-06s, tot 1.049e-05s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.097e-02s, min 1.407e-05s, max 1.599e-02s, tot 8.778e-02s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.576e+01 1.113e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.062e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) -#pr: Record collection took 5.17 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.04 s, tot 0.04 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.04 s, tot 0.04 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.952e+03, min 4.724e+02, max 7.558e+03 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 7.807e+00, min 1.702e-01, max 2.928e+01 (8 samples) +#pr: best tun. rsb operation time was: on avg. 5.081e-08s, min 5.081e-08s, max 5.081e-08s, tot 4.065e-07s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 2.938e-04s, min 1.097e-05s, max 2.256e-03s, tot 2.350e-03s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.419e+00 1.617e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.625e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: Record collection took 0.46 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 100 environment variables in 4726 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 100 environment variables in 4779 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1740464023_gcc-14.2.rpr -# Removing the temporary record file rsbench_pr__1740464023_gcc-14.2.rpr.tmp. -# terminating run at 1740464034 (after 11.3s of w.c.t.) -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash ./scripts/doc-tests.sh +#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1774875120_gcc-14.2.rpr +# Removing the temporary record file rsbench_pr__1774875120_gcc-14.2.rpr.tmp. +# terminating run at 1774875127 (after 7.2s of w.c.t.) +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh ./scripts/doc-tests.sh + set -o pipefail + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x -+ grep '^.\{71,\}' + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/autotune.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/io-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/power.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/snippets.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/transpose.c ++ grep '^.\{71,\}' + true + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/README + grep '^[^ ].\{80,\}' @@ -5154,7 +5204,7 @@ type char codes:D S C Z gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' if test /build/reproducible-path/librsb-1.3.0.2+dfsg != /build/reproducible-path/librsb-1.3.0.2+dfsg ; then cp /build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/vf.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/examples ; fi -( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) +( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) + which rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench + BRF=test.rpr @@ -5164,15 +5214,15 @@ Will invoke autotuning for ~10.000000 s x 1 rounds, specifying verbosity=0 and threads=0. (>0 means no structure tuning; 0 means only structure tuning, <0 means tuning of both with (negated) thread count suggestion). # Requested no transposition. # performance record file set to: test.rpr -# beginning run at 1740464034 +# beginning run at 1774875127 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench --lower 100 --as-symmetric --types : -n 1 --notranspose --compare-competitors --verbose --verbose --write-performance-record=test.rpr # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 6.13e-07 s +# average timer granularity: 5.14e-08 s # Will write a final performance record to file test.rpr and periodic checkpoints to test.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -5211,15 +5261,15 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos12-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 2 (each --verbose occurrence counts +1) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5231,40 +5281,40 @@ # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.562s +# Memory benchmark took 6.629s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 4 samples (1008 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 40329 bytes -# so far, program took 5.587s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 174762 bytes +# so far, program took 6.631s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.191e-01 s (100.00 %) - analyzed arrays in 2.306e-02 s (19.36 %) - cleaned-up arrays in 1.884e-05 s (0.02 %) - deduplicated arrays in 1.502e-05 s (0.01 %) + converted COO to RSB in 1.001e-03 s (100.00 %) + analyzed arrays in 8.788e-04 s (87.78 %) + cleaned-up arrays in 1.407e-05 s (1.41 %) + deduplicated arrays in 9.060e-06 s (0.90 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 6.398e-02 s (53.72 %) - memory allocations took 1.311e-05 s (0.01 %) - leafs setup took 1.907e-06 s (0.00 %) - halfword conversion took 3.201e-02 s (26.88 %) -Built (100 x 100)[0x57efde10]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.119s): (100 x 100)[0x57efde10]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 6.104e-05 s (6.10 %) + memory allocations took 1.192e-05 s (1.19 %) + leafs setup took 3.099e-06 s (0.31 %) + halfword conversion took 2.098e-05 s (2.10 %) +Built (100 x 100)[0x5747ee50]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.001s): (100 x 100)[0x5747ee50]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5275,11 +5325,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5290,11 +5340,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5304,16 +5354,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001969s; avg 6.564e-05s ( +/- 29.90/ 47.82 %); best 4.601e-05s; worst 9.704e-05s; std dev. 2.243e-05 (taking best). -Reference operation time is 4.60148e-05 s (439 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 4.601e-05 Mflops: 438.989) -Merge (3 -> 1 leaves) took w.c.t. of 8.988e-05s, ~7.2e-05s of computing time (of which 3.099e-05s sorting, 4.053e-06s analysis) +3 iterations (1 th.) took 0.0001431s; avg 4.768e-05s ( +/- 35.00/ 68.00 %); best 3.099e-05s; worst 8.011e-05s; std dev. 2.293e-05 (taking best). +Reference operation time is 3.09944e-05 s (651.7 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 3.099e-05 Mflops: 651.730) +Merge (3 -> 1 leaves) took w.c.t. of 6.104e-05s, ~5.221e-05s of computing time (of which 2.193e-05s sorting, 2.861e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5324,11 +5374,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5338,14 +5388,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.697e-05s; avg 1.566e-05s ( +/- 5.58/ 9.64 %); best 1.478e-05s; worst 1.717e-05s; std dev. 1.072e-06 (taking best). -Reference operation time is 1.4782e-05 s (1367 Mflops) with 1 threads. +3 iterations (1 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 8.61/ 9.27 %); best 1.097e-05s; worst 1.311e-05s; std dev. 8.778e-07 (taking best). +Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5355,25 +5405,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.478e-05 s ~Mflops: 1366.531 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 3.113x: 4.601e-05s -> 1.478e-05s, so taking this instance. +After merge step 1: tpop: 1.097e-05 s ~Mflops: 1841.847 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 2.826x: 3.099e-05s -> 1.097e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.005936s (of which 9.489e-05s partitioning, 0.005511s I/O); computing times: 7.2e-05s in par. loops, 3.099e-05s sorting, 4.053e-06s analyzing) -Total merge + benchmarking process took 0.005936s, equivalent to 401.6/129.0 new/old ops (0.000221s for 2 clones -- as 15.0/4.8 ops, or 7.5/2.4 ops per clone), SPEEDUP of 3.113x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 3.113x (4.601e-05s -> 1.478e-05s), will amortize in 190.1 ops by saving 3.123e-05s per op. -In 1 tuning rounds (tot. 0.0063s, 0.00022s for constructor, 2 clones) obtained a SPEEDUP of 211.3% (3.113x) (from 439 to 1367 Mflops). Employed 0.056s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004475s (of which 6.604e-05s partitioning, 0.004289s I/O); computing times: 5.221e-05s in par. loops, 2.193e-05s sorting, 2.861e-06s analyzing) +Total merge + benchmarking process took 0.004475s, equivalent to 408.0/144.4 new/old ops (6.294e-05s for 2 clones -- as 5.7/2.0 ops, or 2.9/1.0 ops per clone), SPEEDUP of 2.826x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 2.826x (3.099e-05s -> 1.097e-05s), will amortize in 223.5 ops by saving 2.003e-05s per op. +In 1 tuning rounds (tot. 0.0047s, 6.3e-05s for constructor, 2 clones) obtained a SPEEDUP of 182.6% (2.826x) (from 651.7 to 1842 Mflops). Employed 0.0042s for I/O of matrix plots. #pr: updating sample at index 1 (0^th of 4), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.0624261 s (4.601e-05 s -> 1.478e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.00899196 s (3.099e-05 s -> 1.097e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5384,11 +5434,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5399,13 +5449,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 13. +Starting with requested 0 threads ; current default 1 ; at most 24. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5416,11 +5466,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5430,27 +5480,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 5.007e-05s; avg 1.669e-05s ( +/- 10.00/ 14.29 %); best 1.502e-05s; worst 1.907e-05s; std dev. 1.73e-06 (taking best). -Reference operation time is 1.50204e-05 s (1345 Mflops) with 1 threads. +3 iterations (1 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 3.23/ 6.45 %); best 1.192e-05s; worst 1.311e-05s; std dev. 5.62e-07 (taking best). +Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.070e-04 s (100.00 %) - analyzed arrays in 2.599e-05 s (24.28 %) - cleaned-up arrays in 1.597e-05 s (14.92 %) - deduplicated arrays in 1.717e-05 s (16.04 %) + converted COO to RSB in 6.700e-05 s (100.00 %) + analyzed arrays in 1.216e-05 s (18.15 %) + cleaned-up arrays in 1.192e-05 s (17.79 %) + deduplicated arrays in 1.311e-05 s (19.57 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.219e-05 s (30.07 %) - memory allocations took 4.768e-06 s (4.45 %) - leafs setup took 9.537e-07 s (0.89 %) - halfword conversion took 7.868e-06 s (7.35 %) -Built (100 x 100)[0x57f06f20]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 13). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + shuffled partitions in 1.884e-05 s (28.11 %) + memory allocations took 2.861e-06 s (4.27 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 6.199e-06 s (9.25 %) +Built (100 x 100)[0x57488cd0]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 24). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5461,12 +5511,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5477,22 +5527,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.630e-04 s (100.00 %) - analyzed arrays in 1.080e-04 s (41.07 %) - cleaned-up arrays in 1.621e-05 s (6.17 %) - deduplicated arrays in 1.597e-05 s (6.07 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.321e-05 s (31.64 %) - memory allocations took 5.722e-06 s (2.18 %) - leafs setup took 9.537e-07 s (0.36 %) - halfword conversion took 3.099e-05 s (11.79 %) -Built (100 x 100)[0x57efe150]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 1.030e-04 s (100.00 %) + analyzed arrays in 2.694e-05 s (26.16 %) + cleaned-up arrays in 1.097e-05 s (10.65 %) + deduplicated arrays in 1.311e-05 s (12.73 %) + sorted arrays in 9.537e-07 s (0.93 %) + shuffled partitions in 3.219e-05 s (31.25 %) + memory allocations took 2.861e-06 s (2.78 %) + leafs setup took 9.537e-07 s (0.93 %) + halfword conversion took 1.502e-05 s (14.58 %) +Built (100 x 100)[0x5747f190]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5503,11 +5553,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5518,11 +5568,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5532,16 +5582,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001628s; avg 5.428e-05s ( +/- 4.25/ 6.73 %); best 5.198e-05s; worst 5.794e-05s; std dev. 2.614e-06 (taking best). -Reference operation time is 5.19753e-05 s (388.6 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 5.19753e-05 s/0 threads (speedup 0.288991 x), same?n. +3 iterations (1 th.) took 9.704e-05s; avg 3.235e-05s ( +/- 4.18/ 5.41 %); best 3.099e-05s; worst 3.409e-05s; std dev. 1.296e-06 (taking best). +Reference operation time is 3.09944e-05 s (651.7 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 3.09944e-05 s/0 threads (speedup 0.384615 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5552,12 +5602,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5568,22 +5618,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.228e-02 s (100.00 %) - analyzed arrays in 1.109e-04 s (0.34 %) - cleaned-up arrays in 1.788e-05 s (0.06 %) - deduplicated arrays in 1.907e-05 s (0.06 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 1.340e-04 s (0.42 %) - memory allocations took 8.345e-06 s (0.03 %) - leafs setup took 2.861e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (99.08 %) -Built (100 x 100)[0x57efe9b0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 2.799e-04 s (100.00 %) + analyzed arrays in 1.290e-04 s (46.08 %) + cleaned-up arrays in 1.216e-05 s (4.34 %) + deduplicated arrays in 8.082e-05 s (28.88 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 3.505e-05 s (12.52 %) + memory allocations took 4.053e-06 s (1.45 %) + leafs setup took 9.537e-07 s (0.34 %) + halfword conversion took 1.597e-05 s (5.71 %) +Built (100 x 100)[0x5747f920]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5594,11 +5644,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5609,11 +5659,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5623,16 +5673,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04796s; avg 0.01599s ( +/- 0.56/ 0.45 %); best 0.0159s; worst 0.01606s; std dev. 6.68e-05 (taking best). -Reference operation time is 0.015897 s (1.271 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.015897 s/0 threads (speedup 0.000944854 x), same?n. +3 iterations (1 th.) took 0.000114s; avg 3.799e-05s ( +/- 5.23/ 4.81 %); best 3.6e-05s; worst 3.982e-05s; std dev. 1.561e-06 (taking best). +Reference operation time is 3.60012e-05 s (561.1 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 3.60012e-05 s/0 threads (speedup 0.331126 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5643,12 +5693,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5659,22 +5709,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.037e-01 s (100.00 %) - analyzed arrays in 3.971e-02 s (38.27 %) - cleaned-up arrays in 1.597e-05 s (0.02 %) - deduplicated arrays in 1.693e-05 s (0.02 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 3.200e-02 s (30.84 %) - memory allocations took 9.060e-06 s (0.01 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 3.199e-02 s (30.83 %) -Built (100 x 100)[0x57f07000]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 25, symflags:'LS' + converted COO to RSB in 1.619e-04 s (100.00 %) + analyzed arrays in 6.986e-05 s (43.15 %) + cleaned-up arrays in 1.216e-05 s (7.51 %) + deduplicated arrays in 1.287e-05 s (7.95 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 4.411e-05 s (27.25 %) + memory allocations took 5.007e-06 s (3.09 %) + leafs setup took 1.907e-06 s (1.18 %) + halfword conversion took 1.597e-05 s (9.87 %) +Built (100 x 100)[0x57488db0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 23, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5685,11 +5735,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5700,11 +5750,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5714,16 +5764,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05013s; avg 0.01671s ( +/- 4.94/ 9.39 %); best 0.01589s; worst 0.01828s; std dev. 0.001111 (taking best). -Reference operation time is 0.0158851 s (1.272 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 1, 25 leaves, 2.306 bytes/nz, 0.0158851 s/0 threads (speedup 0.000945563 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 35 subms, 25 lsubms, 2.3057 bpnz +3 iterations (1 th.) took 0.0001259s; avg 4.196e-05s ( +/- 2.27/ 2.27 %); best 4.101e-05s; worst 4.292e-05s; std dev. 7.787e-07 (taking best). +Reference operation time is 4.1008e-05 s (492.6 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 1, 23 leaves, 2.295 bytes/nz, 4.1008e-05 s/0 threads (speedup 0.290698 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 32 subms, 23 lsubms, 2.2947 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5734,12 +5784,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5750,22 +5800,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.015e-01 s (100.00 %) - analyzed arrays in 3.746e-02 s (36.91 %) - cleaned-up arrays in 1.907e-05 s (0.02 %) - deduplicated arrays in 1.907e-05 s (0.02 %) + converted COO to RSB in 2.248e-04 s (100.00 %) + analyzed arrays in 1.061e-04 s (47.19 %) + cleaned-up arrays in 1.216e-05 s (5.41 %) + deduplicated arrays in 1.287e-05 s (5.73 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.202e-02 s (31.54 %) - memory allocations took 1.192e-05 s (0.01 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (31.50 %) -Built (100 x 100)[0x57f07000]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + shuffled partitions in 7.105e-05 s (31.60 %) + memory allocations took 3.815e-06 s (1.70 %) + leafs setup took 2.861e-06 s (1.27 %) + halfword conversion took 1.597e-05 s (7.10 %) +Built (100 x 100)[0x57488db0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 50, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5776,11 +5826,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5791,11 +5841,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5805,16 +5855,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.01302s; avg 0.004341s ( +/- 98.39/169.73 %); best 6.986e-05s; worst 0.01171s; std dev. 0.005232 (taking best). -Reference operation time is 6.98566e-05 s (289.2 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 6.98566e-05 s/0 threads (speedup 0.215017 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +3 iterations (1 th.) took 0.0001509s; avg 5.031e-05s ( +/- 2.84/ 3.32 %); best 4.888e-05s; worst 5.198e-05s; std dev. 1.277e-06 (taking best). +Reference operation time is 4.88758e-05 s (413.3 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 2, 50 leaves, 2.416 bytes/nz, 4.88758e-05 s/0 threads (speedup 0.243902 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 68 subms, 50 lsubms, 2.4158 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5825,12 +5875,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5841,22 +5891,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.749e-04 s (100.00 %) - analyzed arrays in 2.329e-04 s (49.05 %) - cleaned-up arrays in 1.597e-05 s (3.36 %) - deduplicated arrays in 1.597e-05 s (3.36 %) - sorted arrays in 9.537e-07 s (0.20 %) - shuffled partitions in 1.662e-04 s (34.99 %) - memory allocations took 9.060e-06 s (1.91 %) - leafs setup took 5.960e-06 s (1.26 %) - halfword conversion took 2.599e-05 s (5.47 %) -Built (100 x 100)[0x57f2de80]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 3.319e-04 s (100.00 %) + analyzed arrays in 1.471e-04 s (44.32 %) + cleaned-up arrays in 1.216e-05 s (3.66 %) + deduplicated arrays in 1.287e-05 s (3.88 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 1.340e-04 s (40.37 %) + memory allocations took 3.815e-06 s (1.15 %) + leafs setup took 5.960e-06 s (1.80 %) + halfword conversion took 1.597e-05 s (4.81 %) +Built (100 x 100)[0x574afc30]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 103, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5867,11 +5917,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5882,11 +5932,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5896,17 +5946,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000237s; avg 7.9e-05s ( +/- 13.98/ 19.22 %); best 6.795e-05s; worst 9.418e-05s; std dev. 1.11e-05 (taking best). -Reference operation time is 6.79493e-05 s (297.3 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 6.79493e-05 s/0 threads (speedup 0.221053 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1344.84 Mflops. +3 iterations (1 th.) took 0.000196s; avg 6.533e-05s ( +/- 2.19/ 2.55 %); best 6.39e-05s; worst 6.7e-05s; std dev. 1.277e-06 (taking best). +Reference operation time is 6.38962e-05 s (316.1 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 4, 103 leaves, 2.654 bytes/nz, 6.38962e-05 s/0 threads (speedup 0.186567 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 139 subms, 103 lsubms, 2.6543 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1694.5 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5916,23 +5966,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.351941 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (1.50204e-05 : 1.50204e-05) / 1 x (1.50204e-05 : 1.50204e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.00283217 s (eq. to 2e+02/ 2e+02 old/new op.times), gained local/global speedup 1 x (1.19209e-05 : 1.19209e-05) / 1 x (1.19209e-05 : 1.19209e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.35s, 0.24s for constructor, 0 clones) obtained NO speedup (best stays 1345 Mflops). -Second run of RSB Autotuner took 0.352012 s and estimated a speedup of 1.000000 x (1.502e-05 s -> 1.502e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.0028s, 0.0016s for constructor, 0 clones) obtained NO speedup (best stays 1694 Mflops). +Second run of RSB Autotuner took 0.00286698 s and estimated a speedup of 1.000000 x (1.192e-05 s -> 1.192e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023059 0.063977 0.087036 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087036 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023059 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063977 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000879 0.000061 0.000940 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000940 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000879 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000061 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087036 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000940 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -5947,45 +5997,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.119104 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.00100112 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.0230591 0 0.063977 -# so far, program took 6.141s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.414s/0.000s . +%constructor:lower-100x100-5050nz 0 0.000878811 0 6.10352e-05 +# so far, program took 6.673s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.012s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.06769s (system CPU time used) -ru_utime : 11.8s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1059s (system CPU time used) +ru_utime : 7.073s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 6.141s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.414s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 6.673s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.012s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.680e-04 s (100.00 %) - analyzed arrays in 3.982e-05 s (14.86 %) - cleaned-up arrays in 1.597e-05 s (5.96 %) - deduplicated arrays in 1.693e-05 s (6.32 %) + converted COO to RSB in 1.070e-04 s (100.00 %) + analyzed arrays in 1.979e-05 s (18.49 %) + cleaned-up arrays in 1.097e-05 s (10.24 %) + deduplicated arrays in 1.287e-05 s (12.03 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.070e-04 s (39.95 %) - memory allocations took 7.868e-06 s (2.94 %) - leafs setup took 2.146e-06 s (0.80 %) - halfword conversion took 7.582e-05 s (28.29 %) -Built (100 x 100)[0x57f07000]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57f07000]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 3.982e-05 s (37.19 %) + memory allocations took 6.437e-06 s (6.01 %) + leafs setup took 1.192e-06 s (1.11 %) + halfword conversion took 1.597e-05 s (14.92 %) +Built (100 x 100)[0x57488db0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.000s): (100 x 100)[0x57488db0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5996,11 +6046,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6011,11 +6061,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6025,16 +6075,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001371s; avg 4.57e-05s ( +/- 14.43/ 24.70 %); best 3.91e-05s; worst 5.698e-05s; std dev. 8.018e-06 (taking best). -Reference operation time is 3.91006e-05 s (516.6 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 3.91e-05 Mflops: 516.615) -Merge (3 -> 1 leaves) took w.c.t. of 7.987e-05s, ~6.795e-05s of computing time (of which 2.503e-05s sorting, 1.907e-06s analysis) +3 iterations (1 th.) took 0.0001101s; avg 3.672e-05s ( +/- 18.18/ 33.77 %); best 3.004e-05s; worst 4.911e-05s; std dev. 8.775e-06 (taking best). +Reference operation time is 3.00407e-05 s (672.4 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 3.004e-05 Mflops: 672.420) +Merge (3 -> 1 leaves) took w.c.t. of 4.792e-05s, ~4.22e-05s of computing time (of which 1.788e-05s sorting, 9.537e-07s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6045,11 +6095,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6059,14 +6109,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.506e-05s; avg 1.502e-05s ( +/- 0.00/ 0.00 %); best 1.502e-05s; worst 1.502e-05s; std dev. 0 (taking best). -Reference operation time is 1.50204e-05 s (1345 Mflops) with 1 threads. +3 iterations (1 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 0.66/ 1.32 %); best 1.192e-05s; worst 1.216e-05s; std dev. 1.124e-07 (taking best). +Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6076,25 +6126,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.502e-05 s ~Mflops: 1344.840 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 2.603x: 3.91e-05s -> 1.502e-05s, so taking this instance. +After merge step 1: tpop: 1.192e-05 s ~Mflops: 1694.499 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 2.520x: 3.004e-05s -> 1.192e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.005432s (of which 8.488e-05s partitioning, 0.005071s I/O); computing times: 6.795e-05s in par. loops, 2.503e-05s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.005432s, equivalent to 361.7/138.9 new/old ops (0.001003s for 2 clones -- as 66.8/25.7 ops, or 33.4/12.8 ops per clone), SPEEDUP of 2.603x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 2.603x (3.91e-05s -> 1.502e-05s), will amortize in 225.6 ops by saving 2.408e-05s per op. -In 1 tuning rounds (tot. 0.0066s, 0.001s for constructor, 2 clones) obtained a SPEEDUP of 160.3% (2.603x) (from 516.6 to 1345 Mflops). Employed 0.0094s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004186s (of which 5.102e-05s partitioning, 0.004036s I/O); computing times: 4.22e-05s in par. loops, 1.788e-05s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 0.004186s, equivalent to 351.1/139.3 new/old ops (4.482e-05s for 2 clones -- as 3.8/1.5 ops, or 1.9/0.7 ops per clone), SPEEDUP of 2.520x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 2.520x (3.004e-05s -> 1.192e-05s), will amortize in 231.0 ops by saving 1.812e-05s per op. +In 1 tuning rounds (tot. 0.0044s, 4.5e-05s for constructor, 2 clones) obtained a SPEEDUP of 152.0% (2.52x) (from 672.4 to 1694 Mflops). Employed 0.004s for I/O of matrix plots. #pr: updating sample at index 2 (1^th of 4), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.0162451 s (3.910e-05 s -> 1.502e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.00847912 s (3.004e-05 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6105,11 +6155,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6120,13 +6170,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 13. +Starting with requested 0 threads ; current default 1 ; at most 24. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6137,11 +6187,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6151,27 +6201,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.697e-05s; avg 1.566e-05s ( +/- 4.06/ 8.12 %); best 1.502e-05s; worst 1.693e-05s; std dev. 8.991e-07 (taking best). -Reference operation time is 1.50204e-05 s (1345 Mflops) with 1 threads. +3 iterations (1 th.) took 3.719e-05s; avg 1.24e-05s ( +/- 11.54/ 13.46 %); best 1.097e-05s; worst 1.407e-05s; std dev. 1.277e-06 (taking best). +Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.020e-04 s (100.00 %) - analyzed arrays in 2.289e-05 s (22.43 %) - cleaned-up arrays in 1.597e-05 s (15.65 %) - deduplicated arrays in 1.693e-05 s (16.59 %) + converted COO to RSB in 7.486e-05 s (100.00 %) + analyzed arrays in 1.192e-05 s (15.92 %) + cleaned-up arrays in 1.097e-05 s (14.65 %) + deduplicated arrays in 1.311e-05 s (17.52 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.099e-05 s (30.37 %) - memory allocations took 5.245e-06 s (5.14 %) - leafs setup took 1.192e-06 s (1.17 %) - halfword conversion took 7.868e-06 s (7.71 %) -Built (100 x 100)[0x57f06f20]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 13). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + shuffled partitions in 1.693e-05 s (22.61 %) + memory allocations took 1.502e-05 s (20.06 %) + leafs setup took 9.537e-07 s (1.27 %) + halfword conversion took 5.960e-06 s (7.96 %) +Built (100 x 100)[0x57488cd0]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 24). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6182,12 +6232,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6198,22 +6248,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.040e-04 s (100.00 %) - analyzed arrays in 2.885e-05 s (27.75 %) - cleaned-up arrays in 1.812e-05 s (17.43 %) - deduplicated arrays in 1.597e-05 s (15.37 %) - sorted arrays in 9.537e-07 s (0.92 %) - shuffled partitions in 2.098e-05 s (20.18 %) - memory allocations took 5.245e-06 s (5.05 %) - leafs setup took 1.907e-06 s (1.83 %) - halfword conversion took 1.097e-05 s (10.55 %) -Built (100 x 100)[0x57efeb50]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 6.509e-05 s (100.00 %) + analyzed arrays in 1.597e-05 s (24.54 %) + cleaned-up arrays in 1.097e-05 s (16.85 %) + deduplicated arrays in 1.311e-05 s (20.15 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 1.216e-05 s (18.68 %) + memory allocations took 2.861e-06 s (4.40 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 1.001e-05 s (15.38 %) +Built (100 x 100)[0x5747f920]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6224,11 +6274,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6239,11 +6289,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6253,15 +6303,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.601e-05s; avg 1.534e-05s ( +/- 3.63/ 5.70 %); best 1.478e-05s; worst 1.621e-05s; std dev. 6.258e-07 (taking best). -Reference operation time is 1.4782e-05 s (1367 Mflops) with 1 threads. -Challenging best inner round reference (1.50204e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.4782e-05 s/0 threads (speedup 1.01613 x), same?n. -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +3 iterations (1 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 8.61/ 7.28 %); best 1.097e-05s; worst 1.287e-05s; std dev. 7.867e-07 (taking best). +Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.09673e-05 s/0 threads (speedup 1 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6272,12 +6323,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6288,22 +6339,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 9.396e-03 s (100.00 %) - analyzed arrays in 9.262e-03 s (98.57 %) - cleaned-up arrays in 1.502e-05 s (0.16 %) - deduplicated arrays in 1.597e-05 s (0.17 %) - sorted arrays in 9.537e-07 s (0.01 %) - shuffled partitions in 6.604e-05 s (0.70 %) - memory allocations took 5.960e-06 s (0.06 %) - leafs setup took 1.907e-06 s (0.02 %) - halfword conversion took 2.623e-05 s (0.28 %) -Built (100 x 100)[0x57f07000]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 1.099e-04 s (100.00 %) + analyzed arrays in 3.815e-05 s (34.71 %) + cleaned-up arrays in 1.121e-05 s (10.20 %) + deduplicated arrays in 1.287e-05 s (11.71 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 2.885e-05 s (26.25 %) + memory allocations took 2.861e-06 s (2.60 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 1.502e-05 s (13.67 %) +Built (100 x 100)[0x5747f920]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6314,11 +6365,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6329,11 +6380,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6343,16 +6394,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001349s; avg 4.498e-05s ( +/- 4.59/ 8.66 %); best 4.292e-05s; worst 4.888e-05s; std dev. 2.755e-06 (taking best). -Reference operation time is 4.29153e-05 s (470.7 Mflops) with 1 threads. -Challenging best inner round reference (1.4782e-05 s/0 threads) with: subdivision 0.5, 8 leaves, 2.185 bytes/nz, 4.29153e-05 s/0 threads (speedup 0.344444 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 11 subms, 8 lsubms, 2.1846 bpnz +3 iterations (1 th.) took 9.084e-05s; avg 3.028e-05s ( +/- 4.72/ 5.51 %); best 2.885e-05s; worst 3.195e-05s; std dev. 1.277e-06 (taking best). +Reference operation time is 2.88486e-05 s (700.2 Mflops) with 1 threads. +Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.5, 6 leaves, 2.163 bytes/nz, 2.88486e-05 s/0 threads (speedup 0.380165 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6363,12 +6414,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6379,22 +6430,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.059e-04 s (100.00 %) - analyzed arrays in 1.440e-04 s (47.08 %) - cleaned-up arrays in 1.478e-05 s (4.83 %) - deduplicated arrays in 1.597e-05 s (5.22 %) - sorted arrays in 1.192e-06 s (0.39 %) - shuffled partitions in 9.203e-05 s (30.09 %) - memory allocations took 6.914e-06 s (2.26 %) - leafs setup took 4.053e-06 s (1.33 %) - halfword conversion took 2.599e-05 s (8.50 %) -Built (100 x 100)[0x57f07000]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 18, symflags:'LS' + converted COO to RSB in 3.171e-04 s (100.00 %) + analyzed arrays in 1.640e-04 s (51.73 %) + cleaned-up arrays in 8.392e-05 s (26.47 %) + deduplicated arrays in 1.502e-05 s (4.74 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 3.290e-05 s (10.38 %) + memory allocations took 5.245e-06 s (1.65 %) + leafs setup took 1.907e-06 s (0.60 %) + halfword conversion took 1.407e-05 s (4.44 %) +Built (100 x 100)[0x57488db0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6405,11 +6456,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6420,11 +6471,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6434,16 +6485,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001872s; avg 6.239e-05s ( +/- 18.22/ 20.38 %); best 5.102e-05s; worst 7.51e-05s; std dev. 9.877e-06 (taking best). -Reference operation time is 5.10216e-05 s (395.9 Mflops) with 1 threads. -Challenging best inner round reference (1.4782e-05 s/0 threads) with: subdivision 1, 18 leaves, 2.261 bytes/nz, 5.10216e-05 s/0 threads (speedup 0.28972 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 25 subms, 18 lsubms, 2.2614 bpnz +3 iterations (1 th.) took 0.0001142s; avg 3.807e-05s ( +/- 2.92/ 5.22 %); best 3.695e-05s; worst 4.005e-05s; std dev. 1.408e-06 (taking best). +Reference operation time is 3.69549e-05 s (546.6 Mflops) with 1 threads. +Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 1, 16 leaves, 2.25 bytes/nz, 3.69549e-05 s/0 threads (speedup 0.296774 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6454,12 +6505,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6470,22 +6521,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.861e-04 s (100.00 %) - analyzed arrays in 2.649e-04 s (54.49 %) - cleaned-up arrays in 1.597e-05 s (3.29 %) - deduplicated arrays in 1.693e-05 s (3.48 %) + converted COO to RSB in 1.979e-04 s (100.00 %) + analyzed arrays in 9.608e-05 s (48.55 %) + cleaned-up arrays in 1.121e-05 s (5.66 %) + deduplicated arrays in 1.287e-05 s (6.51 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.428e-04 s (29.38 %) - memory allocations took 7.391e-06 s (1.52 %) - leafs setup took 5.007e-06 s (1.03 %) - halfword conversion took 3.219e-05 s (6.62 %) -Built (100 x 100)[0x57f07000]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + shuffled partitions in 5.412e-05 s (27.35 %) + memory allocations took 4.768e-06 s (2.41 %) + leafs setup took 2.861e-06 s (1.45 %) + halfword conversion took 1.502e-05 s (7.59 %) +Built (100 x 100)[0x57488db0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 37, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6496,11 +6547,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6511,11 +6562,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6525,16 +6576,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001879s; avg 6.262e-05s ( +/- 4.06/ 5.08 %); best 6.008e-05s; worst 6.58e-05s; std dev. 2.379e-06 (taking best). -Reference operation time is 6.00815e-05 s (336.2 Mflops) with 1 threads. -Challenging best inner round reference (1.4782e-05 s/0 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 6.00815e-05 s/0 threads (speedup 0.246032 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +3 iterations (1 th.) took 0.0001349s; avg 4.498e-05s ( +/- 2.47/ 2.30 %); best 4.387e-05s; worst 4.601e-05s; std dev. 8.778e-07 (taking best). +Reference operation time is 4.3869e-05 s (460.5 Mflops) with 1 threads. +Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 2, 37 leaves, 2.375 bytes/nz, 4.3869e-05 s/0 threads (speedup 0.25 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 51 subms, 37 lsubms, 2.3747 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6545,12 +6596,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6561,22 +6612,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 8.621e-04 s (100.00 %) - analyzed arrays in 2.871e-04 s (33.30 %) - cleaned-up arrays in 2.122e-05 s (2.46 %) - deduplicated arrays in 1.884e-05 s (2.18 %) - sorted arrays in 9.537e-07 s (0.11 %) - shuffled partitions in 1.540e-04 s (17.87 %) - memory allocations took 2.098e-05 s (2.43 %) - leafs setup took 3.695e-05 s (4.29 %) - halfword conversion took 3.190e-04 s (37.00 %) -Built (100 x 100)[0x57f28f90]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 2.859e-04 s (100.00 %) + analyzed arrays in 1.390e-04 s (48.62 %) + cleaned-up arrays in 1.192e-05 s (4.17 %) + deduplicated arrays in 1.311e-05 s (4.59 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 9.799e-05 s (34.28 %) + memory allocations took 2.861e-06 s (1.00 %) + leafs setup took 5.007e-06 s (1.75 %) + halfword conversion took 1.597e-05 s (5.59 %) +Built (100 x 100)[0x5749c070]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 78, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6587,11 +6638,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6602,11 +6653,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6616,17 +6667,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0002158s; avg 7.192e-05s ( +/- 5.19/ 6.74 %); best 6.819e-05s; worst 7.677e-05s; std dev. 3.591e-06 (taking best). -Reference operation time is 6.81877e-05 s (296.2 Mflops) with 1 threads. -Challenging best inner round reference (1.4782e-05 s/0 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 6.81877e-05 s/0 threads (speedup 0.216783 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -Best sparse multiply performance with subdivision multiplier of 0.25: 1366.53 Mflops. +3 iterations (1 th.) took 0.0001709s; avg 5.698e-05s ( +/- 1.67/ 1.67 %); best 5.603e-05s; worst 5.794e-05s; std dev. 7.787e-07 (taking best). +Reference operation time is 5.60284e-05 s (360.5 Mflops) with 1 threads. +Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 4, 78 leaves, 2.556 bytes/nz, 5.60284e-05 s/0 threads (speedup 0.195745 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 106 subms, 78 lsubms, 2.5560 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1841.85 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6636,23 +6687,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.0137661 s (eq. to 9e+02/ 9e+02 old/new op.times), gained local/global speedup 1.01613 x (1.50204e-05 : 1.4782e-05) / 1.01613 x (1.50204e-05 : 1.4782e-05). This is amortizable in 57739 op.times. -In 1 tuning rounds (tot. 0.014s, 0.012s for constructor, 0 clones) obtained a SPEEDUP of 1.6% (1.016x) (from 1345 to 1367 Mflops). -Second run of RSB Autotuner took 0.0138199 s and estimated a speedup of 1.016129 x (1.502e-05 s -> 1.478e-05 s per op) in new matrix (1 -> 1 lsubm) -RSB Autotuner suggested a new matrix: freeing the old one. +Last tuner inner round (1 of 1) took 0.00223398 s (eq. to 2e+02/ 2e+02 old/new op.times), gained local/global speedup 1 x (1.09673e-05 : 1.09673e-05) / 1 x (1.09673e-05 : 1.09673e-05). This is not amortizable ! +Auto tuning inner round 1 did not find a configuration better than the original. +In 1 tuning rounds (tot. 0.0022s, 0.0013s for constructor, 0 clones) obtained NO speedup (best stays 1842 Mflops). +Second run of RSB Autotuner took 0.00226784 s and estimated a speedup of 1.000000 x (1.097e-05 s -> 1.097e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000040 0.000107 0.000147 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000147 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000040 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000107 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000020 0.000040 0.000060 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000060 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000020 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000040 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000147 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000060 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -6667,45 +6718,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.000267982 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.00010705 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 3.98159e-05 0 0.00010705 -# so far, program took 6.190s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.445s/0.000s . +%constructor:lower-100x100-5050nz 0 1.97887e-05 0 3.98159e-05 +# so far, program took 6.712s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.023s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.07165s (system CPU time used) -ru_utime : 12.39s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1248s (system CPU time used) +ru_utime : 7.596s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 6.190s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.445s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 6.712s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.023s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.310e-04 s (100.00 %) - analyzed arrays in 3.600e-05 s (15.58 %) - cleaned-up arrays in 1.502e-05 s (6.50 %) - deduplicated arrays in 1.693e-05 s (7.33 %) - sorted arrays in 1.192e-06 s (0.52 %) - shuffled partitions in 1.190e-04 s (51.50 %) - memory allocations took 1.502e-05 s (6.50 %) - leafs setup took 1.907e-06 s (0.83 %) - halfword conversion took 2.408e-05 s (10.42 %) -Built (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + converted COO to RSB in 1.099e-04 s (100.00 %) + analyzed arrays in 2.122e-05 s (19.31 %) + cleaned-up arrays in 1.121e-05 s (10.20 %) + deduplicated arrays in 1.383e-05 s (12.58 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 4.482e-05 s (40.78 %) + memory allocations took 2.861e-06 s (2.60 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 1.597e-05 s (14.53 %) +Built (100 x 100)[0x57488db0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.000s): (100 x 100)[0x57488db0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6716,11 +6767,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6731,11 +6782,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6745,16 +6796,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003519s; avg 0.0001173s ( +/- 15.65/ 25.20 %); best 9.894e-05s; worst 0.0001469s; std dev. 2.111e-05 (taking best). -Reference operation time is 9.89437e-05 s (816.6 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 9.894e-05 Mflops: 816.626) -Merge (3 -> 1 leaves) took w.c.t. of 7.486e-05s, ~6.58e-05s of computing time (of which 2.813e-05s sorting, 1.907e-06s analysis) +3 iterations (1 th.) took 0.000277s; avg 9.235e-05s ( +/- 6.80/ 13.60 %); best 8.607e-05s; worst 0.0001049s; std dev. 8.879e-06 (taking best). +Reference operation time is 8.60691e-05 s (938.8 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 8.607e-05 Mflops: 938.781) +Merge (3 -> 1 leaves) took w.c.t. of 4.888e-05s, ~4.411e-05s of computing time (of which 2.003e-05s sorting, 9.537e-07s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6765,11 +6816,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6779,14 +6830,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001822s; avg 6.072e-05s ( +/- 1.05/ 0.52 %); best 6.008e-05s; worst 6.104e-05s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.00815e-05 s (1345 Mflops) with 1 threads. +3 iterations (1 th.) took 0.000185s; avg 6.167e-05s ( +/- 1.03/ 0.52 %); best 6.104e-05s; worst 6.199e-05s; std dev. 4.496e-07 (taking best). +Reference operation time is 6.10352e-05 s (1324 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6796,25 +6847,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 6.008e-05 s ~Mflops: 1344.840 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.647x: 9.894e-05s -> 6.008e-05s, so taking this instance. +After merge step 1: tpop: 6.104e-05 s ~Mflops: 1323.827 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.410x: 8.607e-05s -> 6.104e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.007882s (of which 7.987e-05s partitioning, 0.007353s I/O); computing times: 6.58e-05s in par. loops, 2.813e-05s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.007882s, equivalent to 131.2/79.7 new/old ops (0.000118s for 2 clones -- as 2.0/1.2 ops, or 1.0/0.6 ops per clone), SPEEDUP of 1.647x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.647x (9.894e-05s -> 6.008e-05s), will amortize in 202.8 ops by saving 3.886e-05s per op. -In 1 tuning rounds (tot. 0.0084s, 0.00012s for constructor, 2 clones) obtained a SPEEDUP of 64.7% (1.647x) (from 816.6 to 1345 Mflops). Employed 0.016s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004544s (of which 6.294e-05s partitioning, 0.004224s I/O); computing times: 4.411e-05s in par. loops, 2.003e-05s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 0.004544s, equivalent to 74.4/52.8 new/old ops (6.008e-05s for 2 clones -- as 1.0/0.7 ops, or 0.5/0.3 ops per clone), SPEEDUP of 1.410x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.410x (8.607e-05s -> 6.104e-05s), will amortize in 181.5 ops by saving 2.503e-05s per op. +In 1 tuning rounds (tot. 0.0049s, 6e-05s for constructor, 2 clones) obtained a SPEEDUP of 41.0% (1.41x) (from 938.8 to 1324 Mflops). Employed 0.0042s for I/O of matrix plots. #pr: updating sample at index 3 (2^th of 4), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.024364 s (9.894e-05 s -> 6.008e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.00915408 s (8.607e-05 s -> 6.104e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6825,11 +6876,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6840,13 +6891,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 13. +Starting with requested 0 threads ; current default 1 ; at most 24. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6857,11 +6908,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6871,27 +6922,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001841s; avg 6.135e-05s ( +/- 2.46/ 2.98 %); best 5.984e-05s; worst 6.318e-05s; std dev. 1.381e-06 (taking best). -Reference operation time is 5.98431e-05 s (1350 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001862s; avg 6.207e-05s ( +/- 1.66/ 1.79 %); best 6.104e-05s; worst 6.318e-05s; std dev. 8.778e-07 (taking best). +Reference operation time is 6.10352e-05 s (1324 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.638e-04 s (100.00 %) - analyzed arrays in 2.840e-04 s (78.05 %) - cleaned-up arrays in 1.502e-05 s (4.13 %) - deduplicated arrays in 1.621e-05 s (4.46 %) - sorted arrays in 9.537e-07 s (0.26 %) - shuffled partitions in 3.195e-05 s (8.78 %) - memory allocations took 5.960e-06 s (1.64 %) - leafs setup took 9.537e-07 s (0.26 %) - halfword conversion took 7.868e-06 s (2.16 %) -Built (100 x 100)[0x57f06f20]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 13). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 7.296e-05 s (100.00 %) + analyzed arrays in 1.192e-05 s (16.34 %) + cleaned-up arrays in 1.478e-05 s (20.26 %) + deduplicated arrays in 1.502e-05 s (20.59 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 1.812e-05 s (24.84 %) + memory allocations took 3.338e-06 s (4.58 %) + leafs setup took 9.537e-07 s (1.31 %) + halfword conversion took 6.914e-06 s (9.48 %) +Built (100 x 100)[0x57488cd0]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 24). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6902,12 +6953,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6918,22 +6969,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.000e-04 s (100.00 %) - analyzed arrays in 4.792e-05 s (23.96 %) - cleaned-up arrays in 1.597e-05 s (7.99 %) - deduplicated arrays in 1.717e-05 s (8.58 %) + converted COO to RSB in 1.061e-04 s (100.00 %) + analyzed arrays in 2.694e-05 s (25.39 %) + cleaned-up arrays in 1.097e-05 s (10.34 %) + deduplicated arrays in 1.311e-05 s (12.36 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.702e-05 s (43.50 %) - memory allocations took 3.815e-06 s (1.91 %) - leafs setup took 1.192e-06 s (0.60 %) - halfword conversion took 2.599e-05 s (12.99 %) -Built (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + shuffled partitions in 3.099e-05 s (29.21 %) + memory allocations took 6.914e-06 s (6.52 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 1.621e-05 s (15.28 %) +Built (100 x 100)[0x5747fe70]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6944,11 +6995,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6959,11 +7010,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6973,16 +7024,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000304s; avg 0.0001013s ( +/- 3.29/ 5.65 %); best 9.799e-05s; worst 0.000107s; std dev. 4.065e-06 (taking best). -Reference operation time is 9.799e-05 s (824.6 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 9.799e-05 s/0 threads (speedup 0.610706 x), same?n. +3 iterations (1 th.) took 0.0002589s; avg 8.631e-05s ( +/- 1.66/ 0.83 %); best 8.488e-05s; worst 8.702e-05s; std dev. 1.012e-06 (taking best). +Reference operation time is 8.4877e-05 s (952 Mflops) with 1 threads. +Challenging best inner round reference (6.10352e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 8.4877e-05 s/0 threads (speedup 0.719101 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6993,12 +7044,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7009,22 +7060,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.188e-04 s (100.00 %) - analyzed arrays in 1.051e-04 s (32.98 %) - cleaned-up arrays in 7.892e-05 s (24.76 %) - deduplicated arrays in 1.597e-05 s (5.01 %) - sorted arrays in 1.192e-06 s (0.37 %) - shuffled partitions in 7.701e-05 s (24.16 %) - memory allocations took 5.722e-06 s (1.80 %) - leafs setup took 1.907e-06 s (0.60 %) - halfword conversion took 3.099e-05 s (9.72 %) -Built (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 2.499e-04 s (100.00 %) + analyzed arrays in 1.190e-04 s (47.61 %) + cleaned-up arrays in 1.097e-05 s (4.39 %) + deduplicated arrays in 1.311e-05 s (5.25 %) + sorted arrays in 9.537e-07 s (0.38 %) + shuffled partitions in 8.202e-05 s (32.82 %) + memory allocations took 6.914e-06 s (2.77 %) + leafs setup took 1.907e-06 s (0.76 %) + halfword conversion took 1.502e-05 s (6.01 %) +Built (100 x 100)[0x57488db0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7035,11 +7086,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7050,11 +7101,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7064,16 +7115,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003331s; avg 0.000111s ( +/- 1.86/ 3.51 %); best 0.000109s; worst 0.0001149s; std dev. 2.755e-06 (taking best). -Reference operation time is 0.000108957 s (741.6 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.000108957 s/0 threads (speedup 0.549234 x), same?n. +3 iterations (1 th.) took 0.000283s; avg 9.433e-05s ( +/- 0.42/ 0.59 %); best 9.394e-05s; worst 9.489e-05s; std dev. 4.052e-07 (taking best). +Reference operation time is 9.39369e-05 s (860.2 Mflops) with 1 threads. +Challenging best inner round reference (6.10352e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 9.39369e-05 s/0 threads (speedup 0.649746 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7084,12 +7135,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7100,22 +7151,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.898e-04 s (100.00 %) - analyzed arrays in 1.981e-04 s (50.83 %) - cleaned-up arrays in 1.502e-05 s (3.85 %) - deduplicated arrays in 1.597e-05 s (4.10 %) - sorted arrays in 1.192e-06 s (0.31 %) - shuffled partitions in 1.230e-04 s (31.56 %) - memory allocations took 6.676e-06 s (1.71 %) - leafs setup took 4.053e-06 s (1.04 %) - halfword conversion took 2.384e-05 s (6.12 %) -Built (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 26, symflags:'LS' + converted COO to RSB in 1.681e-04 s (100.00 %) + analyzed arrays in 6.914e-05 s (41.13 %) + cleaned-up arrays in 1.097e-05 s (6.52 %) + deduplicated arrays in 1.407e-05 s (8.37 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 4.411e-05 s (26.24 %) + memory allocations took 1.383e-05 s (8.23 %) + leafs setup took 1.907e-06 s (1.13 %) + halfword conversion took 1.407e-05 s (8.37 %) +Built (100 x 100)[0x57488db0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 23, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7126,11 +7177,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7141,11 +7192,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7155,16 +7206,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.005703s; avg 0.001901s ( +/- 93.53/182.81 %); best 0.000123s; worst 0.005376s; std dev. 0.002457 (taking best). -Reference operation time is 0.000123024 s (656.8 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 1, 26 leaves, 2.317 bytes/nz, 0.000123024 s/0 threads (speedup 0.486434 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 36 subms, 26 lsubms, 2.3168 bpnz +3 iterations (1 th.) took 0.0002999s; avg 9.998e-05s ( +/- 1.99/ 2.07 %); best 9.799e-05s; worst 0.000102s; std dev. 1.656e-06 (taking best). +Reference operation time is 9.799e-05 s (824.6 Mflops) with 1 threads. +Challenging best inner round reference (6.10352e-05 s/1 threads) with: subdivision 1, 23 leaves, 2.295 bytes/nz, 9.799e-05 s/0 threads (speedup 0.622871 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 32 subms, 23 lsubms, 2.2947 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7175,12 +7226,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7191,22 +7242,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.691e-04 s (100.00 %) - analyzed arrays in 2.742e-04 s (48.18 %) - cleaned-up arrays in 1.812e-05 s (3.18 %) - deduplicated arrays in 1.907e-05 s (3.35 %) - sorted arrays in 9.537e-07 s (0.17 %) - shuffled partitions in 2.019e-04 s (35.48 %) - memory allocations took 9.775e-06 s (1.72 %) - leafs setup took 5.007e-06 s (0.88 %) - halfword conversion took 3.791e-05 s (6.66 %) -Built (100 x 100)[0x57f07000]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 2.308e-04 s (100.00 %) + analyzed arrays in 1.040e-04 s (45.04 %) + cleaned-up arrays in 1.192e-05 s (5.17 %) + deduplicated arrays in 1.311e-05 s (5.68 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 7.415e-05 s (32.13 %) + memory allocations took 7.868e-06 s (3.41 %) + leafs setup took 3.815e-06 s (1.65 %) + halfword conversion took 1.597e-05 s (6.92 %) +Built (100 x 100)[0x57488db0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 50, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7217,11 +7268,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7232,11 +7283,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7246,16 +7297,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003688s; avg 0.0001229s ( +/- 2.46/ 4.91 %); best 0.0001199s; worst 0.000129s; std dev. 4.271e-06 (taking best). -Reference operation time is 0.000119925 s (673.8 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.000119925 s/0 threads (speedup 0.499006 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +3 iterations (1 th.) took 0.0003309s; avg 0.0001103s ( +/- 1.22/ 0.72 %); best 0.000109s; worst 0.0001111s; std dev. 9.603e-07 (taking best). +Reference operation time is 0.000108957 s (741.6 Mflops) with 1 threads. +Challenging best inner round reference (6.10352e-05 s/1 threads) with: subdivision 2, 50 leaves, 2.425 bytes/nz, 0.000108957 s/0 threads (speedup 0.560175 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 68 subms, 50 lsubms, 2.4246 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7266,12 +7317,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7282,22 +7333,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.231e-04 s (100.00 %) - analyzed arrays in 2.592e-04 s (49.54 %) - cleaned-up arrays in 1.502e-05 s (2.87 %) - deduplicated arrays in 1.597e-05 s (3.05 %) - sorted arrays in 1.192e-06 s (0.23 %) - shuffled partitions in 1.879e-04 s (35.92 %) - memory allocations took 8.821e-06 s (1.69 %) - leafs setup took 5.007e-06 s (0.96 %) - halfword conversion took 2.813e-05 s (5.38 %) -Built (100 x 100)[0x57f2de80]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 3.400e-04 s (100.00 %) + analyzed arrays in 1.450e-04 s (42.64 %) + cleaned-up arrays in 1.097e-05 s (3.23 %) + deduplicated arrays in 1.407e-05 s (4.14 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 1.380e-04 s (40.60 %) + memory allocations took 9.060e-06 s (2.66 %) + leafs setup took 5.960e-06 s (1.75 %) + halfword conversion took 1.693e-05 s (4.98 %) +Built (100 x 100)[0x574afc30]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 102, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7308,11 +7359,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7323,11 +7374,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7337,17 +7388,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0004301s; avg 0.0001434s ( +/- 16.19/ 15.74 %); best 0.0001202s; worst 0.0001659s; std dev. 1.869e-05 (taking best). -Reference operation time is 0.000120163 s (672.4 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.000120163 s/0 threads (speedup 0.498016 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1350.2 Mflops. +3 iterations (1 th.) took 0.000397s; avg 0.0001323s ( +/- 0.36/ 0.54 %); best 0.0001318s; worst 0.000133s; std dev. 5.15e-07 (taking best). +Reference operation time is 0.000131845 s (612.8 Mflops) with 1 threads. +Challenging best inner round reference (6.10352e-05 s/1 threads) with: subdivision 4, 102 leaves, 2.653 bytes/nz, 0.000131845 s/0 threads (speedup 0.462929 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 138 subms, 102 lsubms, 2.6527 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1323.83 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7357,29 +7408,29 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.011811 s (eq. to 2e+02/ 2e+02 old/new op.times), gained local/global speedup 1 x (5.98431e-05 : 5.98431e-05) / 1 x (5.98431e-05 : 5.98431e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.003901 s (eq. to 6e+01/ 6e+01 old/new op.times), gained local/global speedup 1 x (6.10352e-05 : 6.10352e-05) / 1 x (6.10352e-05 : 6.10352e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.012s, 0.0037s for constructor, 0 clones) obtained NO speedup (best stays 1350 Mflops). -Second run of RSB Autotuner took 0.011858 s and estimated a speedup of 1.000000 x (5.984e-05 s -> 5.984e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.0039s, 0.0017s for constructor, 0 clones) obtained NO speedup (best stays 1324 Mflops). +Second run of RSB Autotuner took 0.00394416 s and estimated a speedup of 1.000000 x (6.104e-05 s -> 6.104e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.000036 0.000119 0.000155 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000156 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000036 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000119 -%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000001 -%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.000 -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000155 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000021 0.000045 0.000066 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000066 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000021 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000045 +%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 +%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000066 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SUBDIVISION_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SHUFFLE_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 -%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:lower-100x100-5050nz S N 1 100 100 5050 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:lower-100x100-5050nz S N 1 100 100 5050 10504 40400 20600 @@ -7388,45 +7439,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.000231028 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.000109911 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 1.19209e-06 3.60012e-05 0 0.000118971 -# so far, program took 6.249s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.481s/0.000s . +%constructor:lower-100x100-5050nz 0 2.12193e-05 0 4.48227e-05 +# so far, program took 6.753s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.036s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.07561s (system CPU time used) -ru_utime : 13.13s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1484s (system CPU time used) +ru_utime : 8.163s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 6.249s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.481s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 6.753s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.036s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.169e-04 s (100.00 %) - analyzed arrays in 4.292e-05 s (13.54 %) - cleaned-up arrays in 1.693e-05 s (5.34 %) - deduplicated arrays in 1.717e-05 s (5.42 %) + converted COO to RSB in 1.402e-04 s (100.00 %) + analyzed arrays in 2.098e-05 s (14.97 %) + cleaned-up arrays in 1.097e-05 s (7.82 %) + deduplicated arrays in 1.311e-05 s (9.35 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.659e-04 s (52.37 %) - memory allocations took 1.907e-05 s (6.02 %) - leafs setup took 9.537e-07 s (0.30 %) - halfword conversion took 5.198e-05 s (16.40 %) -Built (100 x 100)[0x57f07000]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57f07000]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 6.390e-05 s (45.58 %) + memory allocations took 1.407e-05 s (10.03 %) + leafs setup took 9.537e-07 s (0.68 %) + halfword conversion took 1.502e-05 s (10.71 %) +Built (100 x 100)[0x57488db0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.000s): (100 x 100)[0x57488db0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7437,11 +7488,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7452,11 +7503,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7466,16 +7517,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000428s; avg 0.0001427s ( +/- 9.58/ 17.83 %); best 0.000129s; worst 0.0001681s; std dev. 1.8e-05 (taking best). -Reference operation time is 0.000128984 s (626.4 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.000129 Mflops: 626.432) -Merge (3 -> 1 leaves) took w.c.t. of 0.0001628s, ~0.000138s of computing time (of which 8.106e-05s sorting, 3.099e-06s analysis) +3 iterations (1 th.) took 0.0003672s; avg 0.0001224s ( +/- 22.27/ 38.12 %); best 9.513e-05s; worst 0.000169s; std dev. 3.314e-05 (taking best). +Reference operation time is 9.5129e-05 s (849.4 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 9.513e-05 Mflops: 849.373) +Merge (3 -> 1 leaves) took w.c.t. of 0.000107s, ~8.893e-05s of computing time (of which 5.388e-05s sorting, 2.146e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7486,11 +7537,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7500,14 +7551,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.00021s; avg 7.002e-05s ( +/- 1.25/ 1.48 %); best 6.914e-05s; worst 7.105e-05s; std dev. 7.867e-07 (taking best). -Reference operation time is 6.91414e-05 s (1169 Mflops) with 1 threads. +3 iterations (1 th.) took 0.000175s; avg 5.833e-05s ( +/- 0.68/ 0.95 %); best 5.794e-05s; worst 5.889e-05s; std dev. 4.052e-07 (taking best). +Reference operation time is 5.79357e-05 s (1395 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7517,25 +7568,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 6.914e-05 s ~Mflops: 1168.620 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.866x: 0.000129s -> 6.914e-05s, so taking this instance. +After merge step 1: tpop: 5.794e-05 s ~Mflops: 1394.649 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.642x: 9.513e-05s -> 5.794e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.007717s (of which 0.00017s partitioning, 0.007205s I/O); computing times: 0.000138s in par. loops, 8.106e-05s sorting, 3.099e-06s analyzing) -Total merge + benchmarking process took 0.007717s, equivalent to 111.6/59.8 new/old ops (0.0001481s for 2 clones -- as 2.1/1.1 ops, or 1.1/0.6 ops per clone), SPEEDUP of 1.866x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.866x (0.000129s -> 6.914e-05s), will amortize in 129.0 ops by saving 5.984e-05s per op. -In 1 tuning rounds (tot. 0.0085s, 0.00015s for constructor, 2 clones) obtained a SPEEDUP of 86.6% (1.866x) (from 626.4 to 1169 Mflops). Employed 0.0061s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004768s (of which 0.0001111s partitioning, 0.004393s I/O); computing times: 8.893e-05s in par. loops, 5.388e-05s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.004768s, equivalent to 82.3/50.1 new/old ops (7.01e-05s for 2 clones -- as 1.2/0.7 ops, or 0.6/0.4 ops per clone), SPEEDUP of 1.642x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.642x (9.513e-05s -> 5.794e-05s), will amortize in 128.2 ops by saving 3.719e-05s per op. +In 1 tuning rounds (tot. 0.0053s, 7e-05s for constructor, 2 clones) obtained a SPEEDUP of 64.2% (1.642x) (from 849.4 to 1395 Mflops). Employed 0.0044s for I/O of matrix plots. #pr: updating sample at index 4 (3^th of 4), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.0147982 s (1.290e-04 s -> 6.914e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.00968099 s (9.513e-05 s -> 5.794e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7546,11 +7597,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7561,13 +7612,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 13. +Starting with requested 0 threads ; current default 1 ; at most 24. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7578,11 +7629,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7592,27 +7643,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001829s; avg 6.096e-05s ( +/- 1.83/ 1.69 %); best 5.984e-05s; worst 6.199e-05s; std dev. 8.778e-07 (taking best). -Reference operation time is 5.98431e-05 s (1350 Mflops) with 1 threads. +3 iterations (1 th.) took 0.000176s; avg 5.865e-05s ( +/- 1.22/ 0.81 %); best 5.794e-05s; worst 5.913e-05s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.79357e-05 s (1395 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.199e-04 s (100.00 %) - analyzed arrays in 2.599e-05 s (21.67 %) - cleaned-up arrays in 1.812e-05 s (15.11 %) - deduplicated arrays in 1.907e-05 s (15.90 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.791e-05 s (31.61 %) - memory allocations took 6.914e-06 s (5.77 %) - leafs setup took 1.907e-06 s (1.59 %) - halfword conversion took 8.106e-06 s (6.76 %) -Built (100 x 100)[0x57f06f20]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 13). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 6.390e-05 s (100.00 %) + analyzed arrays in 1.216e-05 s (19.03 %) + cleaned-up arrays in 1.097e-05 s (17.16 %) + deduplicated arrays in 1.287e-05 s (20.15 %) + sorted arrays in 1.192e-06 s (1.87 %) + shuffled partitions in 1.788e-05 s (27.99 %) + memory allocations took 2.861e-06 s (4.48 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 5.960e-06 s (9.33 %) +Built (100 x 100)[0x57488cd0]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 24). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7623,12 +7674,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7639,22 +7690,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.768e-04 s (100.00 %) - analyzed arrays in 2.351e-04 s (49.30 %) - cleaned-up arrays in 5.317e-05 s (11.15 %) - deduplicated arrays in 1.907e-05 s (4.00 %) - sorted arrays in 9.537e-07 s (0.20 %) - shuffled partitions in 1.321e-04 s (27.70 %) - memory allocations took 5.722e-06 s (1.20 %) - leafs setup took 1.907e-06 s (0.40 %) - halfword conversion took 2.694e-05 s (5.65 %) -Built (100 x 100)[0x57f07000]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 1.481e-04 s (100.00 %) + analyzed arrays in 3.719e-05 s (25.12 %) + cleaned-up arrays in 1.097e-05 s (7.41 %) + deduplicated arrays in 1.407e-05 s (9.50 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 4.697e-05 s (31.72 %) + memory allocations took 2.861e-06 s (1.93 %) + leafs setup took 9.537e-07 s (0.64 %) + halfword conversion took 3.505e-05 s (23.67 %) +Built (100 x 100)[0x5748be60]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7665,11 +7716,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7680,11 +7731,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7694,16 +7745,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000329s; avg 0.0001097s ( +/- 2.39/ 4.78 %); best 0.000107s; worst 0.0001149s; std dev. 3.709e-06 (taking best). -Reference operation time is 0.00010705 s (754.8 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 0.00010705 s/0 threads (speedup 0.55902 x), same?n. +3 iterations (1 th.) took 0.000278s; avg 9.267e-05s ( +/- 0.69/ 1.37 %); best 9.203e-05s; worst 9.394e-05s; std dev. 8.991e-07 (taking best). +Reference operation time is 9.20296e-05 s (878 Mflops) with 1 threads. +Challenging best inner round reference (5.79357e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 9.20296e-05 s/0 threads (speedup 0.629534 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7714,12 +7765,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7730,22 +7781,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.471e-04 s (100.00 %) - analyzed arrays in 1.431e-04 s (41.21 %) - cleaned-up arrays in 1.597e-05 s (4.60 %) - deduplicated arrays in 1.597e-05 s (4.60 %) - sorted arrays in 9.537e-07 s (0.27 %) - shuffled partitions in 1.349e-04 s (38.87 %) - memory allocations took 6.199e-06 s (1.79 %) - leafs setup took 4.053e-06 s (1.17 %) - halfword conversion took 2.503e-05 s (7.21 %) -Built (100 x 100)[0x57f07000]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 18, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 1.700e-04 s (100.00 %) + analyzed arrays in 7.606e-05 s (44.74 %) + cleaned-up arrays in 1.097e-05 s (6.45 %) + deduplicated arrays in 1.311e-05 s (7.71 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 4.697e-05 s (27.63 %) + memory allocations took 5.960e-06 s (3.51 %) + leafs setup took 1.907e-06 s (1.12 %) + halfword conversion took 1.502e-05 s (8.84 %) +Built (100 x 100)[0x57488db0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7756,11 +7807,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7771,11 +7822,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7785,16 +7836,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000392s; avg 0.0001307s ( +/- 2.01/ 3.28 %); best 0.000128s; worst 0.0001349s; std dev. 3.059e-06 (taking best). -Reference operation time is 0.000128031 s (631.1 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 0.5, 18 leaves, 2.261 bytes/nz, 0.000128031 s/0 threads (speedup 0.467412 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 25 subms, 18 lsubms, 2.2614 bpnz +3 iterations (1 th.) took 0.000325s; avg 0.0001083s ( +/- 1.17/ 1.47 %); best 0.000107s; worst 0.0001099s; std dev. 1.189e-06 (taking best). +Reference operation time is 0.00010705 s (754.8 Mflops) with 1 threads. +Challenging best inner round reference (5.79357e-05 s/1 threads) with: subdivision 0.5, 16 leaves, 2.25 bytes/nz, 0.00010705 s/0 threads (speedup 0.541203 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7805,12 +7856,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7821,22 +7872,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.698e-04 s (100.00 %) - analyzed arrays in 3.109e-04 s (54.56 %) - cleaned-up arrays in 1.597e-05 s (2.80 %) - deduplicated arrays in 1.597e-05 s (2.80 %) - sorted arrays in 9.537e-07 s (0.17 %) - shuffled partitions in 1.819e-04 s (31.92 %) - memory allocations took 1.025e-05 s (1.80 %) - leafs setup took 4.053e-06 s (0.71 %) - halfword conversion took 2.789e-05 s (4.90 %) -Built (100 x 100)[0x57f555d0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' + converted COO to RSB in 2.022e-04 s (100.00 %) + analyzed arrays in 9.012e-05 s (44.58 %) + cleaned-up arrays in 1.192e-05 s (5.90 %) + deduplicated arrays in 1.311e-05 s (6.49 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 6.390e-05 s (31.60 %) + memory allocations took 4.053e-06 s (2.00 %) + leafs setup took 3.099e-06 s (1.53 %) + halfword conversion took 1.502e-05 s (7.43 %) +Built (100 x 100)[0x57488db0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 35, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7847,11 +7898,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7862,11 +7913,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7876,16 +7927,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.01228s; avg 0.004092s ( +/- 96.36/192.06 %); best 0.000149s; worst 0.01195s; std dev. 0.005557 (taking best). -Reference operation time is 0.000149012 s (542.2 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 1, 36 leaves, 2.383 bytes/nz, 0.000149012 s/0 threads (speedup 0.4016 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +3 iterations (1 th.) took 0.000356s; avg 0.0001187s ( +/- 2.14/ 3.68 %); best 0.0001161s; worst 0.000123s; std dev. 3.105e-06 (taking best). +Reference operation time is 0.00011611 s (695.9 Mflops) with 1 threads. +Challenging best inner round reference (5.79357e-05 s/1 threads) with: subdivision 1, 35 leaves, 2.354 bytes/nz, 0.00011611 s/0 threads (speedup 0.498973 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 48 subms, 35 lsubms, 2.3541 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7896,12 +7947,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7912,22 +7963,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.949e-04 s (100.00 %) - analyzed arrays in 2.398e-04 s (40.32 %) - cleaned-up arrays in 1.502e-05 s (2.53 %) - deduplicated arrays in 1.693e-05 s (2.85 %) - sorted arrays in 9.537e-07 s (0.16 %) - shuffled partitions in 2.160e-04 s (36.31 %) - memory allocations took 2.933e-05 s (4.93 %) - leafs setup took 5.960e-06 s (1.00 %) - halfword conversion took 6.890e-05 s (11.58 %) -Built (100 x 100)[0x57f555d0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 2.780e-04 s (100.00 %) + analyzed arrays in 1.299e-04 s (46.74 %) + cleaned-up arrays in 1.216e-05 s (4.37 %) + deduplicated arrays in 1.287e-05 s (4.63 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 9.298e-05 s (33.45 %) + memory allocations took 1.001e-05 s (3.60 %) + leafs setup took 4.053e-06 s (1.46 %) + halfword conversion took 1.597e-05 s (5.75 %) +Built (100 x 100)[0x574d7380]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 72, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7938,11 +7989,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7953,11 +8004,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7967,16 +8018,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000421s; avg 0.0001403s ( +/- 2.32/ 3.28 %); best 0.0001371s; worst 0.000145s; std dev. 3.351e-06 (taking best). -Reference operation time is 0.000137091 s (589.4 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.000137091 s/0 threads (speedup 0.436522 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +3 iterations (1 th.) took 0.0004029s; avg 0.0001343s ( +/- 0.95/ 1.89 %); best 0.000133s; worst 0.0001369s; std dev. 1.798e-06 (taking best). +Reference operation time is 0.000133038 s (607.3 Mflops) with 1 threads. +Challenging best inner round reference (5.79357e-05 s/1 threads) with: subdivision 2, 72 leaves, 2.531 bytes/nz, 0.000133038 s/0 threads (speedup 0.435484 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 98 subms, 72 lsubms, 2.5315 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7987,12 +8038,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 40329 bytes +# Cache block size total 4194304 bytes, per-thread 174762 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 -# RSB_IO_WANT_EXECUTING_THREADS: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 +# RSB_IO_WANT_EXECUTING_THREADS: 24 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -8003,22 +8054,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.891e-04 s (100.00 %) - analyzed arrays in 3.028e-04 s (51.40 %) - cleaned-up arrays in 1.597e-05 s (2.71 %) - deduplicated arrays in 1.597e-05 s (2.71 %) - sorted arrays in 9.537e-07 s (0.16 %) - shuffled partitions in 1.922e-04 s (32.62 %) - memory allocations took 3.028e-05 s (5.14 %) - leafs setup took 5.007e-06 s (0.85 %) - halfword conversion took 2.599e-05 s (4.41 %) -Built (100 x 100)[0x57f555d0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 + converted COO to RSB in 3.948e-04 s (100.00 %) + analyzed arrays in 1.750e-04 s (44.32 %) + cleaned-up arrays in 1.192e-05 s (3.02 %) + deduplicated arrays in 1.311e-05 s (3.32 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 1.531e-04 s (38.77 %) + memory allocations took 1.884e-05 s (4.77 %) + leafs setup took 6.914e-06 s (1.75 %) + halfword conversion took 1.597e-05 s (4.05 %) +Built (100 x 100)[0x574d7380]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 120, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8029,11 +8080,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8044,11 +8095,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8058,17 +8109,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000452s; avg 0.0001507s ( +/- 9.81/ 12.82 %); best 0.0001359s; worst 0.00017s; std dev. 1.428e-05 (taking best). -Reference operation time is 0.000135899 s (594.6 Mflops) with 1 threads. -Challenging best inner round reference (5.98431e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.000135899 s/0 threads (speedup 0.440351 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1350.2 Mflops. +3 iterations (1 th.) took 0.000457s; avg 0.0001523s ( +/- 0.78/ 1.10 %); best 0.0001512s; worst 0.000154s; std dev. 1.216e-06 (taking best). +Reference operation time is 0.000151157 s (534.5 Mflops) with 1 threads. +Challenging best inner round reference (5.79357e-05 s/1 threads) with: subdivision 4, 120 leaves, 2.726 bytes/nz, 0.000151157 s/0 threads (speedup 0.383281 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 162 subms, 120 lsubms, 2.7255 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1394.65 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 13 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 24 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8078,23 +8129,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.021811 s (eq. to 4e+02/ 4e+02 old/new op.times), gained local/global speedup 1 x (5.98431e-05 : 5.98431e-05) / 1 x (5.98431e-05 : 5.98431e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.00572705 s (eq. to 1e+02/ 1e+02 old/new op.times), gained local/global speedup 1 x (5.79357e-05 : 5.79357e-05) / 1 x (5.79357e-05 : 5.79357e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.022s, 0.0063s for constructor, 0 clones) obtained NO speedup (best stays 1350 Mflops). -Second run of RSB Autotuner took 0.021862 s and estimated a speedup of 1.000000 x (5.984e-05 s -> 5.984e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.0057s, 0.0032s for constructor, 0 clones) obtained NO speedup (best stays 1395 Mflops). +Second run of RSB Autotuner took 0.00576091 s and estimated a speedup of 1.000000 x (5.794e-05 s -> 5.794e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000043 0.000166 0.000209 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000209 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000043 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000166 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000021 0.000064 0.000085 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000085 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000021 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000064 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000209 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000085 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -8109,68 +8160,68 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.000316858 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.00014019 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 4.29153e-05 0 0.000165939 -# so far, program took 6.309s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.517s/0.000s . +%constructor:lower-100x100-5050nz 0 2.09808e-05 0 6.38962e-05 +# so far, program took 6.797s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.051s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.08355s (system CPU time used) -ru_utime : 13.86s (user CPU time used) +ru_maxrss: 21 (maximum resident set size -- MB) +ru_stime : 0.1692s (system CPU time used) +ru_utime : 8.788s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 4) ordered by (1,1,1,1,1,4,1) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1366.53 4.601e-05 0.000e+00 1.478e-05 0.000e+00 6.243e-02 3.61e+00 2.60e+00 1 2.02e-02 -pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1344.84 3.910e-05 0.000e+00 1.502e-05 0.000e+00 1.625e-02 2.12e+00 1.56e+00 1 2.02e-02 -pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 1344.84 9.894e-05 0.000e+00 6.008e-05 0.000e+00 2.436e-02 8.87e-01 6.50e-01 1 8.08e-02 -pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 1168.62 1.290e-04 0.000e+00 6.914e-05 0.000e+00 1.480e-02 1.39e+00 1.17e+00 1 8.08e-02 +pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1841.85 3.099e-05 0.000e+00 1.097e-05 0.000e+00 8.992e-03 4.86e+00 2.60e+00 1 2.02e-02 +pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1694.50 3.004e-05 0.000e+00 1.192e-05 0.000e+00 8.479e-03 2.68e+00 1.56e+00 1 2.02e-02 +pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 1323.83 8.607e-05 0.000e+00 6.104e-05 0.000e+00 9.154e-03 8.73e-01 6.50e-01 1 8.08e-02 +pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 1394.65 9.513e-05 0.000e+00 5.794e-05 0.000e+00 9.681e-03 1.66e+00 1.17e+00 1 8.08e-02 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 130.7 % faster, avg. sp. ratio 2.307x, max sp. ratio 3.113x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1481.1/214.0/4223.1/5924.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 533.3/114.7/1356.7/2133.1 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 886.9, min. 247.3, max. 1998.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 110.0 % faster, avg. sp. ratio 2.100x, max sp. ratio 2.826x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 462.1/150.0/819.9/1848.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 195.1/101.8/290.1/780.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 385.7, min. 260.3, max. 467.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 1683/ 1683/ 1683) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 5050/ 5050/ 5050) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 15150/ 6733/ 26933) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 45450/ 20200/ 80800) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 2.121/ 2.121/ 2.121) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.973/ 0.874/ 3.552,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.007/ 0.887/ 3.606,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.480/ 0.860/ 4.787,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.069/ 0.873/ 4.860,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.495/ 0.650/ 2.599) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 2.080/ 2.080/ 2.080) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.01 s, max 0.06 s, tot 0.12 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.01 s, max 0.06 s, tot 0.12 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.306e+03, min 1.169e+03, max 1.367e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 5.997e+02, min 4.390e+02, max 8.166e+02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 3.976e-05s, min 1.478e-05s, max 6.914e-05s, tot 1.590e-04s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 7.826e-05s, min 3.910e-05s, max 1.290e-04s, tot 3.130e-04s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.153e+00 8.749e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.860e+00 -#pr: Record collection took 0.68 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.01 s, min 0.01 s, max 0.01 s, tot 0.04 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.01 s, min 0.01 s, max 0.01 s, tot 0.04 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.564e+03, min 1.324e+03, max 1.842e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 7.781e+02, min 6.517e+02, max 9.388e+02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.546e-05s, min 1.097e-05s, max 6.104e-05s, tot 1.419e-04s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 6.056e-05s, min 3.004e-05s, max 9.513e-05s, tot 2.422e-04s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.970e+00 2.766e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.666e+00 +#pr: Record collection took 0.13 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 99 environment variables in 4772 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 99 environment variables in 4825 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. #pr: ======== Saved a performance record of 4 samples to test.rpr # Removing the temporary record file test.rpr.tmp. -# terminating run at 1740464040 (after 6.3s of w.c.t.) +# terminating run at 1774875134 (after 6.8s of w.c.t.) + ls -ltr test-tuning-lower-100x100-5050nz--C-N-1--base.eps test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--D-N-1--base.eps test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--S-N-1--base.eps test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--Z-N-1--base.eps test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85629 Feb 24 18:13 test-tuning-lower-100x100-5050nz--D-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84552 Feb 24 18:13 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85630 Feb 24 18:14 test-tuning-lower-100x100-5050nz--S-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84552 Feb 24 18:14 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85629 Feb 24 18:14 test-tuning-lower-100x100-5050nz--C-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84552 Feb 24 18:14 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85630 Feb 24 18:14 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84552 Feb 24 18:14 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85640 Mar 31 02:52 test-tuning-lower-100x100-5050nz--D-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84562 Mar 31 02:52 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85640 Mar 31 02:52 test-tuning-lower-100x100-5050nz--S-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84562 Mar 31 02:52 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85638 Mar 31 02:52 test-tuning-lower-100x100-5050nz--C-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84562 Mar 31 02:52 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85640 Mar 31 02:52 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84562 Mar 31 02:52 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps + rsbench --read-performance-record test.rpr + ls -ltr test.txt --rw-r--r-- 1 pbuilder1 pbuilder1 4087 Feb 24 18:14 test.txt +-rw-r--r-- 1 pbuilder2 pbuilder2 4082 Mar 31 02:52 test.txt + RSB_PR_WLTC=2 + RSB_PR_SR=0 + rsbench --read-performance-record test.rpr @@ -8180,29 +8231,29 @@ /usr/bin/kpsepath ++ kpsepath tex ++ sed 's/!!//g;s/:/\n/g;' -+ find . /nonexistent/first-build/.texlive2024/texmf-config/tex/kpsewhich// /nonexistent/first-build/.texlive2024/texmf-var/tex/kpsewhich// /nonexistent/first-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/first-build/.texlive2024/texmf-config/tex/generic// /nonexistent/first-build/.texlive2024/texmf-var/tex/generic// /nonexistent/first-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/first-build/.texlive2024/texmf-config/tex/latex// /nonexistent/first-build/.texlive2024/texmf-var/tex/latex// /nonexistent/first-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/first-build/.texlive2024/texmf-config/tex/// /nonexistent/first-build/.texlive2024/texmf-var/tex/// /nonexistent/first-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/kpsewhich//': No such file or directory ++ find . /nonexistent/second-build/.texlive2024/texmf-config/tex/kpsewhich// /nonexistent/second-build/.texlive2024/texmf-var/tex/kpsewhich// /nonexistent/second-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/second-build/.texlive2024/texmf-config/tex/generic// /nonexistent/second-build/.texlive2024/texmf-var/tex/generic// /nonexistent/second-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/second-build/.texlive2024/texmf-config/tex/latex// /nonexistent/second-build/.texlive2024/texmf-var/tex/latex// /nonexistent/second-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/second-build/.texlive2024/texmf-config/tex/// /nonexistent/second-build/.texlive2024/texmf-var/tex/// /nonexistent/second-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/kpsewhich//': No such file or directory find: '/usr/local/share/texmf/tex/kpsewhich//': No such file or directory find: '/etc/texmf/tex/kpsewhich//': No such file or directory find: '/var/lib/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texlive/texmf-dist/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/generic//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/generic//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/generic//': No such file or directory find: '/usr/local/share/texmf/tex/generic//': No such file or directory find: '/usr/share/texmf/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/latex//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/latex//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/latex//': No such file or directory find: '/usr/local/share/texmf/tex/latex//': No such file or directory find: '/etc/texmf/tex/latex//': No such file or directory find: '/var/lib/texmf/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex///': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex///': No such file or directory -find: '/nonexistent/first-build/texmf/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex///': No such file or directory +find: '/nonexistent/second-build/texmf/tex///': No such file or directory find: '/usr/local/share/texmf/tex///': No such file or directory + exit 0 for mf in pd.mtx vf.mtx ; do if test -f /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; then true; else cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/$mf /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; fi; done @@ -8217,7 +8268,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x56d99530]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x56c29540]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8232,7 +8283,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x57c1f530]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x5818c540]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8296,51 +8347,51 @@ Done. Building a matrix with 5 nnz, 5 x 5 Duplicates check: 5 - 0 = 5 - converted COO to RSB in 1.094e-01 s (100.00 %) - analyzed arrays in 3.533e-02 s (32.30 %) - cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 1.002e-02 s (9.16 %) - shuffled partitions in 3.200e-02 s (29.26 %) - memory allocations took 1.621e-05 s (0.01 %) - leafs setup took 3.099e-06 s (0.00 %) - halfword conversion took 3.199e-02 s (29.25 %) -Built (5 x 5)[0x57c20d50]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' + converted COO to RSB in 1.209e-04 s (100.00 %) + analyzed arrays in 2.193e-05 s (18.15 %) + cleaned-up arrays in 0.000e+00 s (0.00 %) + deduplicated arrays in 0.000e+00 s (0.00 %) + sorted arrays in 6.890e-05 s (57.00 %) + shuffled partitions in 1.407e-05 s (11.64 %) + memory allocations took 4.053e-06 s (3.35 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 1.192e-05 s (9.86 %) +Built (5 x 5)[0x5818eb80]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' Allocated matrix of 5 nonzeroes: -(5 x 5)[0x57c20d50]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(5 x 5)[0x5818eb80]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -Before auto-tuning, 100 multiplications took 0.699202s. +Before auto-tuning, 100 multiplications took 0.000674s. Threads autotuning (may take more than 1.500000s)... Will use autotuning routine to sample matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=6.63149e-07), 13 suggested as starting thread count(default). -3 iterations (13 th.) took 7.82e-05s; avg 2.607e-05s ( +/- 46.04/ 57.32 %); best 1.407e-05s; worst 4.101e-05s; std dev. 1.119e-05 (taking best). -Reference operation time is 1.40667e-05 s (1.422 Mflops) with 13 threads. -3 iterations (13 th.) took 4.196e-05s; avg 1.399e-05s ( +/- 6.25/ 7.39 %); best 1.311e-05s; worst 1.502e-05s; std dev. 7.867e-07 (taking best). -Reference operation time is 1.3113e-05 s (1.525 Mflops) with 13 threads. -After 0.000199s, autotuning routine did not find a better threads count configuration. -(5 x 5)[0x57c20d50]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -After threads auto-tuning, 100 multiplications took 0.001747s -- effective speedup of 400.255 x -Matrix autotuning (may take more than 1.500000s; using 13 threads )... +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=8.60929e-08), 24 suggested as starting thread count(default). +3 iterations (24 th.) took 2.193e-05s; avg 7.312e-06s ( +/- 5.43/ 7.61 %); best 6.914e-06s; worst 7.868e-06s; std dev. 4.052e-07 (taking best). +Reference operation time is 6.91414e-06 s (2.893 Mflops) with 24 threads. +3 iterations (24 th.) took 2.098e-05s; avg 6.994e-06s ( +/- 1.14/ 2.27 %); best 6.914e-06s; worst 7.153e-06s; std dev. 1.124e-07 (taking best). +Reference operation time is 6.91414e-06 s (2.893 Mflops) with 24 threads. +After 0.000090s, autotuning routine did not find a better threads count configuration. +(5 x 5)[0x5818eb80]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +After threads auto-tuning, 100 multiplications took 0.000649s -- effective speedup of 1.03857 x +Matrix autotuning (may take more than 1.500000s; using 24 threads )... Will autotune matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=6.63149e-07), 13 suggested as starting thread count. -3 iterations (13 th.) took 4.983e-05s; avg 1.661e-05s ( +/- 9.57/ 19.14 %); best 1.502e-05s; worst 1.979e-05s; std dev. 2.248e-06 (taking best). -Reference operation time is 1.50204e-05 s (1.332 Mflops) with 13 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 1.502e-05 Mflops: 1.332) -Merge (2 -> 1 leaves) took w.c.t. of 2.694e-05s, ~5.007e-06s of computing time (of which 0s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 41.18 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.052e-07 (taking best). -Reference operation time is 9.53674e-07 s (20.97 Mflops) with 13 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 20.972 nsubm:1 otn:13 -Applying merge (2 -> 1 leaves, 13 th.) yielded SPEEDUP of 15.750x: 1.502e-05s -> 9.537e-07s, so taking this instance. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=8.60929e-08), 24 suggested as starting thread count. +3 iterations (24 th.) took 2.503e-05s; avg 8.345e-06s ( +/- 17.14/ 31.43 %); best 6.914e-06s; worst 1.097e-05s; std dev. 1.857e-06 (taking best). +Reference operation time is 6.91414e-06 s (2.893 Mflops) with 24 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 6.914e-06 Mflops: 2.893) +Merge (2 -> 1 leaves) took w.c.t. of 1.001e-05s, ~3.099e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 87.96/ 66.67 %); best 8.609e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 8.60929e-08 s (232.3 Mflops) with 24 threads. +After merge step 1: tpop: 8.609e-08 s ~Mflops: 232.307 nsubm:1 otn:24 +Applying merge (2 -> 1 leaves, 24 th.) yielded SPEEDUP of 80.310x: 6.914e-06s -> 8.609e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.0003381s (of which 0.0001378s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 0s sorting, 4.053e-06s analyzing) -Total merge + benchmarking process took 0.0003381s, equivalent to 354.5/22.5 new/old ops (0.000222s for 2 clones -- as 232.8/14.8 ops, or 116.4/7.4 ops per clone), SPEEDUP of 15.750x -Applying multi-merge (2 -> 1 leaves, 1 steps, 13 -> 13 th.sp.) yielded SPEEDUP of 15.750x (1.502e-05s -> 9.537e-07s), will amortize in 24.0 ops by saving 1.407e-05s per op. -In 1 tuning rounds (tot. 0.00048s, 0.00022s for constructor, 2 clones) obtained a SPEEDUP of 1475.0% (15.75x) (from 1.332 to 20.97 Mflops). -After 0.000493s, autotuning routine declared speedup of 15.75 x, when using threads count of 13. -(5 x 5)[0x57c22d40]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' -After threads auto-tuning, 100 multiplications took 0.000028s -- further speedup of 62.6239 x +A total of 1 merge steps (of max 6) (2 -> 1 subms) took 5.913e-05s (of which 2.384e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 0s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 5.913e-05s, equivalent to 686.8/8.6 new/old ops (3.982e-05s for 2 clones -- as 462.5/5.8 ops, or 231.2/2.9 ops per clone), SPEEDUP of 80.310x +Applying multi-merge (2 -> 1 leaves, 1 steps, 24 -> 24 th.sp.) yielded SPEEDUP of 80.310x (6.914e-06s -> 8.609e-08s), will amortize in 8.7 ops by saving 6.828e-06s per op. +In 1 tuning rounds (tot. 0.00014s, 4e-05s for constructor, 2 clones) obtained a SPEEDUP of 7931.0% (80.31x) (from 2.893 to 232.3 Mflops). +After 0.000149s, autotuning routine declared speedup of 80.3102 x, when using threads count of 24. +(5 x 5)[0x58190ca0]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +After threads auto-tuning, 100 multiplications took 0.000031s -- further speedup of 20.9385 x 0/2 0 0 -> 0 1/2 1 0 -> 5 0/2 0 3 -> 0 @@ -8351,7 +8402,7 @@ Correctly initialized the library. Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x57c20d50]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x5818eb80]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8505,68 +8556,76 @@ Creating 500 x 500 matrix with 62500 nonzeroes. Building a matrix with 62500 nnz, 500 x 500 Duplicates check: 62500 - 0 = 62500 - converted COO to RSB in 2.008e-01 s (100.00 %) - analyzed arrays in 3.957e-02 s (19.70 %) - cleaned-up arrays in 1.662e-04 s (0.08 %) - deduplicated arrays in 2.038e-04 s (0.10 %) - sorted arrays in 8.086e-02 s (40.26 %) - shuffled partitions in 5.598e-02 s (27.87 %) - memory allocations took 3.600e-05 s (0.02 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 2.403e-02 s (11.96 %) -Built (500 x 500)[0x5795b360]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 58, symflags:'' + converted COO to RSB in 3.585e-03 s (100.00 %) + analyzed arrays in 1.180e-04 s (3.29 %) + cleaned-up arrays in 9.418e-05 s (2.63 %) + deduplicated arrays in 1.450e-04 s (4.04 %) + sorted arrays in 3.018e-03 s (84.18 %) + shuffled partitions in 1.450e-04 s (4.04 %) + memory allocations took 3.195e-05 s (0.89 %) + leafs setup took 3.099e-06 s (0.09 %) + halfword conversion took 2.694e-05 s (0.75 %) +Built (500 x 500)[0x581a3370]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Allocated matrix of 62500 nonzeroes: -(500 x 500)[0x5795b360]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 58, symflags:'' +(500 x 500)[0x581a3370]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' -Before auto-tuning, 100 multiplications took 0.947606s. +Before auto-tuning, 100 multiplications took 0.020557s. Threads autotuning (may take more than 1.500000s)... -Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 79 subms, 58 lsubms, 2.1238 bpnz. +Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0819 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.35858e-07), 13 suggested as starting thread count(default). -3 iterations (13 th.) took 0.04799s; avg 0.016s ( +/- 0.11/ 0.10 %); best 0.01598s; worst 0.01601s; std dev. 1.404e-05 (taking best). -Reference operation time is 0.0159788 s (15.65 Mflops) with 13 threads. -3 iterations (13 th.) took 0.04799s; avg 0.016s ( +/- 0.08/ 0.12 %); best 0.01599s; worst 0.01602s; std dev. 1.372e-05 (taking best). -Reference operation time is 0.015985 s (15.64 Mflops) with 13 threads. -After 0.096000s, autotuning routine did not find a better threads count configuration. -(500 x 500)[0x5795b360]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 58, symflags:'' -After threads auto-tuning, 100 multiplications took 1.096018s -- effective speedup of 0.86459 x -Matrix autotuning (may take more than 1.500000s; using 13 threads )... -Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 79 subms, 58 lsubms, 2.1238 bpnz. +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.0807e-08), 24 suggested as starting thread count(default). +3 iterations (24 th.) took 0.0006101s; avg 0.0002034s ( +/- 0.23/ 0.35 %); best 0.0002029s; worst 0.0002041s; std dev. 5.15e-07 (taking best). +Reference operation time is 0.000202894 s (1232 Mflops) with 24 threads. +3 iterations (24 th.) took 0.0005808s; avg 0.0001936s ( +/- 2.46/ 4.80 %); best 0.0001888s; worst 0.0002029s; std dev. 6.576e-06 (taking best). +Reference operation time is 0.000188828 s (1324 Mflops) with 24 threads. +After 0.001239s, autotuning routine did not find a better threads count configuration. +(500 x 500)[0x581a3370]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +After threads auto-tuning, 100 multiplications took 0.020136s -- effective speedup of 1.02091 x +Matrix autotuning (may take more than 1.500000s; using 24 threads )... +Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0819 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.35858e-07), 13 suggested as starting thread count. -3 iterations (13 th.) took 0.007521s; avg 0.002507s ( +/- 43.24/ 79.93 %); best 0.001423s; worst 0.004511s; std dev. 0.001419 (taking best). -Reference operation time is 0.00142312 s (175.7 Mflops) with 13 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 79 subms, 58 lsubms, 2.1238 bpnz (tpop: 0.001423 Mflops: 175.670) -Merge (58 -> 37 leaves) took w.c.t. of 0.01837s, ~0.03954s of computing time (of which 0.003817s sorting, 1.216e-05s analysis) -3 iterations (13 th.) took 0.04797s; avg 0.01599s ( +/- 0.05/ 0.10 %); best 0.01598s; worst 0.01601s; std dev. 1.158e-05 (taking best). -Reference operation time is 0.015981 s (15.64 Mflops) with 13 threads. -After merge step 1: tpop: 0.01598 s ~Mflops: 15.644 nsubm:37 otn:13 -Applying merge (58 -> 37 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 11.230x: 0.001423s -> 0.01598s. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.0807e-08), 24 suggested as starting thread count. +3 iterations (24 th.) took 0.0006208s; avg 0.0002069s ( +/- 4.38/ 6.22 %); best 0.0001979s; worst 0.0002198s; std dev. 9.352e-06 (taking best). +Reference operation time is 0.000197887 s (1263 Mflops) with 24 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0819 bpnz (tpop: 0.0001979 Mflops: 1263.345) +Merge (28 -> 22 leaves) took w.c.t. of 0.0005269s, ~0.0006442s of computing time (of which 0.0001292s sorting, 5.007e-06s analysis) +3 iterations (24 th.) took 0.001827s; avg 0.000609s ( +/- 75.38/149.77 %); best 0.00015s; worst 0.001521s; std dev. 0.000645 (taking best). +Reference operation time is 0.000149965 s (1667 Mflops) with 24 threads. +After merge step 1: tpop: 0.00015 s ~Mflops: 1667.052 nsubm:22 otn:24 +Applying merge (28 -> 22 leaves, 24 th.) yielded SPEEDUP of 1.320x: 0.0001979s -> 0.00015s, so taking this instance. +Merge (22 -> 16 leaves) took w.c.t. of 9.799e-05s, ~0.000154s of computing time (of which 6.294e-05s sorting, 4.053e-06s analysis) +3 iterations (24 th.) took 0.001895s; avg 0.0006317s ( +/- 83.54/166.75 %); best 0.000104s; worst 0.001685s; std dev. 0.0007448 (taking best). +Reference operation time is 0.000103951 s (2405 Mflops) with 24 threads. +After merge step 2: tpop: 0.000104 s ~Mflops: 2404.991 nsubm:16 otn:24 +Applying merge (22 -> 16 leaves, 24 th.) yielded SPEEDUP of 1.443x: 0.00015s -> 0.000104s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.0003171s, ~0.000603s of computing time (of which 0.000401s sorting, 3.099e-06s analysis) +3 iterations (24 th.) took 0.00164s; avg 0.0005467s ( +/- 81.16/162.15 %); best 0.000103s; worst 0.001433s; std dev. 0.0006268 (taking best). +Reference operation time is 0.000102997 s (2427 Mflops) with 24 threads. +After merge step 3: tpop: 0.000103 s ~Mflops: 2427.259 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00926x): 0.000104s -> 0.000103s, so IGNORING this instance. +Merge (10 -> 7 leaves) took w.c.t. of 0.0001531s, ~0.0001419s of computing time (of which 7.296e-05s sorting, 3.815e-06s analysis) +3 iterations (24 th.) took 0.000308s; avg 0.0001027s ( +/- 0.62/ 1.24 %); best 0.000102s; worst 0.000104s; std dev. 8.991e-07 (taking best). +Reference operation time is 0.000102043 s (2450 Mflops) with 24 threads. +After merge step 4: tpop: 0.000102 s ~Mflops: 2449.944 nsubm:7 otn:24 +Applying merge (10 -> 7 leaves, 24 th.) yielded SPEEDUP of 1.019x: 0.000104s -> 0.000102s, so taking this instance. +Merge (7 -> 4 leaves) took w.c.t. of 0.000145s, ~0.000138s of computing time (of which 7.319e-05s sorting, 3.099e-06s analysis) +3 iterations (24 th.) took 0.0003021s; avg 0.0001007s ( +/- 0.55/ 0.39 %); best 0.0001001s; worst 0.0001011s; std dev. 4.052e-07 (taking best). +Reference operation time is 0.000100136 s (2497 Mflops) with 24 threads. +After merge step 5: tpop: 0.0001001 s ~Mflops: 2496.610 nsubm:4 otn:24 +Applying merge (7 -> 4 leaves, 24 th.) yielded SPEEDUP of 1.019x: 0.000102s -> 0.0001001s, so taking this instance. +Merge (4 -> 3 leaves) took w.c.t. of 0.00014s, ~0.0001321s of computing time (of which 0s sorting, 1.192e-06s analysis) +3 iterations (24 th.) took 0.0005701s; avg 0.00019s ( +/- 1.13/ 2.13 %); best 0.0001879s; worst 0.0001941s; std dev. 2.868e-06 (taking best). +Reference operation time is 0.000187874 s (1331 Mflops) with 24 threads. +After merge step 6: tpop: 0.0001879 s ~Mflops: 1330.680 nsubm:3 otn:24 +Applying merge (4 -> 3 leaves, 24 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.876x: 0.0001001s -> 0.0001879s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 merge steps (of max 6) (58 -> 37 subms) took 0.06636s (of which 0.01838s partitioning, 0s I/O); computing times: 0.03954s in par. loops, 0.003817s sorting, 1.216e-05s analyzing) -Total merge + benchmarking process took 0.06636s, equivalent to 46.6/46.6 new/old ops (0.00607s for 1 clones -- as 4.3/4.3 ops, or 4.3/4.3 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 0.04801s; avg 0.016s ( +/- 0.10/ 0.08 %); best 0.01599s; worst 0.01602s; std dev. 1.196e-05 (taking best). -Reference operation time is 0.0159872 s (15.64 Mflops) with 13 threads. -Starting split (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 79 subms, 58 lsubms, 2.1238 bpnz (tpop: 0.01599 Mflops: 15.638) -Split (58 -> 145 leaves, 79 -> 195 subms) took 0.02762s (of which: 1.311e-05s analysis, -5.047e+10s mem.mgmt); compute time: 0.0614s overall, 0.0004594s searches, 0.06094s shuffle, 0.04165s switch, 0.0005703s quadrants. -3 iterations (13 th.) took 0.04381s; avg 0.0146s ( +/- 19.32/ 9.71 %); best 0.01178s; worst 0.01602s; std dev. 0.001996 (taking best). -Reference operation time is 0.0117822 s (21.22 Mflops) with 13 threads. -After split step 1: tpop: 0.01178 s ~Mflops: 21.219 nsubm:145 otn:13 -Applying split (58 -> 145 leaves, 13 th.) yielded SPEEDUP of 1.357x: 0.01599s -> 0.01178s, so taking this instance. -Split (145 -> 361 leaves, 195 -> 483 subms) took 0.02379s (of which: 3.099e-05s analysis, -1.253e+11s mem.mgmt); compute time: 0.009102s overall, 0.0006075s searches, 0.008495s shuffle, 0.002092s switch, 0.001468s quadrants. -3 iterations (13 th.) took 0.04778s; avg 0.01593s ( +/- 1.07/ 0.62 %); best 0.01576s; worst 0.01602s; std dev. 0.0001208 (taking best). -Reference operation time is 0.0157559 s (15.87 Mflops) with 13 threads. -After split step 2: tpop: 0.01576 s ~Mflops: 15.867 nsubm:361 otn:13 -Applying split (145 -> 361 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.337x: 0.01178s -> 0.01576s. -Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 split steps (of max 6) (58 -> 361 subms) took 0.1917s (of which 0.05212s partitioning, 0s I/O); computing times: 0.0705s in par. loops, 0.001067s sorting, 4.411e-05s analyzing) -Total split + benchmarking process took 0.1917s, equivalent to 16.3/12.0 new/old ops (0.09597s for 2 clones -- as 8.1/6.0 ops, or 4.1/3.0 ops per clone), SPEEDUP of 1.357x -Applying multi-split (58 -> 145 leaves, 1 steps, 13 -> 13 th.sp.) yielded SPEEDUP of 1.357x (0.01599s -> 0.01178s), will amortize in 45.6 ops by saving 0.004205s per op. -In 1 tuning rounds (tot. 0.37s, 0.1s for constructor, 3 clones) obtained a SPEEDUP of 35.7% (1.357x) (from 15.64 to 21.22 Mflops). -After 0.368482s, autotuning routine declared speedup of 1.35689 x, when using threads count of 13. -(500 x 500)[0x57a5b4c0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 145, symflags:'' -After threads auto-tuning, 100 multiplications took 1.463481s -- further speedup of 0.748911 x +A total of 6 merge steps (of max 6) (28 -> 3 subms) took 0.008423s (of which 0.001427s partitioning, 0s I/O); computing times: 0.001813s in par. loops, 0.0007393s sorting, 2.027e-05s analyzing) +Total merge + benchmarking process took 0.008423s, equivalent to 84.1/42.6 new/old ops (0.0005927s for 5 clones -- as 5.9/3.0 ops, or 1.2/0.6 ops per clone), SPEEDUP of 1.976x +Applying multi-merge (28 -> 4 leaves, 5 steps, 24 -> 24 th.sp.) yielded SPEEDUP of 1.976x (0.0001979s -> 0.0001001s), will amortize in 86.2 ops by saving 9.775e-05s per op. +In 1 tuning rounds (tot. 0.0095s, 0.00059s for constructor, 5 clones) obtained a SPEEDUP of 97.6% (1.976x) (from 1263 to 2497 Mflops). +After 0.009503s, autotuning routine declared speedup of 1.97619 x, when using threads count of 24. +(500 x 500)[0x5829eb70]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 4, symflags:'' +After threads auto-tuning, 100 multiplications took 0.010054s -- further speedup of 2.00275 x librsb timer-based profiling is not supported in this build. If you wish to have it, re-configure librsb with its support. So you can safely ignore the error you might just have seen printed out on screen. /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve Hello, RSB! @@ -8574,19 +8633,19 @@ Correctly initialized the library. Building a matrix with 7 nnz, 6 x 6 Duplicates check: 1 - 0 = 1 - converted COO to RSB in 5.458e-02 s (100.00 %) - analyzed arrays in 3.856e-02 s (70.64 %) - cleaned-up arrays in 1.001e-05 s (0.02 %) - deduplicated arrays in 2.146e-06 s (0.00 %) - sorted arrays in 1.907e-06 s (0.00 %) - shuffled partitions in 1.599e-02 s (29.30 %) - memory allocations took 1.597e-05 s (0.03 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 0.000e+00 s (0.00 %) -Built (6 x 6)[0x56a7b540]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' + converted COO to RSB in 8.659e-04 s (100.00 %) + analyzed arrays in 8.349e-04 s (96.42 %) + cleaned-up arrays in 5.960e-06 s (0.69 %) + deduplicated arrays in 0.000e+00 s (0.00 %) + sorted arrays in 9.537e-07 s (0.11 %) + shuffled partitions in 1.001e-05 s (1.16 %) + memory allocations took 7.868e-06 s (0.91 %) + leafs setup took 1.907e-06 s (0.22 %) + halfword conversion took 3.099e-06 s (0.36 %) +Built (6 x 6)[0x57afd550]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x56a7b540]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x57afd550]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8612,31 +8671,55 @@ 1 1 Will autotune matrix: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.303e-07 -3 iterations (13 th.) took 2.599e-05s; avg 8.663e-06s ( +/- 93.88/200.00 %); best 5.303e-07s; worst 2.599e-05s; std dev. 1.225e-05 (taking best). -Reference operation time is 5.30291e-07 s (3.772 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.303e-07 Mflops: 3.772) -Merge (1 -> 1 leaves) took w.c.t. of 9.537e-07s, ~0s of computing time (of which 0s sorting, 0s analysis) -3 iterations (13 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 28.57/ 14.29 %); best 1.192e-06s; worst 1.907e-06s; std dev. 3.372e-07 (taking best). -Reference operation time is 1.19209e-06 s (1.678 Mflops) with 13 threads. -After merge step 1: tpop: 1.192e-06 s ~Mflops: 1.678 nsubm:1 otn:13 -Applying merge (1 -> 1 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 2.248x: 5.303e-07s -> 1.192e-06s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 merge steps (of max 6) (1 -> 1 subms) took 3.505e-05s (of which 7.153e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) -Total merge + benchmarking process took 3.505e-05s, equivalent to 66.1/66.1 new/old ops (0.0239s for 1 clones -- as 45065.6/45065.6 ops, or 45065.6/45065.6 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.08e-08 +3 iterations (24 th.) took 2.384e-05s; avg 7.947e-06s ( +/- 99.36/188.00 %); best 5.08e-08s; worst 2.289e-05s; std dev. 1.057e-05 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.08e-08 Mflops: 39.374) +Merge (1 -> 1 leaves) took w.c.t. of 0s, ~0s of computing time (of which 0s sorting, 0s analysis) +3 iterations (24 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 92.90/ 66.67 %); best 5.08e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After merge step 1: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying merge (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 1.788e-05s (of which 2.861e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 1.788e-05s, equivalent to 352.0/352.0 new/old ops (2.503e-05s for 1 clones -- as 492.8/492.8 ops, or 492.8/492.8 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 16.59/200.00 %); best 5.303e-07s; worst 1.907e-06s; std dev. 8.991e-07 (taking best). -Reference operation time is 5.30291e-07 s (3.772 Mflops) with 13 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.303e-07 Mflops: 3.772) -Split (1 -> 1 leaves, 1 -> 1 subms) took 0s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 28.57 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.15e-07 (taking best). -Reference operation time is 9.53674e-07 s (2.097 Mflops) with 13 threads. -After split step 1: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:13 -Applying split (1 -> 1 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.798x: 5.303e-07s -> 9.537e-07s. -Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 split steps (of max 6) (1 -> 1 subms) took 2.885e-05s (of which 5.96e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) -Total split + benchmarking process took 2.885e-05s, equivalent to 54.4/54.4 new/old ops (0.02392s for 1 clones -- as 45098.0/45098.0 ops, or 45098.0/45098.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -In 1 tuning rounds (tot. 0.048s, 0.048s for constructor, 2 clones) obtained NO speedup (best stays 3.772 Mflops). +3 iterations (24 th.) took 1.192e-06s; avg 3.974e-07s ( +/- 87.22/200.00 %); best 5.08e-08s; worst 1.192e-06s; std dev. 5.62e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.08e-08 Mflops: 39.374) +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.217e-05s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 92.01/ 50.00 %); best 5.08e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 1: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 3.815e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 92.90/ 66.67 %); best 5.08e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 2: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 4.053e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 84.02/200.00 %); best 5.08e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 3: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 3.099e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 92.01/ 50.00 %); best 5.08e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 4: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.861e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 92.01/ 50.00 %); best 5.08e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 5: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.861e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (24 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 84.02/200.00 %); best 5.08e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.07951e-08 s (39.37 Mflops) with 24 threads. +After split step 6: tpop: 5.08e-08 s ~Mflops: 39.374 nsubm:1 otn:24 +Applying split (1 -> 1 leaves, 24 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00000x): 5.08e-08s -> 5.08e-08s, so IGNORING this instance. +A total of 6 split steps (of max 6) (1 -> 1 subms) took 0.0001631s (of which 7.272e-05s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 9.537e-07s analyzing) +Total split + benchmarking process took 0.0001631s, equivalent to 3210.5/3210.5 new/old ops (1.788e-05s for 1 clones -- as 352.0/352.0 ops, or 352.0/352.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.0003s, 4.3e-05s for constructor, 2 clones) obtained NO speedup (best stays 39.37 Mflops). Backsolving we should get a unitary vector: %%MatrixMarket matrix array real general @@ -8671,162 +8754,179 @@ /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran Building a matrix with 210 nnz, 20 x 20 Duplicates check: 210 - 0 = 210 - converted COO to RSB in 7.242e-02 s (100.00 %) - analyzed arrays in 2.391e-02 s (33.02 %) - cleaned-up arrays in 5.007e-06 s (0.01 %) - deduplicated arrays in 3.815e-06 s (0.01 %) - sorted arrays in 1.646e-02 s (22.73 %) - shuffled partitions in 1.600e-02 s (22.09 %) - memory allocations took 3.314e-05 s (0.05 %) - leafs setup took 6.199e-06 s (0.01 %) - halfword conversion took 1.599e-02 s (22.08 %) -Built (20 x 20)[0x574ac440]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' + converted COO to RSB in 9.830e-04 s (100.00 %) + analyzed arrays in 6.700e-05 s (6.82 %) + cleaned-up arrays in 4.053e-06 s (0.41 %) + deduplicated arrays in 9.537e-07 s (0.10 %) + sorted arrays in 8.240e-04 s (83.82 %) + shuffled partitions in 3.719e-05 s (3.78 %) + memory allocations took 2.289e-05 s (2.33 %) + leafs setup took 2.861e-06 s (0.29 %) + halfword conversion took 1.717e-05 s (1.75 %) +Built (20 x 20)[0x57f8a450]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.228e-07 -Starting autotuning (16 x 5.22804e-07 s stages, transA=N, nrhs=1, timer gran.=5.22804e-07), 13 suggested as starting thread count(default). -3 iterations (13 th.) took 0.024s; avg 0.008s ( +/- 0.03/ 0.03 %); best 0.007997s; worst 0.008002s; std dev. 2.051e-06 (taking best). -Reference operation time is 0.00799704 s (0.105 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.007997 Mflops: 0.105) -Merge (22 -> 16 leaves) took w.c.t. of 0.004382s, ~0.0007312s of computing time (of which 3.29e-05s sorting, 5.96e-06s analysis) -3 iterations (13 th.) took 0.03158s; avg 0.01053s ( +/- 24.05/ 47.77 %); best 0.007995s; worst 0.01555s; std dev. 0.003556 (taking best). -Reference operation time is 0.00799489 s (0.1051 Mflops) with 13 threads. -After merge step 1: tpop: 0.007995 s ~Mflops: 0.105 nsubm:16 otn:13 -Applying merge (22 -> 16 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00027x): 0.007997s -> 0.007995s, so IGNORING this instance. -Merge (16 -> 13 leaves) took w.c.t. of 2.408e-05s, ~7.868e-06s of computing time (of which 2.146e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.02392s; avg 0.007975s ( +/- 0.37/ 0.42 %); best 0.007945s; worst 0.008008s; std dev. 2.584e-05 (taking best). -Reference operation time is 0.00794506 s (0.1057 Mflops) with 13 threads. -After merge step 2: tpop: 0.007945 s ~Mflops: 0.106 nsubm:13 otn:13 -Applying merge (16 -> 13 leaves, 13 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00654x): 0.007997s -> 0.007945s, so IGNORING this instance. -Merge (13 -> 10 leaves) took w.c.t. of 2.384e-05s, ~7.153e-06s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.02396s; avg 0.007988s ( +/- 0.47/ 0.28 %); best 0.00795s; worst 0.00801s; std dev. 2.68e-05 (taking best). -Reference operation time is 0.00795007 s (0.1057 Mflops) with 13 threads. -After merge step 3: tpop: 0.00795 s ~Mflops: 0.106 nsubm:10 otn:13 -Applying merge (13 -> 10 leaves, 13 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00591x): 0.007997s -> 0.00795s, so IGNORING this instance. -Merge (10 -> 8 leaves) took w.c.t. of 2.694e-05s, ~9.775e-06s of computing time (of which 2.861e-06s sorting, 2.861e-06s analysis) -3 iterations (13 th.) took 0.02392s; avg 0.007973s ( +/- 0.57/ 0.34 %); best 0.007927s; worst 0.008s; std dev. 3.251e-05 (taking best). -Reference operation time is 0.00792694 s (0.106 Mflops) with 13 threads. -After merge step 4: tpop: 0.007927 s ~Mflops: 0.106 nsubm:8 otn:13 -Applying merge (10 -> 8 leaves, 13 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00884x): 0.007997s -> 0.007927s, so IGNORING this instance. -Merge (8 -> 6 leaves) took w.c.t. of 1.788e-05s, ~5.96e-06s of computing time (of which 2.146e-06s sorting, 2.861e-06s analysis) -3 iterations (13 th.) took 0.03196s; avg 0.01065s ( +/- 25.08/ 50.13 %); best 0.007981s; worst 0.01599s; std dev. 0.003776 (taking best). -Reference operation time is 0.00798106 s (0.1052 Mflops) with 13 threads. -After merge step 5: tpop: 0.007981 s ~Mflops: 0.105 nsubm:6 otn:13 -Applying merge (8 -> 6 leaves, 13 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00200x): 0.007997s -> 0.007981s, so IGNORING this instance. -Merge (6 -> 3 leaves) took w.c.t. of 2.599e-05s, ~1.121e-05s of computing time (of which 4.053e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.02765s; avg 0.009218s ( +/- 13.40/ 26.39 %); best 0.007983s; worst 0.01165s; std dev. 0.00172 (taking best). -Reference operation time is 0.00798297 s (0.1052 Mflops) with 13 threads. -After merge step 6: tpop: 0.007983 s ~Mflops: 0.105 nsubm:3 otn:13 -Applying merge (6 -> 3 leaves, 13 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00176x): 0.007997s -> 0.007983s, so IGNORING this instance. -A total of 6 merge steps (of max 6) (22 -> 3 subms) took 0.168s (of which 0.004538s partitioning, 0s I/O); computing times: 0.0007732s in par. loops, 4.601e-05s sorting, 2.384e-05s analyzing) -Total merge + benchmarking process took 0.168s, equivalent to 21.0/21.0 new/old ops (0.02391s for 1 clones -- as 3.0/3.0 ops, or 3.0/3.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 0.028s; avg 0.009332s ( +/- 14.24/ 28.45 %); best 0.008003s; worst 0.01199s; std dev. 0.001878 (taking best). -Reference operation time is 0.008003 s (0.105 Mflops) with 13 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.008003 Mflops: 0.105) -Split (22 -> 51 leaves, 30 -> 70 subms) took 0.007982s (of which: 5.007e-06s analysis, -1.915e+10s mem.mgmt); compute time: 0.005842s overall, 3.958e-05s searches, 0.005802s shuffle, 0.003363s switch, 0.0001564s quadrants. -3 iterations (13 th.) took 0.004863s; avg 0.001621s ( +/- 7.59/ 10.97 %); best 0.001498s; worst 0.001799s; std dev. 0.0001288 (taking best). -Reference operation time is 0.00149798 s (0.5608 Mflops) with 13 threads. -After split step 1: tpop: 0.001498 s ~Mflops: 0.561 nsubm:51 otn:13 -Applying split (22 -> 51 leaves, 13 th.) yielded SPEEDUP of 5.343x: 0.008003s -> 0.001498s, so taking this instance. -Split (51 -> 122 leaves, 70 -> 166 subms) took 0.0004921s (of which: 1.001e-05s analysis, -4.351e+10s mem.mgmt); compute time: 0.004382s overall, 7.2e-05s searches, 0.00431s shuffle, 0.001388s switch, 0.0005753s quadrants. -3 iterations (13 th.) took 0.0224s; avg 0.007465s ( +/- 23.86/ 39.28 %); best 0.005684s; worst 0.0104s; std dev. 0.002089 (taking best). -Reference operation time is 0.0056839 s (0.1478 Mflops) with 13 threads. -After split step 2: tpop: 0.005684 s ~Mflops: 0.148 nsubm:122 otn:13 -Applying split (51 -> 122 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 3.794x: 0.001498s -> 0.005684s. -Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 split steps (of max 6) (22 -> 122 subms) took 0.036s (of which 0.008643s partitioning, 0s I/O); computing times: 0.01022s in par. loops, 0.0001116s sorting, 1.502e-05s analyzing) -Total split + benchmarking process took 0.036s, equivalent to 24.0/4.5 new/old ops (0.02802s for 2 clones -- as 18.7/3.5 ops, or 9.4/1.8 ops per clone), SPEEDUP of 5.343x -Applying multi-split (22 -> 51 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 5.343x (0.008003s -> 0.001498s), will amortize in 5.5 ops by saving 0.006505s per op. -In 1 tuning rounds (tot. 0.31s, 0.052s for constructor, 3 clones) obtained a SPEEDUP of 434.3% (5.343x) (from 0.105 to 0.5608 Mflops). - autotuner chose 13 threads -Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 70 subms, 51 lsubms, 4.4571 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.228e-07 -Starting autotuning (16 x 5.22804e-07 s stages, transA=N, nrhs=1, timer gran.=5.22804e-07), 13 suggested as starting thread count(default). -3 iterations (13 th.) took 0.01199s; avg 0.003995s ( +/- 61.42/122.20 %); best 0.001541s; worst 0.008877s; std dev. 0.003452 (taking best). -~ 13 threads: 0.001541s (0.55 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.01197s; avg 0.003989s ( +/- 68.71/ 37.97 %); best 0.001248s; worst 0.005503s; std dev. 0.001942 (taking best). - 12 threads: 0.001248s (0.67 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.00356s; avg 0.001187s ( +/- 7.82/ 9.22 %); best 0.001094s; worst 0.001296s; std dev. 8.336e-05 (taking best). - 11 threads: 0.001094s (0.77 Mflops) (0/2 degradations so far) - -3 iterations (10 th.) took 0.04042s; avg 0.01347s ( +/- 94.07/113.17 %); best 0.0007989s; worst 0.02872s; std dev. 0.01154 (taking best). - 10 threads: 0.0007989s (1.1 Mflops) (0/2 degradations so far) - -3 iterations (9 th.) took 0.04396s; avg 0.01465s ( +/- 18.20/ 23.40 %); best 0.01199s; worst 0.01808s; std dev. 0.002547 (taking best). - 9 threads: 0.01199s (0.07 Mflops) (1/2 degradations so far) - -3 iterations (8 th.) took 0.02403s; avg 0.008009s ( +/- 0.13/ 0.14 %); best 0.007999s; worst 0.008021s; std dev. 9e-06 (taking best). - 8 threads: 0.007999s (0.11 Mflops) (2/2 degradations so far) - -Best threads choice is 10; starting threads were 13; max speed gap is 15x; search took 0.14s. -Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 70 subms, 51 lsubms, 4.4571 bpnz (tpop: 0.0007989 Mflops: 1.051) -Merge (51 -> 36 leaves) took w.c.t. of 0.00795s, ~5.889e-05s of computing time (of which 1.216e-05s sorting, 1.001e-05s analysis) -3 iterations (13 th.) took 0.03599s; avg 0.012s ( +/- 33.38/ 33.07 %); best 0.007991s; worst 0.01596s; std dev. 0.003254 (taking best). -~ 13 threads: 0.007991s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.02396s; avg 0.007988s ( +/- 0.22/ 0.18 %); best 0.007971s; worst 0.008003s; std dev. 1.317e-05 (taking best). - 12 threads: 0.007971s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.02399s; avg 0.007997s ( +/- 0.17/ 0.11 %); best 0.007983s; worst 0.008006s; std dev. 1.001e-05 (taking best). - 11 threads: 0.007983s (0.11 Mflops) (1/2 degradations so far) - -3 iterations (10 th.) took 0.02399s; avg 0.007995s ( +/- 0.20/ 0.11 %); best 0.007979s; worst 0.008004s; std dev. 1.147e-05 (taking best). - 10 threads: 0.007979s (0.11 Mflops) (2/2 degradations so far) - -Best threads choice is 12; starting threads were 13; max speed gap is 1x; search took 0.11s. -After merge step 1: tpop: 0.007971 s ~Mflops: 0.105 nsubm:36 otn:12 -Applying merge (51 -> 36 leaves, 12 th.) yielded SLOWDOWN (1th of 3 tolerable) of 9.977x: 0.0007989s -> 0.007971s. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.08e-08 +Starting autotuning (16 x 5.07951e-08 s stages, transA=N, nrhs=1, timer gran.=5.07951e-08), 24 suggested as starting thread count(default). +3 iterations (24 th.) took 0.004582s; avg 0.001527s ( +/- 66.55/ 38.15 %); best 0.0005109s; worst 0.00211s; std dev. 0.0007213 (taking best). +Reference operation time is 0.000510931 s (1.644 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.0005109 Mflops: 1.644) +Merge (22 -> 16 leaves) took w.c.t. of 0.000324s, ~0.0003181s of computing time (of which 2.146e-06s sorting, 4.053e-06s analysis) +3 iterations (24 th.) took 0.003022s; avg 0.001007s ( +/- 75.20/ 47.03 %); best 0.0002499s; worst 0.001481s; std dev. 0.0005412 (taking best). +Reference operation time is 0.000249863 s (3.362 Mflops) with 24 threads. +After merge step 1: tpop: 0.0002499 s ~Mflops: 3.362 nsubm:16 otn:24 +Applying merge (22 -> 16 leaves, 24 th.) yielded SPEEDUP of 2.045x: 0.0005109s -> 0.0002499s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 1.502e-05s, ~8.821e-06s of computing time (of which 3.099e-06s sorting, 3.099e-06s analysis) +3 iterations (24 th.) took 0.002044s; avg 0.0006813s ( +/- 68.02/134.98 %); best 0.0002179s; worst 0.001601s; std dev. 0.0006503 (taking best). +Reference operation time is 0.000217915 s (3.855 Mflops) with 24 threads. +After merge step 2: tpop: 0.0002179 s ~Mflops: 3.855 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded SPEEDUP of 1.147x: 0.0002499s -> 0.0002179s, so taking this instance. +Merge (10 -> 8 leaves) took w.c.t. of 1.192e-05s, ~3.815e-06s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 0.000803s; avg 0.0002677s ( +/- 57.78/ 86.07 %); best 0.000113s; worst 0.0004981s; std dev. 0.0001661 (taking best). +Reference operation time is 0.00011301 s (7.433 Mflops) with 24 threads. +After merge step 3: tpop: 0.000113 s ~Mflops: 7.433 nsubm:8 otn:24 +Applying merge (10 -> 8 leaves, 24 th.) yielded SPEEDUP of 1.928x: 0.0002179s -> 0.000113s, so taking this instance. +Merge (8 -> 6 leaves) took w.c.t. of 8.106e-06s, ~2.861e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 0.000936s; avg 0.000312s ( +/- 43.30/ 44.27 %); best 0.0001769s; worst 0.0004501s; std dev. 0.0001116 (taking best). +Reference operation time is 0.000176907 s (4.748 Mflops) with 24 threads. +After merge step 4: tpop: 0.0001769 s ~Mflops: 4.748 nsubm:6 otn:24 +Applying merge (8 -> 6 leaves, 24 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.565x: 0.000113s -> 0.0001769s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 merge steps (of max 6) (51 -> 36 subms) took 0.116s (of which 0.007962s partitioning, 0s I/O); computing times: 5.889e-05s in par. loops, 1.216e-05s sorting, 1.001e-05s analyzing) -Total merge + benchmarking process took 0.116s, equivalent to 145.1/145.1 new/old ops (0.003999s for 1 clones -- as 5.0/5.0 ops, or 5.0/5.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 0.02402s; avg 0.008007s ( +/- 0.08/ 0.15 %); best 0.008001s; worst 0.008019s; std dev. 8.266e-06 (taking best). -~ 13 threads: 0.008001s (0.1 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.02397s; avg 0.007989s ( +/- 0.10/ 0.13 %); best 0.007981s; worst 0.007999s; std dev. 7.613e-06 (taking best). - 12 threads: 0.007981s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.02374s; avg 0.007915s ( +/- 2.31/ 1.17 %); best 0.007732s; worst 0.008007s; std dev. 0.0001292 (taking best). - 11 threads: 0.007732s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (10 th.) took 0.02398s; avg 0.007995s ( +/- 0.19/ 0.12 %); best 0.00798s; worst 0.008005s; std dev. 1.071e-05 (taking best). - 10 threads: 0.00798s (0.11 Mflops) (1/2 degradations so far) - -3 iterations (9 th.) took 0.02397s; avg 0.007992s ( +/- 24.42/ 23.67 %); best 0.00604s; worst 0.009883s; std dev. 0.00157 (taking best). - 9 threads: 0.00604s (0.14 Mflops) (0/2 degradations so far) - -3 iterations (8 th.) took 0.02398s; avg 0.007995s ( +/- 0.13/ 0.27 %); best 0.007984s; worst 0.008016s; std dev. 1.517e-05 (taking best). - 8 threads: 0.007984s (0.11 Mflops) (1/2 degradations so far) - -3 iterations (7 th.) took 0.02398s; avg 0.007994s ( +/- 0.12/ 0.11 %); best 0.007985s; worst 0.008003s; std dev. 7.419e-06 (taking best). - 7 threads: 0.007985s (0.11 Mflops) (2/2 degradations so far) - -Best threads choice is 9; starting threads were 13; max speed gap is 1.3x; search took 0.17s. -Starting split (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 70 subms, 51 lsubms, 4.4571 bpnz (tpop: 0.00604 Mflops: 0.139) -Split (51 -> 122 leaves, 70 -> 166 subms) took 0.01198s (of which: 1.287e-05s analysis, -4.351e+10s mem.mgmt); compute time: 0.003376s overall, 6.27e-05s searches, 0.003313s shuffle, 0.001089s switch, 0.0006022s quadrants. -3 iterations (13 th.) took 0.02795s; avg 0.009318s ( +/- 14.45/ 28.84 %); best 0.007971s; worst 0.012s; std dev. 0.0019 (taking best). -~ 13 threads: 0.007971s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.03198s; avg 0.01066s ( +/- 25.27/ 12.68 %); best 0.007966s; worst 0.01201s; std dev. 0.001905 (taking best). - 12 threads: 0.007966s (0.11 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.028s; avg 0.009334s ( +/- 14.22/ 28.14 %); best 0.008007s; worst 0.01196s; std dev. 0.001857 (taking best). - 11 threads: 0.008007s (0.1 Mflops) (1/2 degradations so far) - -3 iterations (10 th.) took 0.04396s; avg 0.01465s ( +/- 18.40/ 9.20 %); best 0.01196s; worst 0.016s; std dev. 0.001907 (taking best). - 10 threads: 0.01196s (0.07 Mflops) (2/2 degradations so far) - -Best threads choice is 12; starting threads were 13; max speed gap is 1.5x; search took 0.13s. -After split step 1: tpop: 0.007966 s ~Mflops: 0.105 nsubm:122 otn:12 -Applying split (51 -> 122 leaves, 12 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.319x: 0.00604s -> 0.007966s. -Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 split steps (of max 6) (51 -> 122 subms) took 0.144s (of which 0.01203s partitioning, 0s I/O); computing times: 0.003376s in par. loops, 6.27e-05s sorting, 1.287e-05s analyzing) -Total split + benchmarking process took 0.144s, equivalent to 23.8/23.8 new/old ops (0.02395s for 1 clones -- as 4.0/4.0 ops, or 4.0/4.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -In 1 tuning rounds (tot. 0.59s, 0.028s for constructor, 2 clones) obtained NO speedup (best stays 0.1391 Mflops). +A total of 4 merge steps (of max 6) (22 -> 6 subms) took 0.007314s (of which 0.0003729s partitioning, 0s I/O); computing times: 0.0003335s in par. loops, 8.106e-06s sorting, 1.001e-05s analyzing) +Total merge + benchmarking process took 0.007314s, equivalent to 64.7/14.3 new/old ops (0.000104s for 4 clones -- as 0.9/0.2 ops, or 0.2/0.1 ops per clone), SPEEDUP of 4.521x +Applying multi-merge (22 -> 8 leaves, 3 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 4.521x (0.0005109s -> 0.000113s), will amortize in 18.4 ops by saving 0.0003979s per op. +In 1 tuning rounds (tot. 0.012s, 0.0001s for constructor, 4 clones) obtained a SPEEDUP of 352.1% (4.521x) (from 1.644 to 7.433 Mflops). + autotuner chose 24 threads +Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 11 subms, 8 lsubms, 3.0095 bpnz. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.08e-08 +Starting autotuning (16 x 5.07951e-08 s stages, transA=N, nrhs=1, timer gran.=5.07951e-08), 24 suggested as starting thread count(default). +3 iterations (24 th.) took 0.0004952s; avg 0.0001651s ( +/- 66.63/ 90.80 %); best 5.507e-05s; worst 0.000315s; std dev. 0.0001098 (taking best). +~ 24 threads: 5.507e-05s (15 Mflops) (0/2 degradations so far) - +3 iterations (23 th.) took 0.001003s; avg 0.0003343s ( +/- 36.25/ 56.74 %); best 0.0002131s; worst 0.000524s; std dev. 0.0001359 (taking best). + 23 threads: 0.0002131s (3.9 Mflops) (1/2 degradations so far) - +3 iterations (22 th.) took 0.000248s; avg 8.265e-05s ( +/- 43.17/ 51.44 %); best 4.697e-05s; worst 0.0001252s; std dev. 3.229e-05 (taking best). + 22 threads: 4.697e-05s (18 Mflops) (0/2 degradations so far) - +3 iterations (21 th.) took 0.0001731s; avg 5.77e-05s ( +/- 27.27/ 30.17 %); best 4.196e-05s; worst 7.51e-05s; std dev. 1.358e-05 (taking best). + 21 threads: 4.196e-05s (20 Mflops) (0/2 degradations so far) - +3 iterations (20 th.) took 0.0007119s; avg 0.0002373s ( +/- 87.34/170.56 %); best 3.004e-05s; worst 0.0006421s; std dev. 0.0002862 (taking best). + 20 threads: 3.004e-05s (28 Mflops) (0/2 degradations so far) - +3 iterations (19 th.) took 0.0006869s; avg 0.000229s ( +/- 81.15/112.63 %); best 4.315e-05s; worst 0.0004869s; std dev. 0.0001882 (taking best). + 19 threads: 4.315e-05s (19 Mflops) (1/2 degradations so far) - +3 iterations (18 th.) took 0.000443s; avg 0.0001477s ( +/- 36.87/ 73.25 %); best 9.322e-05s; worst 0.0002558s; std dev. 7.648e-05 (taking best). + 18 threads: 9.322e-05s (9 Mflops) (2/2 degradations so far) - +Best threads choice is 20; starting threads were 24; max speed gap is 7.1x; search took 0.0038s. +Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 11 subms, 8 lsubms, 3.0095 bpnz (tpop: 3.004e-05 Mflops: 27.962) +Merge (8 -> 6 leaves) took w.c.t. of 1.097e-05s, ~3.815e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 0.0003669s; avg 0.0001223s ( +/- 73.10/ 58.48 %); best 3.29e-05s; worst 0.0001938s; std dev. 6.691e-05 (taking best). +~ 24 threads: 3.29e-05s (26 Mflops) (0/2 degradations so far) - +3 iterations (23 th.) took 0.000149s; avg 4.967e-05s ( +/- 59.68/ 80.96 %); best 2.003e-05s; worst 8.988e-05s; std dev. 2.948e-05 (taking best). + 23 threads: 2.003e-05s (42 Mflops) (0/2 degradations so far) - +3 iterations (22 th.) took 0.0004621s; avg 0.000154s ( +/- 54.49/ 86.38 %); best 7.01e-05s; worst 0.0002871s; std dev. 9.514e-05 (taking best). + 22 threads: 7.01e-05s (12 Mflops) (1/2 degradations so far) - +3 iterations (21 th.) took 0.0002291s; avg 7.637e-05s ( +/- 73.78/143.50 %); best 2.003e-05s; worst 0.000186s; std dev. 7.75e-05 (taking best). + 21 threads: 2.003e-05s (42 Mflops) (0/2 degradations so far) - +3 iterations (20 th.) took 0.000176s; avg 5.865e-05s ( +/- 36.99/ 67.07 %); best 3.695e-05s; worst 9.799e-05s; std dev. 2.787e-05 (taking best). + 20 threads: 3.695e-05s (23 Mflops) (1/2 degradations so far) - +3 iterations (19 th.) took 0.0008819s; avg 0.000294s ( +/- 79.32/101.78 %); best 6.08e-05s; worst 0.0005932s; std dev. 0.0002223 (taking best). + 19 threads: 6.08e-05s (14 Mflops) (2/2 degradations so far) - +Best threads choice is 23; starting threads were 24; max speed gap is 3.5x; search took 0.0023s. +After merge step 1: tpop: 2.003e-05 s ~Mflops: 41.943 nsubm:6 otn:23 +Applying merge (8 -> 6 leaves, 23 th.) yielded SPEEDUP of 1.500x: 3.004e-05s -> 2.003e-05s, so taking this instance. +Merge (6 -> 3 leaves) took w.c.t. of 9.06e-06s, ~4.053e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 9.799e-05s; avg 3.266e-05s ( +/- 38.69/ 46.72 %); best 2.003e-05s; worst 4.792e-05s; std dev. 1.154e-05 (taking best). +~ 24 threads: 2.003e-05s (42 Mflops) (0/2 degradations so far) - +3 iterations (23 th.) took 5.698e-05s; avg 1.899e-05s ( +/- 30.96/ 36.82 %); best 1.311e-05s; worst 2.599e-05s; std dev. 5.315e-06 (taking best). + 23 threads: 1.311e-05s (64 Mflops) (0/2 degradations so far) - +3 iterations (22 th.) took 6.58e-05s; avg 2.193e-05s ( +/- 50.00/ 27.17 %); best 1.097e-05s; worst 2.789e-05s; std dev. 7.765e-06 (taking best). + 22 threads: 1.097e-05s (77 Mflops) (0/2 degradations so far) - +3 iterations (21 th.) took 4.888e-05s; avg 1.629e-05s ( +/- 44.39/ 28.78 %); best 9.06e-06s; worst 2.098e-05s; std dev. 5.188e-06 (taking best). + 21 threads: 9.06e-06s (93 Mflops) (0/2 degradations so far) - +3 iterations (20 th.) took 4.601e-05s; avg 1.534e-05s ( +/- 8.29/ 16.58 %); best 1.407e-05s; worst 1.788e-05s; std dev. 1.798e-06 (taking best). + 20 threads: 1.407e-05s (60 Mflops) (1/2 degradations so far) - +3 iterations (19 th.) took 4.506e-05s; avg 1.502e-05s ( +/- 33.33/ 26.98 %); best 1.001e-05s; worst 1.907e-05s; std dev. 3.76e-06 (taking best). + 19 threads: 1.001e-05s (84 Mflops) (2/2 degradations so far) - +Best threads choice is 21; starting threads were 24; max speed gap is 2.2x; search took 0.00041s. +After merge step 2: tpop: 9.06e-06 s ~Mflops: 92.716 nsubm:3 otn:21 +Applying merge (6 -> 3 leaves, 21 th.) yielded SPEEDUP of 2.211x: 2.003e-05s -> 9.06e-06s, so taking this instance. +Merge (3 -> 1 leaves) took w.c.t. of 1.001e-05s, ~5.96e-06s of computing time (of which 2.146e-06s sorting, 1.192e-06s analysis) +3 iterations (24 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). +~ 24 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (23 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 23 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (22 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 22 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (21 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 21 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (20 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 20 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (19 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 19 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (18 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 18 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (17 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 17 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (16 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 16 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (15 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 15 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (14 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 14 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (13 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 13 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (12 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 12 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (11 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 11 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (10 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 10 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 9 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 8 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 7 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 6 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 5 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (4 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 4 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (3 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 3 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (2 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 2 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (1 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 1 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +Best threads choice is 24; starting threads were 24; max speed gap is 1x; search took 0.00023s. +After merge step 3: tpop: 9.537e-07 s ~Mflops: 880.804 nsubm:1 otn:24 +Applying merge (3 -> 1 leaves, 24 th.) yielded SPEEDUP of 9.500x: 9.06e-06s -> 9.537e-07s, so taking this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 3 merge steps (of max 6) (8 -> 1 subms) took 0.003072s (of which 3.839e-05s partitioning, 0s I/O); computing times: 1.383e-05s in par. loops, 4.292e-06s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.003072s, equivalent to 3221.2/102.3 new/old ops (7.606e-05s for 4 clones -- as 79.8/2.5 ops, or 19.9/0.6 ops per clone), SPEEDUP of 31.500x +Applying multi-merge (8 -> 1 leaves, 3 steps, 20 -> 24 th.sp.) yielded SPEEDUP of 31.500x (3.004e-05s -> 9.537e-07s), will amortize in 105.6 ops by saving 2.909e-05s per op. +In 1 tuning rounds (tot. 0.0069s, 7.6e-05s for constructor, 4 clones) obtained a SPEEDUP of 3050.0% (31.5x) (from 27.96 to 880.8 Mflops). check results are ok Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 5.539e-02 s (100.00 %) - analyzed arrays in 2.400e-02 s (43.33 %) - cleaned-up arrays in 1.907e-06 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 1.538e-02 s (27.76 %) - shuffled partitions in 1.599e-02 s (28.88 %) - memory allocations took 8.106e-06 s (0.01 %) - leafs setup took 9.537e-07 s (0.00 %) - halfword conversion took 1.907e-06 s (0.00 %) -Built (6 x 6)[0x574af7a0]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' + converted COO to RSB in 3.314e-05 s (100.00 %) + analyzed arrays in 1.001e-05 s (30.22 %) + cleaned-up arrays in 0.000e+00 s (0.00 %) + deduplicated arrays in 9.537e-07 s (2.88 %) + sorted arrays in 1.001e-05 s (30.22 %) + shuffled partitions in 6.914e-06 s (20.86 %) + memory allocations took 3.099e-06 s (9.35 %) + leafs setup took 9.537e-07 s (2.88 %) + halfword conversion took 1.192e-06 s (3.60 %) +Built (6 x 6)[0x57f913a0]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' Read matrix pd.mtx 6 x 6 : 36 Matrix has no symmetry Using NRHS=4 -Repeated USMV took 0.7861E-03 s -A single USMM took 0.1597E-04 s -USMM-to-USMV speed ratio is is 49.21 x +Repeated USMV took 0.3290E-04 s +A single USMM took 0.2599E-04 s +USMM-to-USMV speed ratio is is 1.266 x Call auto-tuning routine.. Repeat measurement. -Tuned USMM took 0.5960E-05 s -Tuned-to-untuned speed ratio is is 2.680 x +Tuned USMM took 0.1907E-05 s +Tuned-to-untuned speed ratio is is 13.62 x FAILED: 0 PASSED: 2 /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran_rsb_fi @@ -8875,275 +8975,243 @@ Loading matrix from file "/build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx". Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.186e-01 s (100.00 %) - analyzed arrays in 3.984e-02 s (33.59 %) - cleaned-up arrays in 5.960e-06 s (0.01 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 1.474e-02 s (12.42 %) - shuffled partitions in 3.201e-02 s (26.99 %) - memory allocations took 2.646e-05 s (0.02 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (26.96 %) -Built (6 x 6)[0x57bc79b0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + converted COO to RSB in 8.750e-04 s (100.00 %) + analyzed arrays in 5.293e-05 s (6.05 %) + cleaned-up arrays in 4.053e-06 s (0.46 %) + deduplicated arrays in 0.000e+00 s (0.00 %) + sorted arrays in 7.551e-04 s (86.29 %) + shuffled partitions in 3.099e-05 s (3.54 %) + memory allocations took 6.914e-06 s (0.79 %) + leafs setup took 2.861e-06 s (0.33 %) + halfword conversion took 1.502e-05 s (1.72 %) +Built (6 x 6)[0x57ff29c0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Considering D clone. Base matrix: -(6 x 6)[0x57bca520]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x57ff6320]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.0479s; avg 0.01597s ( +/- 0.39/ 0.24 %); best 0.0159s; worst 0.01601s; std dev. 4.437e-05 (taking best). -Reference operation time is 0.0159039 s (0.009054 Mflops) with 13 threads. -After 0.047979s, autotuning routine did not find a better threads count configuration. +3 iterations (24 th.) took 0.000483s; avg 0.000161s ( +/- 5.68/ 10.61 %); best 0.0001519s; worst 0.0001781s; std dev. 1.209e-05 (taking best). +Reference operation time is 0.000151873 s (0.9482 Mflops) with 24 threads. +After 0.000516s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.048s; avg 0.016s ( +/- 0.01/ 0.02 %); best 0.016s; worst 0.016s; std dev. 2.135e-06 (taking best). -Reference operation time is 0.0159979 s (0.009001 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.016 Mflops: 0.009) -Merge (22 -> 16 leaves) took w.c.t. of 0.008519s, ~0.001041s of computing time (of which 5.245e-06s sorting, 5.96e-06s analysis) -3 iterations (13 th.) took 0.04745s; avg 0.01582s ( +/- 2.22/ 1.19 %); best 0.01547s; worst 0.01601s; std dev. 0.0002485 (taking best). -Reference operation time is 0.015466 s (0.009311 Mflops) with 13 threads. -After merge step 1: tpop: 0.01547 s ~Mflops: 0.009 nsubm:16 otn:13 -Applying merge (22 -> 16 leaves, 13 th.) yielded SPEEDUP of 1.034x: 0.016s -> 0.01547s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.008035s, ~3.099e-05s of computing time (of which 5.007e-06s sorting, 5.96e-06s analysis) -3 iterations (13 th.) took 0.03601s; avg 0.012s ( +/- 32.90/ 32.86 %); best 0.008055s; worst 0.01595s; std dev. 0.003222 (taking best). -Reference operation time is 0.00805497 s (0.01788 Mflops) with 13 threads. -After merge step 2: tpop: 0.008055 s ~Mflops: 0.018 nsubm:10 otn:13 -Applying merge (16 -> 10 leaves, 13 th.) yielded SPEEDUP of 1.920x: 0.01547s -> 0.008055s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 2.789e-05s, ~7.153e-06s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.02395s; avg 0.007983s ( +/- 0.41/ 0.35 %); best 0.007951s; worst 0.008011s; std dev. 2.474e-05 (taking best). -Reference operation time is 0.00795102 s (0.01811 Mflops) with 13 threads. -After merge step 3: tpop: 0.007951 s ~Mflops: 0.018 nsubm:7 otn:13 -Applying merge (10 -> 7 leaves, 13 th.) yielded SPEEDUP of 1.013x: 0.008055s -> 0.007951s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 1.812e-05s, ~5.96e-06s of computing time (of which 1.907e-06s sorting, 3.099e-06s analysis) -3 iterations (13 th.) took 0.02796s; avg 0.009321s ( +/- 14.23/ 28.46 %); best 0.007994s; worst 0.01197s; std dev. 0.001875 (taking best). -Reference operation time is 0.00799394 s (0.01801 Mflops) with 13 threads. -After merge step 4: tpop: 0.007994 s ~Mflops: 0.018 nsubm:4 otn:13 -Applying merge (7 -> 4 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99463x): 0.007951s -> 0.007994s, so IGNORING this instance. -Merge (4 -> 1 leaves) took w.c.t. of 2.003e-05s, ~7.153e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (13 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 52.00/ 56.00 %); best 9.537e-07s; worst 3.099e-06s; std dev. 8.778e-07 (taking best). -Reference operation time is 9.53674e-07 s (151 Mflops) with 13 threads. -After merge step 5: tpop: 9.537e-07 s ~Mflops: 150.995 nsubm:1 otn:13 -Applying merge (4 -> 1 leaves, 13 th.) yielded SPEEDUP of 8337.250x: 0.007951s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 0.0004361s; avg 0.0001454s ( +/- 3.55/ 3.17 %); best 0.0001402s; worst 0.00015s; std dev. 4.01e-06 (taking best). +Reference operation time is 0.00014019 s (1.027 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.0001402 Mflops: 1.027) +Merge (22 -> 16 leaves) took w.c.t. of 0.0002809s, ~0.0002699s of computing time (of which 3.099e-06s sorting, 2.861e-06s analysis) +3 iterations (24 th.) took 0.001728s; avg 0.000576s ( +/- 84.89/169.45 %); best 8.702e-05s; worst 0.001552s; std dev. 0.0006902 (taking best). +Reference operation time is 8.70228e-05 s (1.655 Mflops) with 24 threads. +After merge step 1: tpop: 8.702e-05 s ~Mflops: 1.655 nsubm:16 otn:24 +Applying merge (22 -> 16 leaves, 24 th.) yielded SPEEDUP of 1.611x: 0.0001402s -> 8.702e-05s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 1.502e-05s, ~1.001e-05s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 0.001638s; avg 0.000546s ( +/- 92.14/182.79 %); best 4.292e-05s; worst 0.001544s; std dev. 0.0007057 (taking best). +Reference operation time is 4.29153e-05 s (3.355 Mflops) with 24 threads. +After merge step 2: tpop: 4.292e-05 s ~Mflops: 3.355 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded SPEEDUP of 2.028x: 8.702e-05s -> 4.292e-05s, so taking this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.097e-05s, ~2.861e-06s of computing time (of which 0s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 6.89e-05s; avg 2.297e-05s ( +/- 26.30/ 29.76 %); best 1.693e-05s; worst 2.98e-05s; std dev. 5.286e-06 (taking best). +Reference operation time is 1.69277e-05 s (8.507 Mflops) with 24 threads. +After merge step 3: tpop: 1.693e-05 s ~Mflops: 8.507 nsubm:7 otn:24 +Applying merge (10 -> 7 leaves, 24 th.) yielded SPEEDUP of 2.535x: 4.292e-05s -> 1.693e-05s, so taking this instance. +Merge (7 -> 4 leaves) took w.c.t. of 5.96e-06s, ~2.146e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 16.56/ 17.22 %); best 1.001e-05s; worst 1.407e-05s; std dev. 1.656e-06 (taking best). +Reference operation time is 1.00136e-05 s (14.38 Mflops) with 24 threads. +After merge step 4: tpop: 1.001e-05 s ~Mflops: 14.380 nsubm:4 otn:24 +Applying merge (7 -> 4 leaves, 24 th.) yielded SPEEDUP of 1.690x: 1.693e-05s -> 1.001e-05s, so taking this instance. +Merge (4 -> 1 leaves) took w.c.t. of 8.106e-06s, ~2.861e-06s of computing time (of which 9.537e-07s sorting, 0s analysis) +3 iterations (24 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 92.75/ 66.67 %); best 5.186e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.1856e-08 s (2777 Mflops) with 24 threads. +After merge step 5: tpop: 5.186e-08 s ~Mflops: 2776.919 nsubm:1 otn:24 +Applying merge (4 -> 1 leaves, 24 th.) yielded SPEEDUP of 193.103x: 1.001e-05s -> 5.186e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.276s (of which 0.01665s partitioning, 0s I/O); computing times: 0.001092s in par. loops, 1.597e-05s sorting, 2.122e-05s analyzing) -Total merge + benchmarking process took 0.276s, equivalent to 289396.5/17.3 new/old ops (0.1715s for 5 clones -- as 179850.5/10.7 ops, or 35970.1/2.1 ops per clone), SPEEDUP of 16775.000x -Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 16775.000x (0.016s -> 9.537e-07s), will amortize in 17.3 ops by saving 0.016s per op. -In 1 tuning rounds (tot. 0.37s, 0.17s for constructor, 5 clones) obtained a SPEEDUP of 1677400.0% (1.678e+04x) (from 0.009001 to 151 Mflops). -After 0.372008s, global autotuning declared speedup of 16775 x, when using threads count of 13 and a new matrix: -(6 x 6)[0x57bcee70]{D} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.004011s (of which 0.0003479s partitioning, 0s I/O); computing times: 0.0002878s in par. loops, 6.914e-06s sorting, 7.868e-06s analyzing) +Total merge + benchmarking process took 0.004011s, equivalent to 77347.1/28.6 new/old ops (0.0001245s for 6 clones -- as 2400.0/0.9 ops, or 400.0/0.1 ops per clone), SPEEDUP of 2703.448x +Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 2703.448x (0.0001402s -> 5.186e-08s), will amortize in 28.6 ops by saving 0.0001401s per op. +In 1 tuning rounds (tot. 0.0045s, 0.00012s for constructor, 6 clones) obtained a SPEEDUP of 270244.8% (2703x) (from 1.027 to 2777 Mflops). +After 0.004514s, global autotuning declared speedup of 2703.45 x, when using threads count of 24 and a new matrix: +(6 x 6)[0x57ff5190]{D} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' Considering S clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 5.997e-02 s (100.00 %) - analyzed arrays in 2.394e-02 s (39.92 %) - cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 1.192e-06 s (0.00 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 1.599e-02 s (26.67 %) - memory allocations took 7.153e-06 s (0.01 %) - leafs setup took 5.007e-06 s (0.01 %) - halfword conversion took 2.002e-02 s (33.38 %) -Built (6 x 6)[0x57bcbf50]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' + converted COO to RSB in 7.296e-05 s (100.00 %) + analyzed arrays in 3.195e-05 s (43.79 %) + cleaned-up arrays in 0.000e+00 s (0.00 %) + deduplicated arrays in 9.537e-07 s (1.31 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 2.313e-05 s (31.70 %) + memory allocations took 4.053e-06 s (5.56 %) + leafs setup took 9.537e-07 s (1.31 %) + halfword conversion took 1.192e-05 s (16.34 %) +Built (6 x 6)[0x57ff6320]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Base matrix: -(6 x 6)[0x57bcbf50]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +(6 x 6)[0x57ff6320]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.03193s; avg 0.01064s ( +/- 24.83/ 12.81 %); best 0.008s; worst 0.01201s; std dev. 0.001869 (taking best). -Reference operation time is 0.0079999 s (0.018 Mflops) with 13 threads. -After 0.031962s, autotuning routine did not find a better threads count configuration. +3 iterations (24 th.) took 0.0002711s; avg 9.036e-05s ( +/- 9.23/ 12.93 %); best 8.202e-05s; worst 0.000102s; std dev. 8.51e-06 (taking best). +Reference operation time is 8.2016e-05 s (1.756 Mflops) with 24 threads. +After 0.000287s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.024s; avg 0.007998s ( +/- 0.19/ 0.32 %); best 0.007983s; worst 0.008024s; std dev. 1.82e-05 (taking best). -Reference operation time is 0.00798321 s (0.01804 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.007983 Mflops: 0.018) -Merge (16 -> 10 leaves) took w.c.t. of 0.003961s, ~1.478e-05s of computing time (of which 3.099e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.03997s; avg 0.01332s ( +/- 39.91/ 49.90 %); best 0.008006s; worst 0.01997s; std dev. 0.004975 (taking best). -Reference operation time is 0.00800586 s (0.01799 Mflops) with 13 threads. -After merge step 1: tpop: 0.008006 s ~Mflops: 0.018 nsubm:10 otn:13 -Applying merge (16 -> 10 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99717x): 0.007983s -> 0.008006s, so IGNORING this instance. -Merge (10 -> 7 leaves) took w.c.t. of 2.193e-05s, ~7.153e-06s of computing time (of which 2.146e-06s sorting, 5.007e-06s analysis) -3 iterations (13 th.) took 0.02371s; avg 0.007903s ( +/- 2.27/ 1.35 %); best 0.007724s; worst 0.00801s; std dev. 0.0001276 (taking best). -Reference operation time is 0.00772405 s (0.01864 Mflops) with 13 threads. -After merge step 2: tpop: 0.007724 s ~Mflops: 0.019 nsubm:7 otn:13 -Applying merge (10 -> 7 leaves, 13 th.) yielded SPEEDUP of 1.034x: 0.007983s -> 0.007724s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 2.217e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 4.053e-06s analysis) -3 iterations (13 th.) took 0.02795s; avg 0.009317s ( +/- 14.50/ 28.74 %); best 0.007966s; worst 0.01199s; std dev. 0.001893 (taking best). -Reference operation time is 0.00796604 s (0.01808 Mflops) with 13 threads. -After merge step 3: tpop: 0.007966 s ~Mflops: 0.018 nsubm:4 otn:13 -Applying merge (7 -> 4 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.031x: 0.007724s -> 0.007966s. -Merge (4 -> 1 leaves) took w.c.t. of 1.788e-05s, ~6.914e-06s of computing time (of which 3.099e-06s sorting, 1.907e-06s analysis) -3 iterations (13 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 28.57 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.15e-07 (taking best). -Reference operation time is 9.53674e-07 s (151 Mflops) with 13 threads. -After merge step 4: tpop: 9.537e-07 s ~Mflops: 150.995 nsubm:1 otn:13 -Applying merge (4 -> 1 leaves, 13 th.) yielded SPEEDUP of 8099.250x: 0.007724s -> 9.537e-07s, so taking this instance. +3 iterations (24 th.) took 0.000268s; avg 8.933e-05s ( +/- 10.59/ 15.30 %); best 7.987e-05s; worst 0.000103s; std dev. 9.9e-06 (taking best). +Reference operation time is 7.98702e-05 s (1.803 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 7.987e-05 Mflops: 1.803) +Merge (16 -> 10 leaves) took w.c.t. of 1.097e-05s, ~7.153e-06s of computing time (of which 0s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 0.001698s; avg 0.000566s ( +/- 92.59/184.25 %); best 4.196e-05s; worst 0.001609s; std dev. 0.0007374 (taking best). +Reference operation time is 4.19617e-05 s (3.432 Mflops) with 24 threads. +After merge step 1: tpop: 4.196e-05 s ~Mflops: 3.432 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded SPEEDUP of 1.903x: 7.987e-05s -> 4.196e-05s, so taking this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.097e-05s, ~3.099e-06s of computing time (of which 0s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 6.89e-05s; avg 2.297e-05s ( +/- 12.80/ 21.45 %); best 2.003e-05s; worst 2.789e-05s; std dev. 3.506e-06 (taking best). +Reference operation time is 2.00272e-05 s (7.19 Mflops) with 24 threads. +After merge step 2: tpop: 2.003e-05 s ~Mflops: 7.190 nsubm:7 otn:24 +Applying merge (10 -> 7 leaves, 24 th.) yielded SPEEDUP of 2.095x: 4.196e-05s -> 2.003e-05s, so taking this instance. +Merge (7 -> 4 leaves) took w.c.t. of 6.914e-06s, ~1.907e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 18.71/ 14.19 %); best 1.001e-05s; worst 1.407e-05s; std dev. 1.701e-06 (taking best). +Reference operation time is 1.00136e-05 s (14.38 Mflops) with 24 threads. +After merge step 3: tpop: 1.001e-05 s ~Mflops: 14.380 nsubm:4 otn:24 +Applying merge (7 -> 4 leaves, 24 th.) yielded SPEEDUP of 2.000x: 2.003e-05s -> 1.001e-05s, so taking this instance. +Merge (4 -> 1 leaves) took w.c.t. of 5.96e-06s, ~2.861e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 92.75/ 66.67 %); best 5.186e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.1856e-08 s (2777 Mflops) with 24 threads. +After merge step 4: tpop: 5.186e-08 s ~Mflops: 2776.919 nsubm:1 otn:24 +Applying merge (4 -> 1 leaves, 24 th.) yielded SPEEDUP of 193.103x: 1.001e-05s -> 5.186e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 4 merge steps (of max 6) (16 -> 1 subms) took 0.144s (of which 0.004272s partitioning, 0s I/O); computing times: 3.576e-05s in par. loops, 1.049e-05s sorting, 1.502e-05s analyzing) -Total merge + benchmarking process took 0.144s, equivalent to 150968.8/18.0 new/old ops (0.07595s for 3 clones -- as 79638.2/9.5 ops, or 26546.1/3.2 ops per clone), SPEEDUP of 8371.000x -Applying multi-merge (16 -> 1 leaves, 4 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 8371.000x (0.007983s -> 9.537e-07s), will amortize in 18.0 ops by saving 0.007982s per op. -In 1 tuning rounds (tot. 0.2s, 0.076s for constructor, 3 clones) obtained a SPEEDUP of 837000.0% (8371x) (from 0.01804 to 151 Mflops). -After 0.196017s, global autotuning declared speedup of 8371 x, when using threads count of 13 and a new matrix: -(6 x 6)[0x57bce1c0]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +A total of 4 merge steps (of max 6) (16 -> 1 subms) took 0.001995s (of which 5.722e-05s partitioning, 0s I/O); computing times: 1.502e-05s in par. loops, 9.537e-07s sorting, 6.199e-06s analyzing) +Total merge + benchmarking process took 0.001995s, equivalent to 38473.6/25.0 new/old ops (9.871e-05s for 5 clones -- as 1903.4/1.2 ops, or 380.7/0.2 ops per clone), SPEEDUP of 1540.230x +Applying multi-merge (16 -> 1 leaves, 4 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 1540.230x (7.987e-05s -> 5.186e-08s), will amortize in 25.0 ops by saving 7.982e-05s per op. +In 1 tuning rounds (tot. 0.0023s, 9.9e-05s for constructor, 5 clones) obtained a SPEEDUP of 153923.0% (1540x) (from 1.803 to 2777 Mflops). +After 0.002328s, global autotuning declared speedup of 1540.23 x, when using threads count of 24 and a new matrix: +(6 x 6)[0x57ff4a30]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' Considering C clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 5.994e-02 s (100.00 %) - analyzed arrays in 2.390e-02 s (39.88 %) - cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) + converted COO to RSB in 1.018e-04 s (100.00 %) + analyzed arrays in 4.387e-05 s (43.09 %) + cleaned-up arrays in 0.000e+00 s (0.00 %) + deduplicated arrays in 9.537e-07 s (0.94 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.599e-02 s (26.68 %) - memory allocations took 1.502e-05 s (0.03 %) - leafs setup took 4.053e-06 s (0.01 %) - halfword conversion took 2.002e-02 s (33.40 %) -Built (6 x 6)[0x57bd0100]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + shuffled partitions in 3.314e-05 s (32.55 %) + memory allocations took 1.001e-05 s (9.84 %) + leafs setup took 9.537e-07 s (0.94 %) + halfword conversion took 1.192e-05 s (11.71 %) +Built (6 x 6)[0x57ffc5b0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Base matrix: -(6 x 6)[0x57bd0100]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x57ffc5b0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.02393s; avg 0.007977s ( +/- 1.60/ 2.14 %); best 0.00785s; worst 0.008148s; std dev. 0.0001254 (taking best). -Reference operation time is 0.00784993 s (0.07338 Mflops) with 13 threads. -After 0.023960s, autotuning routine did not find a better threads count configuration. +3 iterations (24 th.) took 0.000464s; avg 0.0001547s ( +/- 7.50/ 4.68 %); best 0.0001431s; worst 0.0001619s; std dev. 8.287e-06 (taking best). +Reference operation time is 0.000143051 s (4.027 Mflops) with 24 threads. +After 0.000480s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.028s; avg 0.009334s ( +/- 14.47/ 28.74 %); best 0.007984s; worst 0.01202s; std dev. 0.001897 (taking best). -Reference operation time is 0.00798392 s (0.07214 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.007984 Mflops: 0.072) -Merge (22 -> 16 leaves) took w.c.t. of 0.003762s, ~1.526e-05s of computing time (of which 2.861e-06s sorting, 5.007e-06s analysis) -3 iterations (13 th.) took 0.04393s; avg 0.01464s ( +/- 19.58/ 37.77 %); best 0.01178s; worst 0.02017s; std dev. 0.003912 (taking best). -Reference operation time is 0.0117772 s (0.04891 Mflops) with 13 threads. -After merge step 1: tpop: 0.01178 s ~Mflops: 0.049 nsubm:16 otn:13 -Applying merge (22 -> 16 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.475x: 0.007984s -> 0.01178s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 merge steps (of max 6) (22 -> 16 subms) took 0.04774s (of which 0.003769s partitioning, 0s I/O); computing times: 1.526e-05s in par. loops, 2.861e-06s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.04774s, equivalent to 6.0/6.0 new/old ops (0.02797s for 1 clones -- as 3.5/3.5 ops, or 3.5/3.5 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 0.048s; avg 0.016s ( +/- 0.08/ 0.10 %); best 0.01599s; worst 0.01602s; std dev. 1.215e-05 (taking best). -Reference operation time is 0.015986 s (0.03603 Mflops) with 13 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01599 Mflops: 0.036) -Split (22 -> 28 leaves, 29 -> 37 subms) took 0.01195s (of which: 5.96e-06s analysis, -3.481e+09s mem.mgmt); compute time: 6.628e-05s overall, 6.676e-06s searches, 5.96e-05s shuffle, 1.812e-05s switch, 1.192e-05s quadrants. -3 iterations (13 th.) took 0.03598s; avg 0.01199s ( +/- 33.27/ 33.07 %); best 0.008003s; worst 0.01596s; std dev. 0.003248 (taking best). -Reference operation time is 0.008003 s (0.07197 Mflops) with 13 threads. -After split step 1: tpop: 0.008003 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (22 -> 28 leaves, 13 th.) yielded SPEEDUP of 1.997x: 0.01599s -> 0.008003s, so taking this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01196s (of which: 6.914e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.02799s; avg 0.00933s ( +/- 14.39/ 28.68 %); best 0.007988s; worst 0.01201s; std dev. 0.001892 (taking best). -Reference operation time is 0.00798798 s (0.07211 Mflops) with 13 threads. -After split step 2: tpop: 0.007988 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00188x): 0.008003s -> 0.007988s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01198s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.028s; avg 0.009332s ( +/- 14.30/ 28.41 %); best 0.007998s; worst 0.01198s; std dev. 0.001875 (taking best). -Reference operation time is 0.00799799 s (0.07202 Mflops) with 13 threads. -After split step 3: tpop: 0.007998 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00063x): 0.008003s -> 0.007998s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01598s (of which: 5.96e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.02396s; avg 0.007986s ( +/- 0.33/ 0.33 %); best 0.007959s; worst 0.008012s; std dev. 2.151e-05 (taking best). -Reference operation time is 0.00795913 s (0.07237 Mflops) with 13 threads. -After split step 4: tpop: 0.007959 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00551x): 0.008003s -> 0.007959s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01578s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.02396s; avg 0.007988s ( +/- 0.21/ 0.10 %); best 0.007971s; worst 0.007996s; std dev. 1.18e-05 (taking best). -Reference operation time is 0.00797105 s (0.07226 Mflops) with 13 threads. -After split step 5: tpop: 0.007971 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00401x): 0.008003s -> 0.007971s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01597s (of which: 5.96e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.028s; avg 0.009334s ( +/- 14.42/ 28.25 %); best 0.007988s; worst 0.01197s; std dev. 0.001864 (taking best). -Reference operation time is 0.00798798 s (0.07211 Mflops) with 13 threads. -After split step 6: tpop: 0.007988 s ~Mflops: 0.072 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00188x): 0.008003s -> 0.007988s, so IGNORING this instance. -A total of 6 split steps (of max 6) (22 -> 28 subms) took 0.28s (of which 0.08378s partitioning, 0s I/O); computing times: 6.628e-05s in par. loops, 6.676e-06s sorting, 3.481e-05s analyzing) -Total split + benchmarking process took 0.28s, equivalent to 35.0/17.5 new/old ops (0.07593s for 2 clones -- as 9.5/4.7 ops, or 4.7/2.4 ops per clone), SPEEDUP of 1.997x -Applying multi-split (22 -> 28 leaves, 1 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 1.997x (0.01599s -> 0.008003s), will amortize in 35.1 ops by saving 0.007983s per op. -In 1 tuning rounds (tot. 0.48s, 0.1s for constructor, 3 clones) obtained a SPEEDUP of 99.7% (1.997x) (from 0.03603 to 0.07197 Mflops). -After 0.480051s, global autotuning declared speedup of 1.9975 x, when using threads count of 13 and a new matrix: -(6 x 6)[0x57bd1e50]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +3 iterations (24 th.) took 0.000514s; avg 0.0001713s ( +/- 12.48/ 24.81 %); best 0.00015s; worst 0.0002139s; std dev. 3.006e-05 (taking best). +Reference operation time is 0.000149965 s (3.841 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.00015 Mflops: 3.841) +Merge (22 -> 16 leaves) took w.c.t. of 1.407e-05s, ~7.153e-06s of computing time (of which 0s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 0.001835s; avg 0.0006116s ( +/- 86.59/170.57 %); best 8.202e-05s; worst 0.001655s; std dev. 0.0007377 (taking best). +Reference operation time is 8.2016e-05 s (7.023 Mflops) with 24 threads. +After merge step 1: tpop: 8.202e-05 s ~Mflops: 7.023 nsubm:16 otn:24 +Applying merge (22 -> 16 leaves, 24 th.) yielded SPEEDUP of 1.828x: 0.00015s -> 8.202e-05s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 1.502e-05s, ~6.914e-06s of computing time (of which 1.907e-06s sorting, 2.861e-06s analysis) +3 iterations (24 th.) took 0.00163s; avg 0.0005434s ( +/- 92.28/183.98 %); best 4.196e-05s; worst 0.001543s; std dev. 0.0007069 (taking best). +Reference operation time is 4.19617e-05 s (13.73 Mflops) with 24 threads. +After merge step 2: tpop: 4.196e-05 s ~Mflops: 13.727 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded SPEEDUP of 1.955x: 8.202e-05s -> 4.196e-05s, so taking this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.001e-05s, ~2.861e-06s of computing time (of which 9.537e-07s sorting, 2.146e-06s analysis) +3 iterations (24 th.) took 6.39e-05s; avg 2.13e-05s ( +/- 16.04/ 22.01 %); best 1.788e-05s; worst 2.599e-05s; std dev. 3.429e-06 (taking best). +Reference operation time is 1.78814e-05 s (32.21 Mflops) with 24 threads. +After merge step 3: tpop: 1.788e-05 s ~Mflops: 32.212 nsubm:7 otn:24 +Applying merge (10 -> 7 leaves, 24 th.) yielded SPEEDUP of 2.347x: 4.196e-05s -> 1.788e-05s, so taking this instance. +Merge (7 -> 4 leaves) took w.c.t. of 5.96e-06s, ~2.146e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 3.29e-05s; avg 1.097e-05s ( +/- 8.70/ 8.70 %); best 1.001e-05s; worst 1.192e-05s; std dev. 7.787e-07 (taking best). +Reference operation time is 1.00136e-05 s (57.52 Mflops) with 24 threads. +After merge step 4: tpop: 1.001e-05 s ~Mflops: 57.522 nsubm:4 otn:24 +Applying merge (7 -> 4 leaves, 24 th.) yielded SPEEDUP of 1.786x: 1.788e-05s -> 1.001e-05s, so taking this instance. +Merge (4 -> 1 leaves) took w.c.t. of 6.914e-06s, ~3.099e-06s of computing time (of which 2.146e-06s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). +Reference operation time is 9.53674e-07 s (604 Mflops) with 24 threads. +After merge step 5: tpop: 9.537e-07 s ~Mflops: 603.980 nsubm:1 otn:24 +Applying merge (4 -> 1 leaves, 24 th.) yielded SPEEDUP of 10.500x: 1.001e-05s -> 9.537e-07s, so taking this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.003798s (of which 6.557e-05s partitioning, 0s I/O); computing times: 2.217e-05s in par. loops, 6.199e-06s sorting, 9.06e-06s analyzing) +Total merge + benchmarking process took 0.003798s, equivalent to 3982.5/25.3 new/old ops (0.0001209s for 6 clones -- as 126.8/0.8 ops, or 21.1/0.1 ops per clone), SPEEDUP of 157.250x +Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 157.250x (0.00015s -> 9.537e-07s), will amortize in 25.5 ops by saving 0.000149s per op. +In 1 tuning rounds (tot. 0.0044s, 0.00012s for constructor, 6 clones) obtained a SPEEDUP of 15625.0% (157.2x) (from 3.841 to 604 Mflops). +After 0.004387s, global autotuning declared speedup of 157.25 x, when using threads count of 24 and a new matrix: +(6 x 6)[0x57ff4980]{C} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' Considering Z clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 5.191e-02 s (100.00 %) - analyzed arrays in 1.989e-02 s (38.33 %) - cleaned-up arrays in 2.146e-06 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) + converted COO to RSB in 1.111e-04 s (100.00 %) + analyzed arrays in 4.482e-05 s (40.34 %) + cleaned-up arrays in 1.192e-06 s (1.07 %) + deduplicated arrays in 0.000e+00 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.875e-02 s (36.13 %) - memory allocations took 1.025e-05 s (0.02 %) - leafs setup took 5.007e-06 s (0.01 %) - halfword conversion took 1.324e-02 s (25.51 %) -Built (6 x 6)[0x57bcfff0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' + shuffled partitions in 3.695e-05 s (33.26 %) + memory allocations took 1.407e-05 s (12.66 %) + leafs setup took 1.907e-06 s (1.72 %) + halfword conversion took 1.216e-05 s (10.94 %) +Built (6 x 6)[0x57ffb3d0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Base matrix: -(6 x 6)[0x57bcfff0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +(6 x 6)[0x57ffb3d0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.02794s; avg 0.009313s ( +/- 14.72/ 28.88 %); best 0.007942s; worst 0.012s; std dev. 0.001902 (taking best). -Reference operation time is 0.00794196 s (0.07253 Mflops) with 13 threads. -After 0.027970s, autotuning routine did not find a better threads count configuration. +3 iterations (24 th.) took 0.000669s; avg 0.000223s ( +/- 3.14/ 1.78 %); best 0.000216s; worst 0.000227s; std dev. 4.961e-06 (taking best). +Reference operation time is 0.000216007 s (2.667 Mflops) with 24 threads. +After 0.000686s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (13 th.) took 0.02399s; avg 0.007995s ( +/- 0.10/ 0.13 %); best 0.007987s; worst 0.008006s; std dev. 7.86e-06 (taking best). -Reference operation time is 0.00798702 s (0.07212 Mflops) with 13 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.007987 Mflops: 0.072) -Merge (28 -> 22 leaves) took w.c.t. of 0.003937s, ~2.48e-05s of computing time (of which 4.292e-06s sorting, 5.96e-06s analysis) -3 iterations (13 th.) took 0.03579s; avg 0.01193s ( +/- 33.13/ 32.42 %); best 0.007977s; worst 0.0158s; std dev. 0.003193 (taking best). -Reference operation time is 0.00797701 s (0.07221 Mflops) with 13 threads. -After merge step 1: tpop: 0.007977 s ~Mflops: 0.072 nsubm:22 otn:13 -Applying merge (28 -> 22 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00126x): 0.007987s -> 0.007977s, so IGNORING this instance. -Merge (22 -> 16 leaves) took w.c.t. of 0.004016s, ~3.624e-05s of computing time (of which 4.053e-06s sorting, 1.001e-05s analysis) -3 iterations (13 th.) took 0.04395s; avg 0.01465s ( +/- 18.22/ 9.30 %); best 0.01198s; worst 0.01601s; std dev. 0.001888 (taking best). -Reference operation time is 0.011981 s (0.04808 Mflops) with 13 threads. -After merge step 2: tpop: 0.01198 s ~Mflops: 0.048 nsubm:16 otn:13 -Applying merge (22 -> 16 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.500x: 0.007987s -> 0.01198s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 merge steps (of max 6) (28 -> 16 subms) took 0.08779s (of which 0.007971s partitioning, 0s I/O); computing times: 6.104e-05s in par. loops, 8.345e-06s sorting, 1.597e-05s analyzing) -Total merge + benchmarking process took 0.08779s, equivalent to 11.0/11.0 new/old ops (0.02398s for 1 clones -- as 3.0/3.0 ops, or 3.0/3.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (13 th.) took 0.04799s; avg 0.016s ( +/- 0.04/ 0.06 %); best 0.01599s; worst 0.01601s; std dev. 7.123e-06 (taking best). -Reference operation time is 0.0159891 s (0.03602 Mflops) with 13 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.01599 Mflops: 0.036) -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02396s (of which: 1.001e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.04795s; avg 0.01598s ( +/- 0.15/ 0.10 %); best 0.01596s; worst 0.016s; std dev. 1.704e-05 (taking best). -Reference operation time is 0.0159612 s (0.03609 Mflops) with 13 threads. -After split step 1: tpop: 0.01596 s ~Mflops: 0.036 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00175x): 0.01599s -> 0.01596s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02397s (of which: 1.001e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.04795s; avg 0.01598s ( +/- 0.14/ 0.16 %); best 0.01596s; worst 0.01601s; std dev. 1.944e-05 (taking best). -Reference operation time is 0.0159619 s (0.03609 Mflops) with 13 threads. -After split step 2: tpop: 0.01596 s ~Mflops: 0.036 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00170x): 0.01599s -> 0.01596s, so IGNORING this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02396s (of which: 1.097e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.03447s; avg 0.01149s ( +/- 43.27/ 38.69 %); best 0.006518s; worst 0.01594s; std dev. 0.003863 (taking best). -Reference operation time is 0.00651813 s (0.08837 Mflops) with 13 threads. -After split step 3: tpop: 0.006518 s ~Mflops: 0.088 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded SPEEDUP of 2.453x: 0.01599s -> 0.006518s, so taking this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 5.913e-05s (of which: 7.868e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.008407s; avg 0.002802s ( +/- 75.56/149.37 %); best 0.000685s; worst 0.006988s; std dev. 0.00296 (taking best). -Reference operation time is 0.000684977 s (0.8409 Mflops) with 13 threads. -After split step 4: tpop: 0.000685 s ~Mflops: 0.841 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded SPEEDUP of 9.516x: 0.006518s -> 0.000685s, so taking this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 5.198e-05s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.003545s; avg 0.001182s ( +/- 44.57/ 79.58 %); best 0.0006549s; worst 0.002122s; std dev. 0.0006665 (taking best). -Reference operation time is 0.000654936 s (0.8795 Mflops) with 13 threads. -After split step 5: tpop: 0.0006549 s ~Mflops: 0.879 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded SPEEDUP of 1.046x: 0.000685s -> 0.0006549s, so taking this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 4.983e-05s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (13 th.) took 0.002188s; avg 0.0007293s ( +/- 3.63/ 5.98 %); best 0.0007029s; worst 0.000773s; std dev. 3.108e-05 (taking best). -Reference operation time is 0.000702858 s (0.8195 Mflops) with 13 threads. -After split step 6: tpop: 0.0007029 s ~Mflops: 0.820 nsubm:28 otn:13 -Applying split (28 -> 28 leaves, 13 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.073x: 0.0006549s -> 0.0007029s. -A total of 6 split steps (of max 6) (28 -> 28 subms) took 0.2174s (of which 0.07251s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 4.888e-05s analyzing) -Total split + benchmarking process took 0.2174s, equivalent to 331.9/13.6 new/old ops (0.04409s for 4 clones -- as 67.3/2.8 ops, or 16.8/0.7 ops per clone), SPEEDUP of 24.413x -Applying multi-split (28 -> 28 leaves, 5 steps, 0 -> 13 th.sp.) yielded SPEEDUP of 24.413x (0.01599s -> 0.0006549s), will amortize in 14.2 ops by saving 0.01533s per op. -In 1 tuning rounds (tot. 0.45s, 0.068s for constructor, 5 clones) obtained a SPEEDUP of 2341.3% (24.41x) (from 0.03602 to 0.8795 Mflops). -After 0.445420s, global autotuning declared speedup of 24.4132 x, when using threads count of 13 and a new matrix: -(6 x 6)[0x57bd2a40]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +3 iterations (24 th.) took 0.0006189s; avg 0.0002063s ( +/- 4.55/ 4.70 %); best 0.0001969s; worst 0.000216s; std dev. 7.79e-06 (taking best). +Reference operation time is 0.000196934 s (2.925 Mflops) with 24 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.0001969 Mflops: 2.925) +Merge (28 -> 22 leaves) took w.c.t. of 1.407e-05s, ~7.153e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (24 th.) took 0.002047s; avg 0.0006824s ( +/- 76.24/152.06 %); best 0.0001621s; worst 0.00172s; std dev. 0.0007337 (taking best). +Reference operation time is 0.000162125 s (3.553 Mflops) with 24 threads. +After merge step 1: tpop: 0.0001621 s ~Mflops: 3.553 nsubm:22 otn:24 +Applying merge (28 -> 22 leaves, 24 th.) yielded SPEEDUP of 1.215x: 0.0001969s -> 0.0001621s, so taking this instance. +Merge (22 -> 16 leaves) took w.c.t. of 1.287e-05s, ~5.722e-06s of computing time (of which 1.907e-06s sorting, 2.861e-06s analysis) +3 iterations (24 th.) took 0.005676s; avg 0.001892s ( +/- 94.71/107.56 %); best 0.0001001s; worst 0.003927s; std dev. 0.001572 (taking best). +Reference operation time is 0.000100136 s (5.752 Mflops) with 24 threads. +After merge step 2: tpop: 0.0001001 s ~Mflops: 5.752 nsubm:16 otn:24 +Applying merge (22 -> 16 leaves, 24 th.) yielded SPEEDUP of 1.619x: 0.0001621s -> 0.0001001s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 1.407e-05s, ~8.821e-06s of computing time (of which 2.146e-06s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 0.001796s; avg 0.0005987s ( +/- 92.63/184.79 %); best 4.411e-05s; worst 0.001705s; std dev. 0.0007822 (taking best). +Reference operation time is 4.41074e-05 s (13.06 Mflops) with 24 threads. +After merge step 3: tpop: 4.411e-05 s ~Mflops: 13.059 nsubm:10 otn:24 +Applying merge (16 -> 10 leaves, 24 th.) yielded SPEEDUP of 2.270x: 0.0001001s -> 4.411e-05s, so taking this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.097e-05s, ~2.861e-06s of computing time (of which 0s sorting, 1.907e-06s analysis) +3 iterations (24 th.) took 7.2e-05s; avg 2.4e-05s ( +/- 4.64/ 8.28 %); best 2.289e-05s; worst 2.599e-05s; std dev. 1.408e-06 (taking best). +Reference operation time is 2.28882e-05 s (25.17 Mflops) with 24 threads. +After merge step 4: tpop: 2.289e-05 s ~Mflops: 25.166 nsubm:7 otn:24 +Applying merge (10 -> 7 leaves, 24 th.) yielded SPEEDUP of 1.927x: 4.411e-05s -> 2.289e-05s, so taking this instance. +Merge (7 -> 4 leaves) took w.c.t. of 5.96e-06s, ~1.907e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) +3 iterations (24 th.) took 3.719e-05s; avg 1.24e-05s ( +/- 9.62/ 11.54 %); best 1.121e-05s; worst 1.383e-05s; std dev. 1.084e-06 (taking best). +Reference operation time is 1.12057e-05 s (51.4 Mflops) with 24 threads. +After merge step 5: tpop: 1.121e-05 s ~Mflops: 51.403 nsubm:4 otn:24 +Applying merge (7 -> 4 leaves, 24 th.) yielded SPEEDUP of 2.043x: 2.289e-05s -> 1.121e-05s, so taking this instance. +Merge (4 -> 1 leaves) took w.c.t. of 6.914e-06s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 0s analysis) +3 iterations (24 th.) took 3.815e-06s; avg 1.272e-06s ( +/- 25.00/ 50.00 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.496e-07 (taking best). +Reference operation time is 9.53674e-07 s (604 Mflops) with 24 threads. +After merge step 6: tpop: 9.537e-07 s ~Mflops: 603.980 nsubm:1 otn:24 +Applying merge (4 -> 1 leaves, 24 th.) yielded SPEEDUP of 11.750x: 1.121e-05s -> 9.537e-07s, so taking this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 6 merge steps (of max 6) (28 -> 1 subms) took 0.009942s (of which 8.297e-05s partitioning, 0s I/O); computing times: 2.956e-05s in par. loops, 7.391e-06s sorting, 1.073e-05s analyzing) +Total merge + benchmarking process took 0.009942s, equivalent to 10425.0/50.5 new/old ops (0.0001531s for 7 clones -- as 160.5/0.8 ops, or 22.9/0.1 ops per clone), SPEEDUP of 206.500x +Applying multi-merge (28 -> 1 leaves, 6 steps, 0 -> 24 th.sp.) yielded SPEEDUP of 206.500x (0.0001969s -> 9.537e-07s), will amortize in 50.7 ops by saving 0.000196s per op. +In 1 tuning rounds (tot. 0.011s, 0.00015s for constructor, 7 clones) obtained a SPEEDUP of 20550.0% (206.5x) (from 2.925 to 604 Mflops). +After 0.010635s, global autotuning declared speedup of 206.5 x, when using threads count of 24 and a new matrix: +(6 x 6)[0x57ff59c0]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -9274,10 +9342,10 @@ 4 0 DIFF PRINT TEST END Beginning large binary search test. -Detected 4014092288 bytes of memory, comprehensive of 1360900096 of free memory. +Detected 3995713536 bytes of memory, comprehensive of 2034978816 of free memory. On this system, maximal array of coordinates can have 2147483137 elements and occupy 4294965252 bytes. -Will perform the test using less memory (973 MB) than on the maximal coordinate indices array (1020675072) allows. -(c)allocated 255168768 nnz (1020675072 bytes) +Will perform the test using less memory (431 MB) than on the maximal coordinate indices array (452492288) allows. +(c)allocated 113123072 nnz (452492288 bytes) Succeeded retrieving array last element. Successfully performed large binary search test. BASIC SPARSE BLAS TEST: BEGIN @@ -9286,7 +9354,7 @@ got RSB_IO_WANT_IS_INITIALIZED_MARKER: 1 INIT INTERFACE TEST: END (SUCCESS) DEVEL PRINT TEST: BEGIN -(4 x 4)[0x57b87770]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(4 x 4)[0x56bc18c0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9295,8 +9363,8 @@ RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS | RSB_FLAG_OWN_PARTITIONING_ARRAYS | RSB_FLAG_SORT_INPUT -(2 x 2)[0x57b87840]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x57b87910]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56bc1990]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56bc1a60]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' #R 4 x 4, 4 nnz (16 bytes), 16 index space for bytes, 416 bytes for 2 structs (2 of which are on the diagonal) (1e+02% of nnz are on the diagonal) #N at 0 0, 4 x 4, 4 nnz ( 25%) #T at 0 0, 2 x 2, 2 nnz ( 50%) @@ -9304,9 +9372,9 @@ ( 0x2046186 = { rec:1 coo:1 css:1 hw:1 ic:1 fi:0 symflags: } ) DEVEL PRINT TEST: END PRINT TEST: BEGIN [QUIET] -(2 x 2)[0x57b87840]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x57b87910]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(4 x 4)[0x57b87770]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(2 x 2)[0x56bc1990]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56bc1a60]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(4 x 4)[0x56bc18c0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9324,8 +9392,8 @@ BASIC PRIMITIVES TEST: BEGIN BASIC PRIMITIVES TEST: END (SUCCESS) ADVANCED SPARSE BLAS TEST: BEGIN [limit 30.000000s] [QUIET] -Terminating testing earlier due to user timeout request: test took 30.816023 s, max allowed was 30.000000. - PASSED:19585 +Terminating testing earlier due to user timeout request: test took 30.002416 s, max allowed was 30.000000. + PASSED:27842 FAILED:0 ADVANCED SPARSE BLAS TEST: END (SUCCESS) gmake qtests -C librsbpp @@ -9334,7 +9402,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' ./rsbtt -if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh +if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh ++ ./rsbpp Td,s G.mtx ++ grep Z-sort ++ wc -l @@ -9386,8 +9454,8 @@ + test 9 = 9 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1,4,8sFv -++ wc -l ++ grep spmm- +++ wc -l + test 9 = 9 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1sFvtN,T @@ -9473,7 +9541,7 @@ 0 0 0 0 before tuning for SPMV: -(3 x 3)[0x56a3c5c0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' +(3 x 3)[0x5669c5a0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' ** x: 1.1 @@ -9562,18 +9630,18 @@ BEGIN Rsb_Matrix_test_multimatrix_ms_mnrhs BEGIN -(3 x 3)[0x56a68d90]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.0678: -(3 x 3)[0x56a6b280]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x56a68d90]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.35422: -(3 x 3)[0x56a6b9e0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x56a68d90]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.51166: -(3 x 3)[0x56a6b280]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x56a68d90]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.60221: -(3 x 3)[0x56a6b280]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x566c9970]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.26667: +(3 x 3)[0x566ce180]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x566c9970]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.16418: +(3 x 3)[0x566ce180]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x566c9970]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.27586: +(3 x 3)[0x566c9e20]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x566c9970]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.00613: +(3 x 3)[0x566c9e20]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' END OK: terminating with no allocations registered in librsb [*] tests terminated successfully ! @@ -9590,10 +9658,10 @@ ./rsbtest --no-tune --max_t 0.01 --serial | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q Building ./rsbtest --no-tune --max_t 0.01 --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --no-trans --alpha 1 --type d --rand --serial . | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q adding ! ./rsbtest --mkl A.mkl -running on ionos12-i386 +running on i-capture-the-hostname Built without the MKL. ( ! ./rsbtest --unrecognized-option-triggers-abort ) -running on ionos12-i386 +running on i-capture-the-hostname /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest/.libs/rsbtest: unrecognized option '--unrecognized-option-triggers-abort' unrecognized option, aborting. ( ./rsbtest --no-tune --max_t 0.01 --skip-loading-hermitian-matrices --skip-loading-unsymmetric-matrices --tune-maxt 10 --tune-maxr 10 --verbose-tuning --extra-verbose-interface --min_t 0.01 --max_t 0.01 --mintimes 1 --maxtimes 1 --verbose --skip-loading-symmetric-matrices A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q skip ) @@ -9602,7 +9670,7 @@ ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --types all --nthreads 1,2 --maxtimes 1 -+ A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q 2.threads ) ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --render-only A.mtx > /dev/null ) ! ./rsbtest --no-tune --max_t 0.01 --quiet --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --render --no-trans --alpha 1 --type all A.mtx -running on ionos12-i386 +running on i-capture-the-hostname Will not invoke autotuning routine. Benchmark will sample for at most 0.01 s Built without render support! @@ -9654,7 +9722,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' - /bin/bash ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' + /bin/sh ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' libtool: install: /usr/bin/install -c .libs/librsb.so.0.0.0 /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu/librsb.so.0.0.0 libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so.0 || { rm -f librsb.so.0 && ln -s librsb.so.0.0.0 librsb.so.0; }; }) libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so || { rm -f librsb.so && ln -s librsb.so.0.0.0 librsb.so; }; }) @@ -9664,7 +9732,7 @@ libtool: install: ranlib /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu/librsb.a libtool: warning: remember to run 'libtool --finish /usr/lib/i386-linux-gnu' /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' - /bin/bash ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' + /bin/sh ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' libtool: warning: 'librsb.la' has not been installed in '/usr/lib/i386-linux-gnu' libtool: install: /usr/bin/install -c .libs/rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin/rsbench /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' @@ -9791,12 +9859,12 @@ dh_gencontrol dh_md5sums dh_builddeb -dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-7_i386.deb'. -dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-7_i386.deb'. -dpkg-deb: building package 'librsb0t64-dbgsym' in '../librsb0t64-dbgsym_1.3.0.2+dfsg-7_i386.deb'. dpkg-deb: building package 'librsb0t64' in '../librsb0t64_1.3.0.2+dfsg-7_i386.deb'. -dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-7_all.deb'. +dpkg-deb: building package 'librsb0t64-dbgsym' in '../librsb0t64-dbgsym_1.3.0.2+dfsg-7_i386.deb'. +dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-7_i386.deb'. +dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-7_i386.deb'. dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-7_i386.deb'. +dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-7_all.deb'. dpkg-genbuildinfo --build=binary -O../librsb_1.3.0.2+dfsg-7_i386.buildinfo dpkg-genchanges --build=binary -O../librsb_1.3.0.2+dfsg-7_i386.changes dpkg-genchanges: info: binary-only upload (no source code included) @@ -9805,12 +9873,14 @@ dpkg-buildpackage: info: binary-only upload (no source included) dpkg-genchanges: info: not including original source code in upload I: copying local configuration +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/B01_cleanup starting +I: user script /srv/workspace/pbuilder/60845/tmp/hooks/B01_cleanup finished I: unmounting dev/ptmx filesystem I: unmounting dev/pts filesystem I: unmounting dev/shm filesystem I: unmounting proc filesystem I: unmounting sys filesystem I: cleaning the build env -I: removing directory /srv/workspace/pbuilder/5650 and its subdirectories -I: Current time: Mon Feb 24 18:18:05 -12 2025 -I: pbuilder-time-stamp: 1740464285 +I: removing directory /srv/workspace/pbuilder/60845 and its subdirectories +I: Current time: Tue Mar 31 02:53:06 +14 2026 +I: pbuilder-time-stamp: 1774875186