Diff of the two buildlogs: -- --- b1/build.log 2025-02-21 20:08:40.470993220 +0000 +++ b2/build.log 2025-02-21 20:26:38.201972361 +0000 @@ -1,6 +1,6 @@ I: pbuilder: network access will be disabled during build -I: Current time: Fri Feb 21 06:57:34 -12 2025 -I: pbuilder-time-stamp: 1740164254 +I: Current time: Fri Mar 27 16:31:42 +14 2026 +I: pbuilder-time-stamp: 1774578702 I: Building the build Environment I: extracting base tarball [/var/cache/pbuilder/unstable-reproducible-base.tgz] I: copying local configuration @@ -25,52 +25,84 @@ dpkg-source: info: applying auto-gitignore I: Not using root during the build. I: Installing the build-deps -I: user script /srv/workspace/pbuilder/287613/tmp/hooks/D02_print_environment starting +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/D01_modify_environment starting +debug: Running on ionos15-amd64. +I: Changing host+domainname to test build reproducibility +I: Adding a custom variable just for the fun of it... +I: Changing /bin/sh to bash +'/bin/sh' -> '/bin/bash' +lrwxrwxrwx 1 root root 9 Mar 27 02:31 /bin/sh -> /bin/bash +I: Setting pbuilder2's login shell to /bin/bash +I: Setting pbuilder2's GECOS to second user,second room,second work-phone,second home-phone,second other +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/D01_modify_environment finished +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/D02_print_environment starting I: set - BUILDDIR='/build/reproducible-path' - BUILDUSERGECOS='first user,first room,first work-phone,first home-phone,first other' - BUILDUSERNAME='pbuilder1' - BUILD_ARCH='amd64' - DEBIAN_FRONTEND='noninteractive' - DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=20 ' - DISTRIBUTION='unstable' - HOME='/root' - HOST_ARCH='amd64' + BASH=/bin/sh + BASHOPTS=checkwinsize:cmdhist:complete_fullquote:extquote:force_fignore:globasciiranges:globskipdots:hostcomplete:interactive_comments:patsub_replacement:progcomp:promptvars:sourcepath + BASH_ALIASES=() + BASH_ARGC=() + BASH_ARGV=() + BASH_CMDS=() + BASH_LINENO=([0]="12" [1]="0") + BASH_LOADABLES_PATH=/usr/local/lib/bash:/usr/lib/bash:/opt/local/lib/bash:/usr/pkg/lib/bash:/opt/pkg/lib/bash:. + BASH_SOURCE=([0]="/tmp/hooks/D02_print_environment" [1]="/tmp/hooks/D02_print_environment") + BASH_VERSINFO=([0]="5" [1]="2" [2]="37" [3]="1" [4]="release" [5]="x86_64-pc-linux-gnu") + BASH_VERSION='5.2.37(1)-release' + BUILDDIR=/build/reproducible-path + BUILDUSERGECOS='second user,second room,second work-phone,second home-phone,second other' + BUILDUSERNAME=pbuilder2 + BUILD_ARCH=amd64 + DEBIAN_FRONTEND=noninteractive + DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=42 ' + DIRSTACK=() + DISTRIBUTION=unstable + EUID=0 + FUNCNAME=([0]="Echo" [1]="main") + GROUPS=() + HOME=/root + HOSTNAME=i-capture-the-hostname + HOSTTYPE=x86_64 + HOST_ARCH=amd64 IFS=' ' - INVOCATION_ID='cd064656839343a2b6777f626a5b36a6' - LANG='C' - LANGUAGE='en_US:en' - LC_ALL='C' - MAIL='/var/mail/root' - OPTIND='1' - PATH='/usr/sbin:/usr/bin:/sbin:/bin:/usr/games' - PBCURRENTCOMMANDLINEOPERATION='build' - PBUILDER_OPERATION='build' - PBUILDER_PKGDATADIR='/usr/share/pbuilder' - PBUILDER_PKGLIBDIR='/usr/lib/pbuilder' - PBUILDER_SYSCONFDIR='/etc' - PPID='287613' - PS1='# ' - PS2='> ' + INVOCATION_ID=81aa9caec456427895059883d64c0673 + LANG=C + LANGUAGE=et_EE:et + LC_ALL=C + MACHTYPE=x86_64-pc-linux-gnu + MAIL=/var/mail/root + OPTERR=1 + OPTIND=1 + OSTYPE=linux-gnu + PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path + PBCURRENTCOMMANDLINEOPERATION=build + PBUILDER_OPERATION=build + PBUILDER_PKGDATADIR=/usr/share/pbuilder + PBUILDER_PKGLIBDIR=/usr/lib/pbuilder + PBUILDER_SYSCONFDIR=/etc + PIPESTATUS=([0]="0") + POSIXLY_CORRECT=y + PPID=3426253 PS4='+ ' - PWD='/' - SHELL='/bin/bash' - SHLVL='2' - SUDO_COMMAND='/usr/bin/timeout -k 18.1h 18h /usr/bin/ionice -c 3 /usr/bin/nice /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.FrkB7dI2/pbuilderrc_eUbf --distribution unstable --hookdir /etc/pbuilder/first-build-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/unstable-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.FrkB7dI2/b1 --logfile b1/build.log librsb_1.3.0.2+dfsg-7.dsc' - SUDO_GID='110' - SUDO_UID='105' - SUDO_USER='jenkins' - TERM='unknown' - TZ='/usr/share/zoneinfo/Etc/GMT+12' - USER='root' - _='/usr/bin/systemd-run' - http_proxy='http://46.16.76.132:3128' + PWD=/ + SHELL=/bin/bash + SHELLOPTS=braceexpand:errexit:hashall:interactive-comments:posix + SHLVL=3 + SUDO_COMMAND='/usr/bin/timeout -k 24.1h 24h /usr/bin/ionice -c 3 /usr/bin/nice -n 11 /usr/bin/unshare --uts -- /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.FrkB7dI2/pbuilderrc_R8K1 --distribution unstable --hookdir /etc/pbuilder/rebuild-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/unstable-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.FrkB7dI2/b2 --logfile b2/build.log librsb_1.3.0.2+dfsg-7.dsc' + SUDO_GID=111 + SUDO_UID=106 + SUDO_USER=jenkins + TERM=unknown + TZ=/usr/share/zoneinfo/Etc/GMT-14 + UID=0 + USER=root + _='I: set' + http_proxy=http://213.165.73.152:3128 I: uname -a - Linux ionos1-amd64 6.1.0-31-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.128-1 (2025-02-07) x86_64 GNU/Linux + Linux i-capture-the-hostname 6.12.9+bpo-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.12.9-1~bpo12+1 (2025-01-19) x86_64 GNU/Linux I: ls -l /bin - lrwxrwxrwx 1 root root 7 Nov 22 14:40 /bin -> usr/bin -I: user script /srv/workspace/pbuilder/287613/tmp/hooks/D02_print_environment finished + lrwxrwxrwx 1 root root 7 Nov 22 2024 /bin -> usr/bin +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/D02_print_environment finished -> Attempting to satisfy build-dependencies -> Creating pbuilder-satisfydepends-dummy package Package: pbuilder-satisfydepends-dummy @@ -296,7 +328,7 @@ Get: 178 http://deb.debian.org/debian unstable/main amd64 libltdl-dev amd64 2.5.4-3 [167 kB] Get: 179 http://deb.debian.org/debian unstable/main amd64 libhwloc-dev amd64 2.12.0-1 [255 kB] Get: 180 http://deb.debian.org/debian unstable/main amd64 zlib1g-dev amd64 1:1.3.dfsg+really1.3.1-1+b1 [920 kB] -Fetched 382 MB in 22s (17.6 MB/s) +Fetched 382 MB in 10s (39.9 MB/s) Preconfiguring packages ... Selecting previously unselected package liblocale-gettext-perl. (Reading database ... (Reading database ... 5% (Reading database ... 10% (Reading database ... 15% (Reading database ... 20% (Reading database ... 25% (Reading database ... 30% (Reading database ... 35% (Reading database ... 40% (Reading database ... 45% (Reading database ... 50% (Reading database ... 55% (Reading database ... 60% (Reading database ... 65% (Reading database ... 70% (Reading database ... 75% (Reading database ... 80% (Reading database ... 85% (Reading database ... 90% (Reading database ... 95% (Reading database ... 100% (Reading database ... 19775 files and directories currently installed.) @@ -894,8 +926,8 @@ Setting up tzdata (2025a-2) ... Current default time zone: 'Etc/UTC' -Local time is now: Fri Feb 21 19:01:16 UTC 2025. -Universal Time is now: Fri Feb 21 19:01:16 UTC 2025. +Local time is now: Fri Mar 27 02:32:36 UTC 2026. +Universal Time is now: Fri Mar 27 02:32:36 UTC 2026. Run 'dpkg-reconfigure tzdata' if you wish to change it. Setting up libfontenc1:amd64 (1:1.1.8-1+b2) ... @@ -1062,7 +1094,11 @@ Building tag database... -> Finished parsing the build-deps I: Building the package -I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-7_source.changes +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/A99_set_merged_usr starting +Not re-configuring usrmerge for unstable +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/A99_set_merged_usr finished +hostname: Name or service not known +I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-7_source.changes dpkg-buildpackage: info: source package librsb dpkg-buildpackage: info: source version 1.3.0.2+dfsg-7 dpkg-buildpackage: info: source distribution unstable @@ -1082,62 +1118,62 @@ dh binary dh_update_autotools_config dh_autoreconf -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found libtoolize: putting auxiliary files in '.'. libtoolize: copying file './ltmain.sh' libtoolize: putting macros in AC_CONFIG_MACRO_DIRS, 'm4'. @@ -1179,90 +1215,90 @@ libtoolize: copying file 'm4/ltsugar.m4' libtoolize: copying file 'm4/ltversion.m4' libtoolize: copying file 'm4/lt~obsolete.m4' -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found configure.ac:66: installing './compile' configure.ac:65: installing './missing' Makefile.am: installing './depcomp' @@ -1448,7 +1484,7 @@ checking for m4... m4 checking for gmake... gmake checking for ggrep... /usr/bin/grep -checking for bash... /bin/bash +checking for bash... /bin/sh checking for gsed... /usr/bin/sed checking for cmp... cmp checking for basename... basename @@ -1565,7 +1601,7 @@ configure: It appears that Fortran programs can be linked without using the Fortran linker. configure: Using OPENMP_CFLAGS ok for linking an OpenMP program: adding it to LIBS. checking if your have a usable getrusage() ... 1 -checking for /bin/bash... /bin/bash +checking for /bin/sh... /bin/sh configure: Will not use Google Test. configure: You seem to not have GNU Octave or have disabled 'int' type. Part of the test suite will not be generated. If you want more testing capabilities, you should enable the 'int' type as well. checking that generated files are newer than configure... done @@ -1587,7 +1623,7 @@ config.status: executing depfiles commands config.status: executing libtool commands === configuring in librsbpp (/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1712,7 +1748,7 @@ configure: Will not use Google Test. checking whether you have std::thread... yes checking whether you have std::mutex... yes -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1721,7 +1757,7 @@ config.status: executing libtool commands configure: Successfully created a Makefile. === configuring in rsblib (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1833,7 +1869,7 @@ checking for filesystem... yes checking for main in -lstdc++fs... yes configure: Assuming you are yet to build librsb.la. (set LIBS= -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la) -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1843,7 +1879,7 @@ config.status: executing libtool commands configure: Created a Makefile. === configuring in rsbtest (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether sleep supports fractional seconds... yes checking filesystem timestamp resolution... 0.01 @@ -1957,7 +1993,7 @@ checking for filesystem... yes checking for rsb_lib_init... no checking for dd... yes -/bin/bash +/bin/sh /usr/bin/timeout checking that generated files are newer than configure... done configure: creating ./config.status @@ -1989,7 +2025,7 @@ ARFLAGS : cru M4 : m4 MAKE : gmake - BASH : /bin/bash + BASH : /bin/sh OCTAVE : false DOXYGEN : doxygen HELP2MAN : help2man @@ -2008,7 +2044,7 @@ Supported I/O functionality level : "7" vs "7" Interface Error Verbosity : "0" vs "0" Internals Error Verbosity : "0" vs "0" - Memory hierarchy info, detected : "L2:16/64/512K,L1:2/64/64K" + Memory hierarchy info, detected : "L2:16/64/4096K,L1:8/64/32K" Memory hierarchy info, selected : "" Maximum of supported threads : "128" Build Fortran examples : "yes" vs "yes" @@ -2038,28 +2074,28 @@ blhc: ignore-line-regexp: ^.*CC.*: gcc blhc: ignore-line-regexp: ^.*LD.*: /usr/bin/ld.* dh_auto_build - make -j20 + make -j42 make[2]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake all-recursive gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' Making all in librsbpp gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbpp.o rsbpp.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c -o rsbtt.o rsbtt.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -c -o rsbct.o rsbct.c -libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -fPIC -DPIC -o .libs/rsbpp_csr.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -fPIC -DPIC -o .libs/rsbpp_coo.o +libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -fPIC -DPIC -o .libs/rsbpp_csr.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -o rsbpp_coo.o >/dev/null 2>&1 libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -o rsbpp_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs libtool: link: ar cr .libs/librsbpp.a .libs/rsbpp_coo.o .libs/rsbpp_csr.o libtool: link: ranlib .libs/librsbpp.a libtool: link: ( cd ".libs" && rm -f "librsbpp.la" && ln -s "../librsbpp.la" "librsbpp.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbct rsbct.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbtt rsbtt.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbpp rsbpp.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp @@ -2080,227 +2116,244 @@ gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o rsbench-rsb_libspblas_tests.o `test -f 'rsb_libspblas_tests.c' || echo './'`rsb_libspblas_tests.c g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -c -o rsb_dummy.o rsb_dummy.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o ch2icfb-ch2icfb.o `test -f 'ch2icfb.c' || echo './'`ch2icfb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_stropts.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_stropts.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mergesort.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_mergesort.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_unroll.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_unroll.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bench.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_bench.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_l.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mergesort.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_mergesort.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spmv.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spmv.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_ompio.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_ompio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_merge.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_merge.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_ompio.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_ompio.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_util.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_util.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_ompio.c -o librsb_nounroll_la-rsb_ompio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_is.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_is.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mio.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_op.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_op.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_op.c -o librsb_base_la-rsb_op.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_bio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_get.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_get.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_op.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_op.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_set.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_set.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_is.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_is.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_get.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_get.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_check.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_check.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_op.c -o librsb_base_la-rsb_op.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_check.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_check.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_symm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_symm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_idx.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_idx.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srt.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_ompio.c -o librsb_nounroll_la-rsb_ompio.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srtp.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srtp.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_src.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_src.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_clone.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_clone.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_test_accuracy.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_test_accuracy.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_render.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_render.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_eps.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_eps.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_msort_up.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_msort_up.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_test_accuracy.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_test_accuracy.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_sys.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_sys.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_test_accuracy.c -o librsb_base_la-rsb_test_accuracy.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_blas_stuff.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_blas_stuff.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_gen.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_gen.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_perf.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_perf.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_blas_stuff.c -o librsb_base_la-rsb_blas_stuff.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_err.c -o librsb_base_la-rsb_err.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_internals.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_internals.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_garbage.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_garbage.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_test_accuracy.c -o librsb_base_la-rsb_test_accuracy.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mmio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mmio.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_partition.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_partition.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mbw.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mbw.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_limiter.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_limiter.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_fpb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_fpb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsum.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsv.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_lock.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_lock.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_swt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_swt.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_init.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_init.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsv.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_dump.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_dump.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_init.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_init.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_cpmv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_cpmv.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_swt.c -o librsb_base_la-rsb_swt.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_cpmv.c -o librsb_base_la-rsb_cpmv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_asm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_asm.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo2rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo2rec.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2coo.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2csr.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_cpmv.c -o librsb_base_la-rsb_cpmv.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr2coo.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c +gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb_blas_sparse.o rsb_blas_sparse.F90 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb_blas_sparse.F90 -fPIC -o .libs/rsb_blas_sparse.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_swt.c -o librsb_base_la-rsb_swt.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas_handle.c -fPIC -DPIC -o .libs/rsb_libspblas_handle.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas.c -fPIC -DPIC -o .libs/rsb_libspblas.o -gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb_blas_sparse.o rsb_blas_sparse.F90 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 libtool: warning: '-version-info' is ignored for programs -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 libtool: link: gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -Wl,-z -Wl,relro -o ch2icfb ch2icfb-ch2icfb.o -fopenmp -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 +if test -f ./rsb_types.h -a ! -f ./rsb_types.h ; then cp -pv ./rsb_types.h ./rsb_types.h ; fi # out-of-dir behaviour varies between installations +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 +if test -f ch2icfb ; then if SED=/usr/bin/sed GREP=/usr/bin/grep /bin/sh ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb.lo rsb.F90 +gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb.o rsb.F90 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 +libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb.F90 -fPIC -o .libs/rsb.o +libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb.F90 -o rsb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas.c -o rsb_libspblas.o >/dev/null 2>&1 +libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb_blas_sparse.F90 -o rsb_blas_sparse.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +libtool: warning: '-version-info/-version-number' is ignored for convenience libraries +libtool: link: ar cr .libs/librsb_spblas.a .libs/rsb_libspblas_handle.o .libs/rsb_libspblas.o +libtool: link: ranlib .libs/librsb_spblas.a +libtool: link: ( cd ".libs" && rm -f "librsb_spblas.la" && ln -s "../librsb_spblas.la" "librsb_spblas.la" ) +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +libtool: warning: '-version-info/-version-number' is ignored for convenience libraries +libtool: link: ar cr .libs/librsb_base.a .libs/librsb_base_la-rsb_is.o .libs/librsb_base_la-rsb_mio.o .libs/librsb_base_la-rsb_op.o .libs/librsb_base_la-rsb_bio.o .libs/librsb_base_la-rsb_get.o .libs/librsb_base_la-rsb_set.o .libs/librsb_base_la-rsb_coo.o .libs/librsb_base_la-rsb_csr.o .libs/librsb_base_la-rsb_coo_check.o .libs/librsb_base_la-rsb_coo_symm.o .libs/librsb_base_la-rsb_idx.o .libs/librsb_base_la-rsb_srt.o .libs/librsb_base_la-rsb_srtp.o .libs/librsb_base_la-rsb_src.o .libs/librsb_base_la-rsb_test_accuracy.o .libs/librsb_base_la-rsb_clone.o .libs/librsb_base_la-rsb_rec.o .libs/librsb_base_la-rsb_render.o .libs/librsb_base_la-rsb_eps.o .libs/librsb_base_la-rsb_msort_up.o .libs/librsb_base_la-rsb_sys.o .libs/librsb_base_la-rsb_blas_stuff.o .libs/librsb_base_la-rsb_gen.o .libs/librsb_base_la-rsb_perf.o .libs/librsb_base_la-rsb_rsb.o .libs/librsb_base_la-rsb_err.o .libs/librsb_base_la-rsb_tune.o .libs/librsb_base_la-rsb_do.o .libs/librsb_base_la-rsb_internals.o .libs/librsb_base_la-rsb_garbage.o .libs/librsb_base_la-rsb_mmio.o .libs/librsb_base_la-rsb_partition.o .libs/librsb_base_la-rsb_mbw.o .libs/librsb_base_la-rsb_limiter.o .libs/librsb_base_la-rsb_fpb.o .libs/librsb_base_la-rsb_spgemm.o .libs/librsb_base_la-rsb_spsum.o .libs/librsb_base_la-rsb_spsv.o .libs/librsb_base_la-rsb_lock.o .libs/librsb_base_la-rsb_swt.o .libs/librsb_base_la-rsb_init.o .libs/librsb_base_la-rsb_dump.o .libs/librsb_base_la-rsb_cpmv.o .libs/librsb_base_la-rsb_asm.o .libs/librsb_base_la-rsb_user.o .libs/librsb_base_la-rsb_coo2rec.o .libs/librsb_base_la-rsb_rec2coo.o .libs/librsb_base_la-rsb_rec2csr.o .libs/librsb_base_la-rsb_csr2coo.o .libs/rsb_blas_sparse.o +libtool: link: ranlib .libs/librsb_base.a rsb_test_matops.c: In function 'rsb__main_block_partitioned_spmv_sxsa': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", @@ -2319,26 +2372,10 @@ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~ -if test -f ./rsb_types.h -a ! -f ./rsb_types.h ; then cp -pv ./rsb_types.h ./rsb_types.h ; fi # out-of-dir behaviour varies between installations -if test -f ch2icfb ; then if SED=/usr/bin/sed GREP=/usr/bin/grep /bin/bash ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb.lo rsb.F90 -gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o rsb.o rsb.F90 -libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb.F90 -fPIC -o .libs/rsb.o -libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb.F90 -o rsb.o >/dev/null 2>&1 -libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c rsb_blas_sparse.F90 -o rsb_blas_sparse.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_libspblas.c -o rsb_libspblas.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -libtool: warning: '-version-info/-version-number' is ignored for convenience libraries -libtool: link: ar cr .libs/librsb_base.a .libs/librsb_base_la-rsb_is.o .libs/librsb_base_la-rsb_mio.o .libs/librsb_base_la-rsb_op.o .libs/librsb_base_la-rsb_bio.o .libs/librsb_base_la-rsb_get.o .libs/librsb_base_la-rsb_set.o .libs/librsb_base_la-rsb_coo.o .libs/librsb_base_la-rsb_csr.o .libs/librsb_base_la-rsb_coo_check.o .libs/librsb_base_la-rsb_coo_symm.o .libs/librsb_base_la-rsb_idx.o .libs/librsb_base_la-rsb_srt.o .libs/librsb_base_la-rsb_srtp.o .libs/librsb_base_la-rsb_src.o .libs/librsb_base_la-rsb_test_accuracy.o .libs/librsb_base_la-rsb_clone.o .libs/librsb_base_la-rsb_rec.o .libs/librsb_base_la-rsb_render.o .libs/librsb_base_la-rsb_eps.o .libs/librsb_base_la-rsb_msort_up.o .libs/librsb_base_la-rsb_sys.o .libs/librsb_base_la-rsb_blas_stuff.o .libs/librsb_base_la-rsb_gen.o .libs/librsb_base_la-rsb_perf.o .libs/librsb_base_la-rsb_rsb.o .libs/librsb_base_la-rsb_err.o .libs/librsb_base_la-rsb_tune.o .libs/librsb_base_la-rsb_do.o .libs/librsb_base_la-rsb_internals.o .libs/librsb_base_la-rsb_garbage.o .libs/librsb_base_la-rsb_mmio.o .libs/librsb_base_la-rsb_partition.o .libs/librsb_base_la-rsb_mbw.o .libs/librsb_base_la-rsb_limiter.o .libs/librsb_base_la-rsb_fpb.o .libs/librsb_base_la-rsb_spgemm.o .libs/librsb_base_la-rsb_spsum.o .libs/librsb_base_la-rsb_spsv.o .libs/librsb_base_la-rsb_lock.o .libs/librsb_base_la-rsb_swt.o .libs/librsb_base_la-rsb_init.o .libs/librsb_base_la-rsb_dump.o .libs/librsb_base_la-rsb_cpmv.o .libs/librsb_base_la-rsb_asm.o .libs/librsb_base_la-rsb_user.o .libs/librsb_base_la-rsb_coo2rec.o .libs/librsb_base_la-rsb_rec2coo.o .libs/librsb_base_la-rsb_rec2csr.o .libs/librsb_base_la-rsb_csr2coo.o .libs/rsb_blas_sparse.o -libtool: link: ranlib .libs/librsb_base.a libtool: link: ( cd ".libs" && rm -f "librsb_base.la" && ln -s "../librsb_base.la" "librsb_base.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -libtool: warning: '-version-info/-version-number' is ignored for convenience libraries -libtool: link: ar cr .libs/librsb_spblas.a .libs/rsb_libspblas_handle.o .libs/rsb_libspblas.o -libtool: link: ranlib .libs/librsb_spblas.a -libtool: link: ( cd ".libs" && rm -f "librsb_spblas.la" && ln -s "../librsb_spblas.la" "librsb_spblas.la" ) +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 rsb_test_matops.c: In function 'rsb__main_block_partitioned_spsv_sxsx': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", @@ -2353,7 +2390,6 @@ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~ -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 rsb_test_matops.c: In function 'rsb__main_block_partitioned_mat_stats': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] @@ -2371,12 +2407,12 @@ | ~~~~~~~~~~~~~~~~~ libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_nounroll.a .libs/librsb_nounroll_la-rsb_stropts.o .libs/librsb_nounroll_la-rsb_strmif.o .libs/librsb_nounroll_la-rsb_unroll.o .libs/librsb_nounroll_la-rsb_krnl_vb.o .libs/librsb_nounroll_la-rsb_krnl_lb.o .libs/librsb_nounroll_la-rsb_krnl.o .libs/librsb_nounroll_la-rsb_bench.o .libs/librsb_nounroll_la-rsb_mergesort.o .libs/librsb_nounroll_la-rsb_permute.o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss.o .libs/librsb_nounroll_la-rsb_spmv.o .libs/librsb_nounroll_la-rsb_merge.o .libs/librsb_nounroll_la-rsb_ompio.o .libs/librsb_nounroll_la-rsb_util.o .libs/librsb_nounroll_la-rsb_spgemm_csr.o .libs/librsb_nounroll_la-rsb_spsum_misc.o .libs/librsb_nounroll_la-rsb_prec.o libtool: link: ranlib .libs/librsb_nounroll.a libtool: link: ( cd ".libs" && rm -f "librsb_nounroll.la" && ln -s "../librsb_nounroll.la" "librsb_nounroll.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/x86_64-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/x86_64-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: gcc -shared -fPIC -DPIC .libs/rsb.o -Wl,--whole-archive ./.libs/librsb_nounroll.a ./.libs/librsb_base.a ./.libs/librsb_spblas.a /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/.libs/librsbpp.a -Wl,--no-whole-archive -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lpthread -lstdc++fs -lstdc++ -lhwloc -lz -lgfortran -lm -lquadmath -g -O2 -Werror=implicit-function-declaration -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Werror=format-security -O3 -Wl,-z -Wl,relro -fopenmp -fopenmp -Wl,-soname -Wl,librsb.so.0 -o .libs/librsb.so.0.0.0 libtool: link: (cd ".libs" && rm -f "librsb.so.0" && ln -s "librsb.so.0.0.0" "librsb.so.0") libtool: link: (cd ".libs" && rm -f "librsb.so" && ln -s "librsb.so.0.0.0" "librsb.so") @@ -2388,7 +2424,7 @@ libtool: link: ranlib .libs/librsb.a libtool: link: rm -fr .libs/librsb.lax libtool: link: ( cd ".libs" && rm -f "librsb.la" && ln -s "../librsb.la" "librsb.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o -fopenmp ./.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -2405,27 +2441,27 @@ gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o fortran.o fortran.F90 gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -fcf-protection -pipe -fopenmp -c -o fortran_rsb_fi.o fortran_rsb_fi.F90 g++ -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -c -o cplusplus.o cplusplus.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/power power.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/io-spblas io-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/backsolve backsolve.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/hello hello.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/fortran_rsb_fi fortran_rsb_fi.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/autotune autotune.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/fortran fortran.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/snippets snippets.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp -Wl,-z -Wl,relro -o .libs/cplusplus cplusplus.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/14 -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/14/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/14/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' Making all in scripts @@ -2445,9 +2481,9 @@ /usr/bin/mkdir -p man /usr/bin/mkdir -p man gmake makedox -SOURCE_DATE_EPOCH=1735736461 \ +SOURCE_DATE_EPOCH=1767178861 \ help2man --name="benchmark and test for librsb" --no-info ../rsbench | /usr/bin/sed 's/January //g' > man/rsbench.1 -SOURCE_DATE_EPOCH=1735736461 \ +SOURCE_DATE_EPOCH=1767178861 \ help2man --name="provide configuration information for librsb" --no-info /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb-config | /usr/bin/sed 's/January //g' > man/librsb-config.1 gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/doc' DOXYGEN_PROJECT_NUMBER=1.3.0.2 doxygen Doxyfile || echo "are you sure you have doxygen installed ?" @@ -3159,13 +3195,13 @@ g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o rgt-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o rgt_ne-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o rgt_rv-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -libtool: link: g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' Making all in examples @@ -3180,25 +3216,25 @@ g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o mtx2bin.o mtx2bin.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o render.o render.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o span.o span.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib/examples' gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' @@ -3208,7 +3244,7 @@ gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' g++ -DHAVE_CONFIG_H -I. -I/build/reproducible-path/librsb-1.3.0.2+dfsg -Wdate-time -D_FORTIFY_SOURCE=2 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -c -o rsbtest.o rsbtest.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wl,-z -Wl,relro -o .libs/rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lstdc++fs -fopenmp gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' @@ -3284,7 +3320,7 @@ gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake mtests -C . gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash -ex ./scripts/readme-tests.sh +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh -ex ./scripts/readme-tests.sh + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + ./rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # --bench option implies -qH -R --write-performance-record --want-mkl-autotune --mkl-benchmark --types : --split-experimental 6 --merge-experimental 6 --also-transpose --sort-filenames-list --want-memory-benchmark @@ -3294,15 +3330,15 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740166905 +# beginning run at 1774579335 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -# average timer granularity: 6.45e-08 s -# Will write a final performance record to file rsbench_pr__1740166905_gcc-14.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1740166905_gcc-14.2-1,4th.rpr.tmp +# average timer granularity: 3.36e-08 s +# Will write a final performance record to file rsbench_pr__1774579335_gcc-14.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1774579335_gcc-14.2-1,4th.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -3341,61 +3377,61 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos1-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.064s +# Memory benchmark took 6.290s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 8 samples (2240 bytes). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 5.068s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 6.292s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type Z... -# file input of A.mtx took 0.00 s (6 nnz, 32058 nnz/s ) (0.98 MB/s ) -#pre-sorting (6 elements) took 0.0334489 s -#weeding duplicates (to 6 elements) took 5.96046e-06 s (and check, 3.09944e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 71494 nnz/s ) (2.19 MB/s ) +#pre-sorting (6 elements) took 0.00387716 s +#weeding duplicates (to 6 elements) took 1.90735e-06 s (and check, 1.90735e-06 s ) # multi-nrhs benchmarking (1,2) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.096s): (3 x 3)[0x55860a1d4d50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.020s): (3 x 3)[0x564373185e60]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.07199s; avg 0.024s ( +/- 40.94/ 40.92 %); best 0.01417s; worst 0.03382s; std dev. 0.00802 (taking best). -Reference operation time is 0.0141718 s (0.006774 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01417 Mflops: 0.007) -Merge (3 -> 1 leaves) took w.c.t. of 5.984e-05s, ~2.098e-05s of computing time (of which 5.007e-06s sorting, 9.06e-06s analysis) -3 iterations (1 th.) took 6.008e-05s; avg 2.003e-05s ( +/- 99.69/195.24 %); best 6.235e-08s; worst 5.913e-05s; std dev. 2.765e-05 (taking best). -Reference operation time is 6.23465e-08 s (1540 Mflops) with 1 threads. -After merge step 1: tpop: 6.235e-08 s ~Mflops: 1539.783 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 227307.839x: 0.01417s -> 6.235e-08s, so taking this instance. +3 iterations (1 th.) took 0.02358s; avg 0.007861s ( +/- 52.60/104.57 %); best 0.003726s; worst 0.01608s; std dev. 0.005813 (taking best). +Reference operation time is 0.00372601 s (0.02576 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003726 Mflops: 0.026) +Merge (3 -> 1 leaves) took w.c.t. of 2.694e-05s, ~9.06e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (1 th.) took 2.599e-05s; avg 8.663e-06s ( +/- 99.61/188.99 %); best 3.351e-08s; worst 2.503e-05s; std dev. 1.158e-05 (taking best). +Reference operation time is 3.35097e-08 s (2865 Mflops) with 1 threads. +After merge step 1: tpop: 3.351e-08 s ~Mflops: 2864.839 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 111191.747x: 0.003726s -> 3.351e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06798s (of which 6.914e-05s partitioning, 0s I/O); computing times: 2.098e-05s in par. loops, 5.007e-06s sorting, 9.06e-06s analyzing) -Total merge + benchmarking process took 0.06798s, equivalent to 1090344.2/4.8 new/old ops (0.1396s for 2 clones -- as 2239441.7/9.9 ops, or 1119720.8/4.9 ops per clone), SPEEDUP of 227307.839x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 227307.839x (0.01417s -> 6.235e-08s), will amortize in 4.8 ops by saving 0.01417s per op. -In 1 tuning rounds (tot. 0.21s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 22730683.9% (2.273e+05x) (from 0.006774 to 1540 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.009928s (of which 3.099e-05s partitioning, 0s I/O); computing times: 9.06e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.009928s, equivalent to 296271.8/2.7 new/old ops (0.02102s for 2 clones -- as 627250.1/5.6 ops, or 313625.0/2.8 ops per clone), SPEEDUP of 111191.747x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 111191.747x (0.003726s -> 3.351e-08s), will amortize in 2.7 ops by saving 0.003726s per op. +In 1 tuning rounds (tot. 0.045s, 0.021s for constructor, 2 clones) obtained a SPEEDUP of 11119074.7% (1.112e+05x) (from 0.02576 to 2865 Mflops). #pr: updating sample at index 1 (0^th of 8), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.211945 s (1.417e-02 s -> 6.235e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0447469 s (3.726e-03 s -> 3.351e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.09598 s and estimated a speedup of 1.000000 x (6.235e-08 s -> 6.235e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.188447 s and estimated a speedup of 1.000000 x (3.351e-08 s -> 3.351e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.035733 0.024121 0.059854 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.059854 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.035733 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.024121 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.005498 0.007374 0.012872 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.012872 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.005498 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.007374 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.059854 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.012872 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3410,47 +3446,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.140s): (3 x 3)[0x55860a1d4d50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.025s): (3 x 3)[0x564373185e60]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 0.07596s; avg 0.02532s ( +/- 8.93/ 12.50 %); best 0.02306s; worst 0.02849s; std dev. 0.002306 (taking best). -Reference operation time is 0.0230579 s (0.004163 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.02306 Mflops: 0.004) -Merge (3 -> 1 leaves) took w.c.t. of 3.099e-05s, ~1.097e-05s of computing time (of which 9.537e-07s sorting, 7.153e-06s analysis) -3 iterations (4 th.) took 6.199e-06s; avg 2.066e-06s ( +/- 96.98/142.31 %); best 6.235e-08s; worst 5.007e-06s; std dev. 2.135e-06 (taking best). -Reference operation time is 6.23465e-08 s (1540 Mflops) with 4 threads. -After merge step 1: tpop: 6.235e-08 s ~Mflops: 1539.783 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 369835.564x: 0.02306s -> 6.235e-08s, so taking this instance. +3 iterations (4 th.) took 0.01132s; avg 0.003772s ( +/- 0.65/ 0.59 %); best 0.003747s; worst 0.003794s; std dev. 1.914e-05 (taking best). +Reference operation time is 0.00374722 s (0.02562 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003747 Mflops: 0.026) +Merge (3 -> 1 leaves) took w.c.t. of 1.407e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (4 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 97.52/129.41 %); best 3.351e-08s; worst 3.099e-06s; std dev. 1.296e-06 (taking best). +Reference operation time is 3.35097e-08 s (2865 Mflops) with 4 threads. +After merge step 1: tpop: 3.351e-08 s ~Mflops: 2864.839 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 111824.973x: 0.003747s -> 3.351e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.07199s (of which 3.505e-05s partitioning, 0s I/O); computing times: 1.097e-05s in par. loops, 9.537e-07s sorting, 7.153e-06s analyzing) -Total merge + benchmarking process took 0.07199s, equivalent to 1154722.8/3.1 new/old ops (0.1275s for 2 clones -- as 2044914.0/5.5 ops, or 1022457.0/2.8 ops per clone), SPEEDUP of 369835.564x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 369835.564x (0.02306s -> 6.235e-08s), will amortize in 3.1 ops by saving 0.02306s per op. -In 1 tuning rounds (tot. 0.2s, 0.13s for constructor, 2 clones) obtained a SPEEDUP of 36983456.4% (3.698e+05x) (from 0.004163 to 1540 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01128s (of which 1.788e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01128s, equivalent to 336705.8/3.0 new/old ops (0.02258s for 2 clones -- as 673717.5/6.0 ops, or 336858.8/3.0 ops per clone), SPEEDUP of 111824.973x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 111824.973x (0.003747s -> 3.351e-08s), will amortize in 3.0 ops by saving 0.003747s per op. +In 1 tuning rounds (tot. 0.034s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11182397.3% (1.118e+05x) (from 0.02562 to 2865 Mflops). #pr: updating sample at index 5 (1^th of 8), 0^th touch for (0,1,0,0,0,0,0). -First run of RSB Autotuner took 0.203651 s (2.306e-02 s -> 6.235e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.033983 s (3.747e-03 s -> 3.351e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.880015 s and estimated a speedup of 1.000000 x (6.235e-08 s -> 6.235e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.188265 s and estimated a speedup of 1.000000 x (3.351e-08 s -> 3.351e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.032852 0.062377 0.095229 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.095229 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.032852 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.062377 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.006579 0.012591 0.019170 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.019170 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.006579 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.012591 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.095229 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.019170 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.63 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.63 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 1.09 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.39 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 1.09 0.39 0.63 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.67 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.67 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.84 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.59 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.84 0.59 0.67 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3459,50 +3495,50 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.0957091 1e+09 1e+09 +%operation:A.mtx 0.0204411 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.035733 0 0.0241208 +%constructor:A.mtx 0 0.00549793 0 0.00737405 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,2) -- now using nrhs 2. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.091s): (3 x 3)[0x55860a1d4d50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.021s): (3 x 3)[0x564373185e60]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.048s; avg 0.016s ( +/- 9.08/ 9.11 %); best 0.01455s; worst 0.01746s; std dev. 0.001188 (taking best). -Reference operation time is 0.0145481 s (0.0132 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01455 Mflops: 0.013) -Merge (3 -> 1 leaves) took w.c.t. of 3.695e-05s, ~1.192e-05s of computing time (of which 2.146e-06s sorting, 7.153e-06s analysis) -3 iterations (1 th.) took 6.914e-06s; avg 2.305e-06s ( +/- 97.29/158.62 %); best 6.235e-08s; worst 5.96e-06s; std dev. 2.614e-06 (taking best). -Reference operation time is 6.23465e-08 s (3080 Mflops) with 1 threads. -After merge step 1: tpop: 6.235e-08 s ~Mflops: 3079.565 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 233342.256x: 0.01455s -> 6.235e-08s, so taking this instance. +3 iterations (1 th.) took 0.01137s; avg 0.00379s ( +/- 1.92/ 2.94 %); best 0.003717s; worst 0.003901s; std dev. 7.995e-05 (taking best). +Reference operation time is 0.00371695 s (0.05166 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003717 Mflops: 0.052) +Merge (3 -> 1 leaves) took w.c.t. of 1.097e-05s, ~5.007e-06s of computing time (of which 2.146e-06s sorting, 2.146e-06s analysis) +3 iterations (1 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 98.31/152.00 %); best 3.351e-08s; worst 5.007e-06s; std dev. 2.171e-06 (taking best). +Reference operation time is 3.35097e-08 s (5730 Mflops) with 1 threads. +After merge step 1: tpop: 3.351e-08 s ~Mflops: 5729.679 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 110921.380x: 0.003717s -> 3.351e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04796s (of which 4.101e-05s partitioning, 0s I/O); computing times: 1.192e-05s in par. loops, 2.146e-06s sorting, 7.153e-06s analyzing) -Total merge + benchmarking process took 0.04796s, equivalent to 769311.7/3.3 new/old ops (0.1038s for 2 clones -- as 1664313.6/7.1 ops, or 832156.8/3.6 ops per clone), SPEEDUP of 233342.256x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 233342.256x (0.01455s -> 6.235e-08s), will amortize in 3.3 ops by saving 0.01455s per op. -In 1 tuning rounds (tot. 0.15s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 23334125.6% (2.333e+05x) (from 0.0132 to 3080 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01169s (of which 1.311e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 2.146e-06s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.01169s, equivalent to 348794.0/3.1 new/old ops (0.02294s for 2 clones -- as 684638.9/6.2 ops, or 342319.5/3.1 ops per clone), SPEEDUP of 110921.380x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 110921.380x (0.003717s -> 3.351e-08s), will amortize in 3.1 ops by saving 0.003717s per op. +In 1 tuning rounds (tot. 0.034s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11092038.0% (1.109e+05x) (from 0.05166 to 5730 Mflops). #pr: updating sample at index 3 (2^th of 8), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.151961 s (1.455e-02 s -> 6.235e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0343881 s (3.717e-03 s -> 3.351e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.871956 s and estimated a speedup of 1.000000 x (6.235e-08 s -> 6.235e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.188467 s and estimated a speedup of 1.000000 x (3.351e-08 s -> 3.351e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.022549 0.035976 0.058525 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.058525 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.022549 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.035976 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.005475 0.007753 0.013228 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.013228 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.005475 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.007753 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.058525 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.013228 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3517,47 +3553,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.144s): (3 x 3)[0x55860a1d4d50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.024s): (3 x 3)[0x564373185e60]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 0.04801s; avg 0.016s ( +/- 0.15/ 0.25 %); best 0.01598s; worst 0.01604s; std dev. 2.845e-05 (taking best). -Reference operation time is 0.0159779 s (0.01202 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01598 Mflops: 0.012) -Merge (3 -> 1 leaves) took w.c.t. of 2.503e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 5.007e-06s analysis) -3 iterations (4 th.) took 7.868e-06s; avg 2.623e-06s ( +/- 97.62/163.64 %); best 6.235e-08s; worst 6.914e-06s; std dev. 3.059e-06 (taking best). -Reference operation time is 6.23465e-08 s (3080 Mflops) with 4 threads. -After merge step 1: tpop: 6.235e-08 s ~Mflops: 3079.565 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 256275.335x: 0.01598s -> 6.235e-08s, so taking this instance. +3 iterations (4 th.) took 0.01131s; avg 0.003771s ( +/- 1.59/ 2.52 %); best 0.003711s; worst 0.003866s; std dev. 6.793e-05 (taking best). +Reference operation time is 0.00371099 s (0.05174 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003711 Mflops: 0.052) +Merge (3 -> 1 leaves) took w.c.t. of 1.192e-05s, ~5.007e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (4 th.) took 3.815e-06s; avg 1.272e-06s ( +/- 97.36/125.00 %); best 3.351e-08s; worst 2.861e-06s; std dev. 1.189e-06 (taking best). +Reference operation time is 3.35097e-08 s (5730 Mflops) with 4 threads. +After merge step 1: tpop: 3.351e-08 s ~Mflops: 5729.679 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 110743.508x: 0.003711s -> 3.351e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05201s (of which 2.789e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 9.537e-07s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.05201s, equivalent to 834256.2/3.3 new/old ops (0.09978s for 2 clones -- as 1600443.6/6.2 ops, or 800221.8/3.1 ops per clone), SPEEDUP of 256275.335x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 256275.335x (0.01598s -> 6.235e-08s), will amortize in 3.3 ops by saving 0.01598s per op. -In 1 tuning rounds (tot. 0.15s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 25627433.5% (2.563e+05x) (from 0.01202 to 3080 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01049s (of which 1.478e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 9.537e-07s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 0.01049s, equivalent to 313098.5/2.8 new/old ops (0.02175s for 2 clones -- as 649007.5/5.9 ops, or 324503.7/2.9 ops per clone), SPEEDUP of 110743.508x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 110743.508x (0.003711s -> 3.351e-08s), will amortize in 2.8 ops by saving 0.003711s per op. +In 1 tuning rounds (tot. 0.033s, 0.022s for constructor, 2 clones) obtained a SPEEDUP of 11074250.8% (1.107e+05x) (from 0.05174 to 5730 Mflops). #pr: updating sample at index 7 (3^th of 8), 0^th touch for (0,1,0,0,1,0,0). -First run of RSB Autotuner took 0.147957 s (1.598e-02 s -> 6.235e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.033138 s (3.711e-03 s -> 3.351e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.844321 s and estimated a speedup of 1.000000 x (6.235e-08 s -> 6.235e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.18958 s and estimated a speedup of 1.000000 x (3.351e-08 s -> 3.351e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.035459 0.079949 0.115408 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.115408 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.035459 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.079949 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.007393 0.009250 0.016643 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.016643 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.007393 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.009250 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.115408 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.016643 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.51 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.51 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.64 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.45 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.64 0.45 0.51 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.79 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.79 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.74 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.84 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.74 0.84 0.79 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3566,184 +3602,184 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.090641 1e+09 1e+09 +%operation:A.mtx 0.0208929 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.0225489 0 0.0359762 +%constructor:A.mtx 0 0.00547504 0 0.0077529 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 10.346s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.408s/0.000s . +# so far, program took 7.378s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.901s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.1247s (system CPU time used) -ru_utime : 58.94s (user CPU time used) +ru_maxrss: 23 (maximum resident set size -- MB) +ru_stime : 0.2s (system CPU time used) +ru_utime : 48.09s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1539.78 1.417e-02 0.000e+00 6.235e-08 0.000e+00 2.119e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1539.78 2.306e-02 0.000e+00 6.235e-08 0.000e+00 2.037e-01 4.30e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 2864.84 3.726e-03 0.000e+00 3.351e-08 0.000e+00 4.475e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 2864.84 3.747e-03 0.000e+00 3.351e-08 0.000e+00 3.398e-02 8.00e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 29857070.2 % faster, avg. sp. ratio 298571.702x, max sp. ratio 369835.564x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3332956.0/3266439.8/3399472.3/6665912.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.9/ 8.8/ 15.0/ 23.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.9, min. 8.8, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11150736.0 % faster, avg. sp. ratio 111508.360x, max sp. ratio 111824.973x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1174731.4/1014123.1/1335339.7/2349462.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.5/ 9.1/ 12.0/ 21.1 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.5, min. 9.1, max. 12.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.529/ 3.529/ 3.529,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.597/ 4.299/ 4.299,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.565/ 6.565/ 6.565,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.995/ 7.998/ 7.998,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.21 s, min 0.20 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.21 s, min 0.20 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.540e+03, min 1.540e+03, max 1.540e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 5.469e-03, min 4.163e-03, max 6.774e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 1.247e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.861e-02s, min 1.417e-02s, max 2.306e-02s, tot 3.723e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.814e+00 1.814e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.865e+03, min 2.865e+03, max 2.865e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 2.569e-02, min 2.562e-02, max 2.576e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 6.702e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.737e-03s, min 3.726e-03s, max 3.747e-03s, tot 7.473e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.402e+00 2.402e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: ======== Limiting to nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3079.57 1.455e-02 0.000e+00 6.235e-08 0.000e+00 1.520e-01 6.61e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3079.57 1.598e-02 0.000e+00 6.235e-08 0.000e+00 1.480e-01 6.61e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 5729.68 3.717e-03 0.000e+00 3.351e-08 0.000e+00 3.439e-02 1.23e+01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 5729.68 3.711e-03 0.000e+00 3.351e-08 0.000e+00 3.314e-02 1.23e+01 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 24480779.5 % faster, avg. sp. ratio 244808.795x, max sp. ratio 256275.335x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2405252.4/2373143.4/2437361.4/4810504.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.9/ 9.3/ 10.4/ 19.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.9, min. 9.3, max. 10.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11083144.4 % faster, avg. sp. ratio 110832.444x, max sp. ratio 110921.380x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1007559.6/988907.9/1026211.3/2015119.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.9/ 9.3/ 18.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.9, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.068/ 5.068/ 5.068,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.216/ 6.608/ 6.608,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 9.430/ 9.430/ 9.430,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 24.590/ 12.295/ 12.295,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.15 s, max 0.15 s, tot 0.30 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.15 s, max 0.15 s, tot 0.30 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.080e+03, min 3.080e+03, max 3.080e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.261e-02, min 1.202e-02, max 1.320e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 1.247e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.526e-02s, min 1.455e-02s, max 1.598e-02s, tot 3.053e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.263e+00 1.263e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 5.730e+03, min 5.730e+03, max 5.730e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.170e-02, min 5.166e-02, max 5.174e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 6.702e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.714e-03s, min 3.711e-03s, max 3.717e-03s, tot 7.428e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.673e+00 1.673e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1539.78 1.417e-02 0.000e+00 6.235e-08 0.000e+00 2.119e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3079.57 1.455e-02 0.000e+00 6.235e-08 0.000e+00 1.520e-01 6.61e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1539.78 2.306e-02 0.000e+00 6.235e-08 0.000e+00 2.037e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3079.57 1.598e-02 0.000e+00 6.235e-08 0.000e+00 1.480e-01 6.61e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 2864.84 3.726e-03 0.000e+00 3.351e-08 0.000e+00 4.475e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 5729.68 3.717e-03 0.000e+00 3.351e-08 0.000e+00 3.439e-02 1.23e+01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 2864.84 3.747e-03 0.000e+00 3.351e-08 0.000e+00 3.398e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 5729.68 3.711e-03 0.000e+00 3.351e-08 0.000e+00 3.314e-02 1.23e+01 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 27168924.9 % faster, avg. sp. ratio 271690.249x, max sp. ratio 369835.564x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2869104.2/2373143.4/3399472.3/11476416.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.9/ 8.8/ 15.0/ 43.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.9, min. 8.8, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11116940.2 % faster, avg. sp. ratio 111170.402x, max sp. ratio 111824.973x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1091145.5/988907.9/1335339.7/4364582.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.8/ 8.9/ 12.0/ 39.3 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.8, min. 8.9, max. 12.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.299/ 3.529/ 5.068,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 21.814/ 4.299/ 6.608,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 7.998/ 6.565/ 9.430,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 40.585/ 7.998/ 12.295,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.15 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.15 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.310e+03, min 1.540e+03, max 3.080e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.038e-03, min 4.163e-03, max 1.320e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 2.494e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.694e-02s, min 1.417e-02s, max 2.306e-02s, tot 6.776e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.263e+00 1.814e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.15 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.15 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.297e+03, min 2.865e+03, max 5.730e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.869e-02, min 2.562e-02, max 5.174e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.725e-03s, min 3.711e-03s, max 3.747e-03s, tot 1.490e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.673e+00 2.402e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1539.78 1.417e-02 0.000e+00 6.235e-08 0.000e+00 2.119e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1539.78 2.306e-02 0.000e+00 6.235e-08 0.000e+00 2.037e-01 4.30e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 2864.84 3.726e-03 0.000e+00 3.351e-08 0.000e+00 4.475e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 2864.84 3.747e-03 0.000e+00 3.351e-08 0.000e+00 3.398e-02 8.00e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 29857070.2 % faster, avg. sp. ratio 298571.702x, max sp. ratio 369835.564x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3332956.0/3266439.8/3399472.3/6665912.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.9/ 8.8/ 15.0/ 23.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.9, min. 8.8, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11150736.0 % faster, avg. sp. ratio 111508.360x, max sp. ratio 111824.973x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1174731.4/1014123.1/1335339.7/2349462.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.5/ 9.1/ 12.0/ 21.1 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.5, min. 9.1, max. 12.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.529/ 3.529/ 3.529,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.597/ 4.299/ 4.299,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.565/ 6.565/ 6.565,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.995/ 7.998/ 7.998,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.21 s, min 0.20 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.21 s, min 0.20 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.540e+03, min 1.540e+03, max 1.540e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 5.469e-03, min 4.163e-03, max 6.774e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 1.247e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.861e-02s, min 1.417e-02s, max 2.306e-02s, tot 3.723e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.814e+00 1.814e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.08 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.865e+03, min 2.865e+03, max 2.865e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 2.569e-02, min 2.562e-02, max 2.576e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 6.702e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.737e-03s, min 3.726e-03s, max 3.747e-03s, tot 7.473e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.402e+00 2.402e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: ======== Limiting to both transA=N and nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3079.57 1.455e-02 0.000e+00 6.235e-08 0.000e+00 1.520e-01 6.61e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3079.57 1.598e-02 0.000e+00 6.235e-08 0.000e+00 1.480e-01 6.61e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 5729.68 3.717e-03 0.000e+00 3.351e-08 0.000e+00 3.439e-02 1.23e+01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 5729.68 3.711e-03 0.000e+00 3.351e-08 0.000e+00 3.314e-02 1.23e+01 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 24480779.5 % faster, avg. sp. ratio 244808.795x, max sp. ratio 256275.335x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2405252.4/2373143.4/2437361.4/4810504.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.9/ 9.3/ 10.4/ 19.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.9, min. 9.3, max. 10.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11083144.4 % faster, avg. sp. ratio 110832.444x, max sp. ratio 110921.380x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1007559.6/988907.9/1026211.3/2015119.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.9/ 9.3/ 18.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.9, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.068/ 5.068/ 5.068,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.216/ 6.608/ 6.608,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 9.430/ 9.430/ 9.430,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 24.590/ 12.295/ 12.295,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.15 s, max 0.15 s, tot 0.30 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.15 s, max 0.15 s, tot 0.30 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.080e+03, min 3.080e+03, max 3.080e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.261e-02, min 1.202e-02, max 1.320e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 1.247e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.526e-02s, min 1.455e-02s, max 1.598e-02s, tot 3.053e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.263e+00 1.263e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 5.730e+03, min 5.730e+03, max 5.730e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.170e-02, min 5.166e-02, max 5.174e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 6.702e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.714e-03s, min 3.711e-03s, max 3.717e-03s, tot 7.428e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.673e+00 1.673e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 4) matched the dump criteria -- skipping dump round. @@ -3754,44 +3790,44 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1539.78 1.417e-02 0.000e+00 6.235e-08 0.000e+00 2.119e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3079.57 1.455e-02 0.000e+00 6.235e-08 0.000e+00 1.520e-01 6.61e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1539.78 2.306e-02 0.000e+00 6.235e-08 0.000e+00 2.037e-01 4.30e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3079.57 1.598e-02 0.000e+00 6.235e-08 0.000e+00 1.480e-01 6.61e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 2864.84 3.726e-03 0.000e+00 3.351e-08 0.000e+00 4.475e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 5729.68 3.717e-03 0.000e+00 3.351e-08 0.000e+00 3.439e-02 1.23e+01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 2864.84 3.747e-03 0.000e+00 3.351e-08 0.000e+00 3.398e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 5729.68 3.711e-03 0.000e+00 3.351e-08 0.000e+00 3.314e-02 1.23e+01 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 27168924.9 % faster, avg. sp. ratio 271690.249x, max sp. ratio 369835.564x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2869104.2/2373143.4/3399472.3/11476416.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.9/ 8.8/ 15.0/ 43.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.9, min. 8.8, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11116940.2 % faster, avg. sp. ratio 111170.402x, max sp. ratio 111824.973x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1091145.5/988907.9/1335339.7/4364582.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.8/ 8.9/ 12.0/ 39.3 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.8, min. 8.9, max. 12.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.299/ 3.529/ 5.068,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 21.814/ 4.299/ 6.608,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 7.998/ 6.565/ 9.430,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 40.585/ 7.998/ 12.295,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.15 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.15 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.310e+03, min 1.540e+03, max 3.080e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.038e-03, min 4.163e-03, max 1.320e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.235e-08s, min 6.235e-08s, max 6.235e-08s, tot 2.494e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.694e-02s, min 1.417e-02s, max 2.306e-02s, tot 6.776e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.263e+00 1.814e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.891e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.15 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.15 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.297e+03, min 2.865e+03, max 5.730e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.869e-02, min 2.562e-02, max 5.174e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.351e-08s, min 3.351e-08s, max 3.351e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.725e-03s, min 3.711e-03s, max 3.747e-03s, tot 1.490e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.673e+00 2.402e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.614e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) -#pr: Record collection took 4.34 s. +#pr: Record collection took 0.87 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 100 environment variables in 5265 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 100 environment variables in 5318 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1740166905_gcc-14.2-1,4th.rpr -# Removing the temporary record file rsbench_pr__1740166905_gcc-14.2-1,4th.rpr.tmp. -# terminating run at 1740166916 (after 10.3s of w.c.t.) +#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1774579335_gcc-14.2-1,4th.rpr +# Removing the temporary record file rsbench_pr__1774579335_gcc-14.2-1,4th.rpr.tmp. +# terminating run at 1774579342 (after 7.4s of w.c.t.) + ./rsbench -oa -Ob --help /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench is a swiss army knife for testing the library functionality and performance. You can use it to perform sparse matrix - unitary vector multiplication, specifying the blocking parameters, the times to perform multiplication. @@ -4045,45 +4081,67 @@ Written by michelemartone_AT_users_DOT_sourceforge_DOT_net. + ./rsbench -I -cache block size : 26214 -hwloc size of cache level 1: 65536 -hwloc size of cache level 2: 524288 -detected max available cores/threads : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 -detected max OpenMP procs : 20 +cache block size : 99864 +hwloc size of cache level 1: 32768 +hwloc size of cache level 2: 4194304 +detected max available cores/threads : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 +detected max OpenMP procs : 42 detected 2 levels of cache -L1 size: 65536 -L2 size: 524288 +L1 size: 32768 +L2 size: 4194304 sysconf() : 4096 bytes per pagesize -sysconf() : 20586515 physical pages -sysconf() : 84322365440 bytes (80416 MB) of physical memory -sysconf() : 5378515 available (free) physical pages -sysconf() : 22030397440 available (free) physical memory -sysconf() , processors : 64 -sysconf() , processors online : 20 -sysconf() : level 1 cache size 65536 -sysconf() : level 1 cache associativity 2 +sysconf() : 20582916 physical pages +sysconf() : 84307623936 bytes (80402 MB) of physical memory +sysconf() : 15798676 available (free) physical pages +sysconf() : 64711376896 available (free) physical memory +sysconf() , processors : 128 +sysconf() , processors online : 42 +sysconf() : level 1 cache size 32768 +sysconf() : level 1 cache associativity 8 sysconf() : level 1 cache line size 64 -sysconf() : level 2 cache size 524288 -sysconf() : level 2 cache associativity 16 +sysconf() : level 2 cache size 2097152 +sysconf() : level 2 cache associativity 8 sysconf() : level 2 cache line size 64 sysconf() : no level 3 cache sysconf() : no level 4 cache @@ -4105,30 +4163,33 @@ RSB_SUBM_IDX_MARKER : 2147483647 RSB_MAX_ALLOCATABLE_MEMORY_CHUNK: 18446744073709551615 timing min delta (if negative, don't complain with us) : 0 s -timing granularity : 6.11067e-08 s +timing granularity : 3.35455e-08 s CFLAGS : -g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 CXXFLAGS : -g -O2 -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -fopenmp CC : gcc -memhinfo : L2:16/64/512K,L1:2/64/64K -detected free memory : 22029881344 -detected total memory : 84322365440 -for array sized 524288 elems, took 0.000623941 s for linear search and 0 s for binary search for element 524287, in 54 tries, for a total of 0.102144 s (ignore this:56622996) -for array sized 524288 elems, took 0.00029707 s for linear search and 0 s for binary search for element 262143, in 144 tries, for a total of 0.100424 s (ignore this:132120180) -for array sized 524288 elems, took 0.00014019 s for linear search and 0 s for binary search for element 131071, in 268 tries, for a total of 0.102858 s (ignore this:202374236) -for array sized 524288 elems, took 7.60555e-05 s for linear search and 0 s for binary search for element 65535, in 1067 tries, for a total of 0.100020 s (ignore this:342225926) -for array sized 524288 elems, took 3.91006e-05 s for linear search and 0 s for binary search for element 32767, in 2117 tries, for a total of 0.100030 s (ignore this:480961404) -for array sized 524288 elems, took 1.97887e-05 s for linear search and 0 s for binary search for element 16383, in 2735 tries, for a total of 0.100015 s (ignore this:570576414) -for array sized 524288 elems, took 9.77516e-06 s for linear search and 0 s for binary search for element 8191, in 5066 tries, for a total of 0.100009 s (ignore this:653567626) -for array sized 524288 elems, took 4.05312e-06 s for linear search and 0 s for binary search for element 4095, in 9920 tries, for a total of 0.100001 s (ignore this:734812426) -for array sized 524288 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 2047, in 17993 tries, for a total of 0.100001 s (ignore this:808475768) -for array sized 524288 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 1023, in 35778 tries, for a total of 0.100000 s (ignore this:881677556) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 511, in 64116 tries, for a total of 0.100001 s (ignore this:947204108) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 255, in 108065 tries, for a total of 0.101690 s (ignore this:1002317258) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 127, in 168292 tries, for a total of 0.106021 s (ignore this:1045063426) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 63, in 199380 tries, for a total of 0.103786 s (ignore this:1070185306) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 31, in 203095 tries, for a total of 0.101148 s (ignore this:1082777196) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 15, in 236688 tries, for a total of 0.100373 s (ignore this:1089877836) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 7, in 292331 tries, for a total of 0.105502 s (ignore this:1093970470) +memhinfo : L2:16/64/4M,L1:8/64/32K +detected free memory : 64711376896 +detected total memory : 84307623936 +for array sized 4194304 elems, took 0.00145388 s for linear search and 0 s for binary search for element 4194303, in 68 tries, for a total of 0.100502 s (ignore this:570425208) +for array sized 4194304 elems, took 0.000726938 s for linear search and 0 s for binary search for element 2097151, in 137 tries, for a total of 0.100258 s (ignore this:1145044582) +for array sized 4194304 elems, took 0.000362873 s for linear search and 0 s for binary search for element 1048575, in 273 tries, for a total of 0.100198 s (ignore this:1717566532) +for array sized 4194304 elems, took 0.00018096 s for linear search and 0 s for binary search for element 524287, in 547 tries, for a total of 0.100088 s (ignore this:-2003830786) +for array sized 4194304 elems, took 8.98838e-05 s for linear search and 0 s for binary search for element 262143, in 1093 tries, for a total of 0.100089 s (ignore this:-1430786188) +for array sized 4194304 elems, took 4.48227e-05 s for linear search and 0 s for binary search for element 131071, in 2174 tries, for a total of 0.100042 s (ignore this:-860889480) +for array sized 4194304 elems, took 2.19345e-05 s for linear search and 0 s for binary search for element 65535, in 4323 tries, for a total of 0.100011 s (ignore this:-294273870) +for array sized 4194304 elems, took 1.09673e-05 s for linear search and 0 s for binary search for element 32767, in 8682 tries, for a total of 0.100017 s (ignore this:274692318) +for array sized 4194304 elems, took 4.76837e-06 s for linear search and 0 s for binary search for element 16383, in 17199 tries, for a total of 0.100003 s (ignore this:838234752) +for array sized 4194304 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 8191, in 33766 tries, for a total of 0.100002 s (ignore this:1391389364) +for array sized 4194304 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 4095, in 65216 tries, for a total of 0.100001 s (ignore this:1925508404) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 2047, in 122104 tries, for a total of 0.100001 s (ignore this:-1869565116) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 1023, in 216842 tries, for a total of 0.100000 s (ignore this:-1425906384) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 511, in 353019 tries, for a total of 0.100000 s (ignore this:-1065120966) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 255, in 513510 tries, for a total of 0.100000 s (ignore this:-803230866) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 127, in 670686 tries, for a total of 0.100001 s (ignore this:-632876622) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 63, in 870678 tries, for a total of 0.100000 s (ignore this:-523171194) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 31, in 968745 tries, for a total of 0.100001 s (ignore this:-463109004) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 15, in 1009903 tries, for a total of 0.100001 s (ignore this:-432811914) +for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 7, in 1023102 tries, for a total of 0.100001 s (ignore this:-418488486) + ./rsbench -C /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench version: 1.3.0 format switches:br @@ -4155,7 +4216,7 @@ RSB_CONST_MAX_SUPPORTED_CORES:128 RSB_BLAS_MATRICES_MAX:2147482623 RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH:2 -RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/512K,L1:2/64/64K +RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/4096K,L1:8/64/32K RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t):65535 RSB_IOLEVEL:7 LIBRSBPP support: on. @@ -4175,17 +4236,17 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740166919 +# beginning run at 1774579345 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx --verbose --nrhs 1,4 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -# User did not specify threads; assuming 1. Environment provides max 20 threads; this build supports max 128. -# User did not specify threads; assuming 1. Environment provides max 20 threads; this build supports max 128. -# average timer granularity: 6e-08 s -# Will write a final performance record to file rsbench_pr__1740166919_gcc-14.2.rpr and periodic checkpoints to rsbench_pr__1740166919_gcc-14.2.rpr.tmp +# User did not specify threads; assuming 1. Environment provides max 42 threads; this build supports max 128. +# User did not specify threads; assuming 1. Environment provides max 42 threads; this build supports max 128. +# average timer granularity: 3.35e-08 s +# Will write a final performance record to file rsbench_pr__1774579345_gcc-14.2.rpr and periodic checkpoints to rsbench_pr__1774579345_gcc-14.2.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -4224,835 +4285,835 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos1-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.194s +# Memory benchmark took 6.226s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 16 samples (4480 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 5.201s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 6.228s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type D... -# file input of A.mtx took 0.00 s (6 nnz, 29712 nnz/s ) (0.91 MB/s ) -#pre-sorting (6 elements) took 0.0414691 s -#weeding duplicates (to 6 elements) took 7.15256e-06 s (and check, 2.86102e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 60495 nnz/s ) (1.86 MB/s ) +#pre-sorting (6 elements) took 0.00391793 s +#weeding duplicates (to 6 elements) took 2.14577e-06 s (and check, 1.90735e-06 s ) # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.147s): (3 x 3)[0x55af54ab9590]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.024s): (3 x 3)[0x55f57ad236a0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.05122s; avg 0.01707s ( +/- 6.13/ 9.26 %); best 0.01603s; worst 0.01865s; std dev. 0.001137 (taking best). -Reference operation time is 0.016026 s (0.001498 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01603 Mflops: 0.001) -Merge (3 -> 1 leaves) took w.c.t. of 5.198e-05s, ~1.717e-05s of computing time (of which 3.099e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 3.409e-05s; avg 1.136e-05s ( +/- 99.44/191.61 %); best 6.365e-08s; worst 3.314e-05s; std dev. 1.54e-05 (taking best). -Reference operation time is 6.36458e-08 s (377.1 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 377.087 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 251799.963x: 0.01603s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01167s; avg 0.003889s ( +/- 0.92/ 0.60 %); best 0.003853s; worst 0.003912s; std dev. 2.575e-05 (taking best). +Reference operation time is 0.00385284 s (0.006229 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003853 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 1.884e-05s, ~7.868e-06s of computing time (of which 1.907e-06s sorting, 2.861e-06s analysis) +3 iterations (42 th.) took 2.098e-05s; avg 6.994e-06s ( +/- 99.52/186.36 %); best 3.35e-08s; worst 2.003e-05s; std dev. 9.224e-06 (taking best). +Reference operation time is 3.34978e-08 s (716.5 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 716.465 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 115017.794x: 0.003853s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05597s (of which 5.984e-05s partitioning, 0s I/O); computing times: 1.717e-05s in par. loops, 3.099e-06s sorting, 1.001e-05s analyzing) -Total merge + benchmarking process took 0.05597s, equivalent to 879400.6/3.5 new/old ops (0.112s for 2 clones -- as 1760269.7/7.0 ops, or 880134.9/3.5 ops per clone), SPEEDUP of 251799.963x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 251799.963x (0.01603s -> 6.365e-08s), will amortize in 3.5 ops by saving 0.01603s per op. -In 1 tuning rounds (tot. 0.16s, 0.11s for constructor, 2 clones) obtained a SPEEDUP of 25179896.3% (2.518e+05x) (from 0.001498 to 377.1 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01157s (of which 2.313e-05s partitioning, 0s I/O); computing times: 7.868e-06s in par. loops, 1.907e-06s sorting, 2.861e-06s analyzing) +Total merge + benchmarking process took 0.01157s, equivalent to 345430.6/3.0 new/old ops (0.02279s for 2 clones -- as 680398.6/5.9 ops, or 340199.3/3.0 ops per clone), SPEEDUP of 115017.794x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 115017.794x (0.003853s -> 3.35e-08s), will amortize in 3.0 ops by saving 0.003853s per op. +In 1 tuning rounds (tot. 0.035s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11501679.4% (1.15e+05x) (from 0.006229 to 716.5 Mflops). #pr: updating sample at index 1 (0^th of 16), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.163596 s (1.603e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0345972 s (3.853e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.05194 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.193444 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.063404 0.039665 0.103069 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.103069 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.063404 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.039665 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.103069 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009348 0.007494 0.016842 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016842 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009348 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007494 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016842 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.147372 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.0634041 0 0.039665 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0244401 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00934792 0 0.00749397 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.145s): (3 x 3)[0x55af54ab9590]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.025s): (3 x 3)[0x55f57ad236a0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.08057s; avg 0.02686s ( +/- 10.71/ 19.07 %); best 0.02398s; worst 0.03198s; std dev. 0.003631 (taking best). -Reference operation time is 0.0239789 s (0.004004 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.02398 Mflops: 0.004) -Merge (3 -> 1 leaves) took w.c.t. of 3.099e-05s, ~1.001e-05s of computing time (of which 2.861e-06s sorting, 5.96e-06s analysis) -3 iterations (20 th.) took 8.821e-06s; avg 2.94e-06s ( +/- 97.84/167.57 %); best 6.365e-08s; worst 7.868e-06s; std dev. 3.506e-06 (taking best). -Reference operation time is 6.36458e-08 s (1508 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 1508.347 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 376755.947x: 0.02398s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01115s; avg 0.003718s ( +/- 1.55/ 0.82 %); best 0.00366s; worst 0.003748s; std dev. 4.071e-05 (taking best). +Reference operation time is 0.0036602 s (0.02623 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.00366 Mflops: 0.026) +Merge (3 -> 1 leaves) took w.c.t. of 1.311e-05s, ~6.199e-06s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) +3 iterations (42 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 97.99/200.00 %); best 3.35e-08s; worst 5.007e-06s; std dev. 2.36e-06 (taking best). +Reference operation time is 3.34978e-08 s (2866 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 2865.859 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 109266.904x: 0.00366s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06193s (of which 3.6e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 2.861e-06s sorting, 5.96e-06s analyzing) -Total merge + benchmarking process took 0.06193s, equivalent to 973103.6/2.6 new/old ops (0.1236s for 2 clones -- as 1941711.9/5.2 ops, or 970856.0/2.6 ops per clone), SPEEDUP of 376755.947x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 376755.947x (0.02398s -> 6.365e-08s), will amortize in 2.6 ops by saving 0.02398s per op. -In 1 tuning rounds (tot. 0.2s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 37675494.7% (3.768e+05x) (from 0.004004 to 1508 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01139s (of which 1.621e-05s partitioning, 0s I/O); computing times: 6.199e-06s in par. loops, 1.907e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01139s, equivalent to 340142.3/3.1 new/old ops (0.02297s for 2 clones -- as 685800.7/6.3 ops, or 342900.4/3.1 ops per clone), SPEEDUP of 109266.904x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 109266.904x (0.00366s -> 3.35e-08s), will amortize in 3.1 ops by saving 0.00366s per op. +In 1 tuning rounds (tot. 0.034s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 10926590.4% (1.093e+05x) (from 0.02623 to 2866 Mflops). #pr: updating sample at index 9 (1^th of 16), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.204501 s (2.398e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0342109 s (3.660e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.18541 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.189973 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.059167 0.040015 0.099182 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.099182 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.059167 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.040015 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.099182 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009218 0.007667 0.016885 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016885 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009218 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007667 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016885 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.14518 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.0591671 0 0.040015 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0245459 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00921798 0 0.00766706 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 8.379s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.605s/0.000s . +# so far, program took 6.787s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.452s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.07677s (system CPU time used) -ru_utime : 32.4s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.2153s (system CPU time used) +ru_utime : 26.94s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 8.379s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.605s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 6.787s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.452s/0.000s . # Reusing type converted (D->S) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.143s): (3 x 3)[0x55af54ab9590]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.027s): (3 x 3)[0x55f57ad236a0]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.07203s; avg 0.02401s ( +/- 16.54/ 16.52 %); best 0.02004s; worst 0.02797s; std dev. 0.00324 (taking best). -Reference operation time is 0.0200372 s (0.001198 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.02004 Mflops: 0.001) -Merge (3 -> 1 leaves) took w.c.t. of 3.886e-05s, ~1.216e-05s of computing time (of which 2.861e-06s sorting, 6.914e-06s analysis) -3 iterations (20 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 99.48/192.26 %); best 6.365e-08s; worst 3.6e-05s; std dev. 1.675e-05 (taking best). -Reference operation time is 6.36458e-08 s (377.1 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 377.087 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 314823.001x: 0.02004s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01172s; avg 0.003906s ( +/- 2.16/ 1.32 %); best 0.003822s; worst 0.003958s; std dev. 6.007e-05 (taking best). +Reference operation time is 0.00382209 s (0.006279 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003822 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 1.287e-05s, ~5.007e-06s of computing time (of which 2.146e-06s sorting, 1.907e-06s analysis) +3 iterations (42 th.) took 2.193e-05s; avg 7.312e-06s ( +/- 99.54/186.96 %); best 3.35e-08s; worst 2.098e-05s; std dev. 9.674e-06 (taking best). +Reference operation time is 3.34978e-08 s (716.5 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 716.465 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 114099.644x: 0.003822s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06791s (of which 4.292e-05s partitioning, 0s I/O); computing times: 1.216e-05s in par. loops, 2.861e-06s sorting, 6.914e-06s analyzing) -Total merge + benchmarking process took 0.06791s, equivalent to 1067027.5/3.4 new/old ops (0.1397s for 2 clones -- as 2195242.6/7.0 ops, or 1097621.3/3.5 ops per clone), SPEEDUP of 314823.001x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 314823.001x (0.02004s -> 6.365e-08s), will amortize in 3.4 ops by saving 0.02004s per op. -In 1 tuning rounds (tot. 0.21s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 31482200.1% (3.148e+05x) (from 0.001198 to 377.1 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01141s (of which 1.693e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 2.146e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01141s, equivalent to 340619.2/3.0 new/old ops (0.02477s for 2 clones -- as 739508.9/6.5 ops, or 369754.4/3.2 ops per clone), SPEEDUP of 114099.644x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 114099.644x (0.003822s -> 3.35e-08s), will amortize in 3.0 ops by saving 0.003822s per op. +In 1 tuning rounds (tot. 0.037s, 0.025s for constructor, 2 clones) obtained a SPEEDUP of 11409864.4% (1.141e+05x) (from 0.006279 to 716.5 Mflops). #pr: updating sample at index 3 (2^th of 16), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.212084 s (2.004e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0367529 s (3.822e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.979866 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.176516 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.055442 0.043997 0.099439 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.099439 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.055442 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.043997 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.099439 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009083 0.007826 0.016909 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016909 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009083 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007826 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016909 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.143436 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.0554419 0 0.0439968 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.026747 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00908303 0 0.00782609 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.136s): (3 x 3)[0x55af54ab9590]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.025s): (3 x 3)[0x55f57ad236a0]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.06799s; avg 0.02266s ( +/- 29.35/ 23.49 %); best 0.01601s; worst 0.02799s; std dev. 0.004978 (taking best). -Reference operation time is 0.016012 s (0.005996 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01601 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 3.505e-05s, ~1.192e-05s of computing time (of which 2.146e-06s sorting, 5.96e-06s analysis) -3 iterations (20 th.) took 3.004e-05s; avg 1.001e-05s ( +/- 99.36/190.48 %); best 6.365e-08s; worst 2.909e-05s; std dev. 1.349e-05 (taking best). -Reference operation time is 6.36458e-08 s (1508 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 1508.347 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 251578.947x: 0.01601s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01134s; avg 0.003779s ( +/- 0.45/ 0.75 %); best 0.003762s; worst 0.003807s; std dev. 2.023e-05 (taking best). +Reference operation time is 0.00376177 s (0.02552 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003762 Mflops: 0.026) +Merge (3 -> 1 leaves) took w.c.t. of 1.383e-05s, ~5.96e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) +3 iterations (42 th.) took 1.907e-05s; avg 6.358e-06s ( +/- 99.47/200.00 %); best 3.35e-08s; worst 1.907e-05s; std dev. 8.991e-06 (taking best). +Reference operation time is 3.34978e-08 s (2866 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 2865.859 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 112298.932x: 0.003762s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06403s (of which 4.315e-05s partitioning, 0s I/O); computing times: 1.192e-05s in par. loops, 2.146e-06s sorting, 5.96e-06s analyzing) -Total merge + benchmarking process took 0.06403s, equivalent to 1005974.9/4.0 new/old ops (0.1398s for 2 clones -- as 2196448.8/8.7 ops, or 1098224.4/4.4 ops per clone), SPEEDUP of 251578.947x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 251578.947x (0.01601s -> 6.365e-08s), will amortize in 4.0 ops by saving 0.01601s per op. -In 1 tuning rounds (tot. 0.21s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 25157794.7% (2.516e+05x) (from 0.005996 to 1508 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01169s (of which 1.812e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 1.192e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01169s, equivalent to 349010.7/3.1 new/old ops (0.02265s for 2 clones -- as 676071.2/6.0 ops, or 338035.6/3.0 ops per clone), SPEEDUP of 112298.932x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 112298.932x (0.003762s -> 3.35e-08s), will amortize in 3.1 ops by saving 0.003762s per op. +In 1 tuning rounds (tot. 0.034s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11229793.2% (1.123e+05x) (from 0.02552 to 2866 Mflops). #pr: updating sample at index 11 (3^th of 16), 0^th touch for (0,0,0,0,1,1,0). -First run of RSB Autotuner took 0.208041 s (1.601e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0343659 s (3.762e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.839873 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.168203 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.055765 0.027985 0.083750 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.083750 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.055765 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.027985 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.083750 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009519 0.007823 0.017342 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.017342 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009519 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007823 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.017342 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.135787 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.0557652 0 0.0279851 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0250449 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.0095191 0 0.00782299 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 11.127s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.845s/0.000s . +# so far, program took 7.299s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.868s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.1084s (system CPU time used) -ru_utime : 59.52s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.2709s (system CPU time used) +ru_utime : 45.7s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was S). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 11.127s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.845s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 7.299s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.868s/0.000s . # Reusing type converted (S->C) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.140s): (3 x 3)[0x55af54abc8a0]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.025s): (3 x 3)[0x55f57ad26a20]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.056s; avg 0.01867s ( +/- 14.12/ 7.09 %); best 0.01603s; worst 0.01999s; std dev. 0.001864 (taking best). -Reference operation time is 0.0160301 s (0.005989 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01603 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 3.91e-05s, ~1.097e-05s of computing time (of which 1.907e-06s sorting, 5.96e-06s analysis) -3 iterations (20 th.) took 3.505e-05s; avg 1.168e-05s ( +/- 99.46/189.80 %); best 6.365e-08s; worst 3.386e-05s; std dev. 1.569e-05 (taking best). -Reference operation time is 6.36458e-08 s (1508 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 1508.347 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 251863.645x: 0.01603s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01137s; avg 0.003792s ( +/- 5.35/ 3.12 %); best 0.003589s; worst 0.00391s; std dev. 0.000144 (taking best). +Reference operation time is 0.00358891 s (0.02675 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003589 Mflops: 0.027) +Merge (3 -> 1 leaves) took w.c.t. of 1.311e-05s, ~5.007e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) +3 iterations (42 th.) took 1.693e-05s; avg 5.643e-06s ( +/- 99.41/183.10 %); best 3.35e-08s; worst 1.597e-05s; std dev. 7.316e-06 (taking best). +Reference operation time is 3.34978e-08 s (2866 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 2865.859 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 107138.790x: 0.003589s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05595s (of which 4.315e-05s partitioning, 0s I/O); computing times: 1.097e-05s in par. loops, 1.907e-06s sorting, 5.96e-06s analyzing) -Total merge + benchmarking process took 0.05595s, equivalent to 879130.9/3.5 new/old ops (0.1193s for 2 clones -- as 1874766.8/7.4 ops, or 937383.4/3.7 ops per clone), SPEEDUP of 251863.645x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 251863.645x (0.01603s -> 6.365e-08s), will amortize in 3.5 ops by saving 0.01603s per op. -In 1 tuning rounds (tot. 0.18s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 25186264.5% (2.519e+05x) (from 0.005989 to 1508 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01192s (of which 1.693e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.907e-06s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.01192s, equivalent to 355964.4/3.3 new/old ops (0.02299s for 2 clones -- as 686284.7/6.4 ops, or 343142.3/3.2 ops per clone), SPEEDUP of 107138.790x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 107138.790x (0.003589s -> 3.35e-08s), will amortize in 3.3 ops by saving 0.003589s per op. +In 1 tuning rounds (tot. 0.034s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 10713779.0% (1.071e+05x) (from 0.02675 to 2866 Mflops). #pr: updating sample at index 5 (4^th of 16), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.175557 s (1.603e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0344639 s (3.589e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.00391 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.198782 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.059757 0.044021 0.103778 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.103778 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.059757 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.044021 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.103778 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009361 0.007892 0.017253 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.017253 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009361 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007892 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.017253 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.139785 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.059757 0 0.0440211 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0252512 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00936103 0 0.00789213 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.132s): (3 x 3)[0x55af54abc8a0]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.025s): (3 x 3)[0x55f57ad26a20]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.06002s; avg 0.02001s ( +/- 40.12/ 20.28 %); best 0.01198s; worst 0.02406s; std dev. 0.005676 (taking best). -Reference operation time is 0.0119791 s (0.03206 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01198 Mflops: 0.032) -Merge (3 -> 1 leaves) took w.c.t. of 3.505e-05s, ~1.001e-05s of computing time (of which 1.192e-06s sorting, 5.96e-06s analysis) -3 iterations (20 th.) took 3.099e-05s; avg 1.033e-05s ( +/- 90.77/181.54 %); best 9.537e-07s; worst 2.909e-05s; std dev. 1.326e-05 (taking best). -Reference operation time is 9.53674e-07 s (402.7 Mflops) with 20 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 12561.000x: 0.01198s -> 9.537e-07s, so taking this instance. +3 iterations (42 th.) took 0.01219s; avg 0.004065s ( +/- 0.85/ 0.77 %); best 0.00403s; worst 0.004096s; std dev. 2.706e-05 (taking best). +Reference operation time is 0.00402999 s (0.09529 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.00403 Mflops: 0.095) +Merge (3 -> 1 leaves) took w.c.t. of 1.001e-05s, ~5.007e-06s of computing time (of which 1.907e-06s sorting, 9.537e-07s analysis) +3 iterations (42 th.) took 2.003e-05s; avg 6.676e-06s ( +/- 99.50/185.71 %); best 3.35e-08s; worst 1.907e-05s; std dev. 8.775e-06 (taking best). +Reference operation time is 3.34978e-08 s (1.146e+04 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 11463.436 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 120306.050x: 0.00403s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05995s (of which 3.982e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 1.192e-06s sorting, 5.96e-06s analyzing) -Total merge + benchmarking process took 0.05995s, equivalent to 62862.0/5.0 new/old ops (0.1197s for 2 clones -- as 125531.0/10.0 ops, or 62765.5/5.0 ops per clone), SPEEDUP of 12561.000x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 12561.000x (0.01198s -> 9.537e-07s), will amortize in 5.0 ops by saving 0.01198s per op. -In 1 tuning rounds (tot. 0.18s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 1256000.0% (1.256e+04x) (from 0.03206 to 402.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01188s (of which 1.407e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.907e-06s sorting, 9.537e-07s analyzing) +Total merge + benchmarking process took 0.01188s, equivalent to 354533.8/2.9 new/old ops (0.02363s for 2 clones -- as 705565.8/5.9 ops, or 352782.9/2.9 ops per clone), SPEEDUP of 120306.050x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 120306.050x (0.00403s -> 3.35e-08s), will amortize in 2.9 ops by saving 0.00403s per op. +In 1 tuning rounds (tot. 0.036s, 0.024s for constructor, 2 clones) obtained a SPEEDUP of 12030505.0% (1.203e+05x) (from 0.09529 to 1.146e+04 Mflops). #pr: updating sample at index 13 (5^th of 16), 0^th touch for (0,0,0,0,1,2,0). -First run of RSB Autotuner took 0.179978 s (1.198e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0359259 s (4.030e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.12391 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.19716 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.047560 0.036033 0.083593 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.083593 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.047560 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.036033 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.083593 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009378 0.007567 0.016945 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016945 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009378 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007567 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016945 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.131623 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.04756 0 0.0360329 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0248699 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00937796 0 0.00756693 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 14.072s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 7.329s/0.000s . +# so far, program took 7.861s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.334s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.1419s (system CPU time used) -ru_utime : 89.3s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.2811s (system CPU time used) +ru_utime : 66.26s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was C). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 14.072s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 7.329s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 7.861s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.334s/0.000s . # Reusing type converted (C->Z) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.131s): (3 x 3)[0x55af54abc8a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.025s): (3 x 3)[0x55f57ad26a20]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.04799s; avg 0.016s ( +/- 0.16/ 0.14 %); best 0.01597s; worst 0.01602s; std dev. 1.976e-05 (taking best). -Reference operation time is 0.0159721 s (0.00601 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01597 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 4.101e-05s, ~1.502e-05s of computing time (of which 1.907e-06s sorting, 6.199e-06s analysis) -3 iterations (20 th.) took 4.005e-05s; avg 1.335e-05s ( +/- 99.52/192.86 %); best 6.365e-08s; worst 3.91e-05s; std dev. 1.821e-05 (taking best). -Reference operation time is 6.36458e-08 s (1508 Mflops) with 20 threads. -After merge step 1: tpop: 6.365e-08 s ~Mflops: 1508.347 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 250953.362x: 0.01597s -> 6.365e-08s, so taking this instance. +3 iterations (42 th.) took 0.01177s; avg 0.003924s ( +/- 1.95/ 2.22 %); best 0.003847s; worst 0.004011s; std dev. 6.73e-05 (taking best). +Reference operation time is 0.00384712 s (0.02495 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.003847 Mflops: 0.025) +Merge (3 -> 1 leaves) took w.c.t. of 1.383e-05s, ~5.96e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) +3 iterations (42 th.) took 2.193e-05s; avg 7.312e-06s ( +/- 99.54/200.00 %); best 3.35e-08s; worst 2.193e-05s; std dev. 1.034e-05 (taking best). +Reference operation time is 3.34978e-08 s (2866 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 2865.859 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 114846.975x: 0.003847s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06397s (of which 4.697e-05s partitioning, 0s I/O); computing times: 1.502e-05s in par. loops, 1.907e-06s sorting, 6.199e-06s analyzing) -Total merge + benchmarking process took 0.06397s, equivalent to 1005060.9/4.0 new/old ops (0.1237s for 2 clones -- as 1943472.6/7.7 ops, or 971736.3/3.9 ops per clone), SPEEDUP of 250953.362x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 250953.362x (0.01597s -> 6.365e-08s), will amortize in 4.0 ops by saving 0.01597s per op. -In 1 tuning rounds (tot. 0.17s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 25095236.2% (2.51e+05x) (from 0.00601 to 1508 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.01163s (of which 1.693e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 1.192e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.01163s, equivalent to 347309.6/3.0 new/old ops (0.02336s for 2 clones -- as 697210.0/6.1 ops, or 348605.0/3.0 ops per clone), SPEEDUP of 114846.975x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 114846.975x (0.003847s -> 3.35e-08s), will amortize in 3.0 ops by saving 0.003847s per op. +In 1 tuning rounds (tot. 0.035s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11484597.5% (1.148e+05x) (from 0.02495 to 2866 Mflops). #pr: updating sample at index 7 (6^th of 16), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.171963 s (1.597e-02 s -> 6.365e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0352328 s (3.847e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.07594 s and estimated a speedup of 1.000000 x (6.365e-08 s -> 6.365e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.192596 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.051002 0.047357 0.098359 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.098359 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.051002 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.047357 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.098359 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.009154 0.007501 0.016655 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016655 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.009154 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007501 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.016655 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.131082 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.051002 0 0.0473571 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0246501 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.00915384 0 0.00750089 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 20 threads -# Constructed matrix (took 0.155s): (3 x 3)[0x55af54abc8a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 42 threads +# Constructed matrix (took 0.027s): (3 x 3)[0x55f57ad26a20]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (20 th.) took 0.06s; avg 0.02s ( +/- 40.09/ 20.05 %); best 0.01198s; worst 0.02401s; std dev. 0.005669 (taking best). -Reference operation time is 0.011982 s (0.03205 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01198 Mflops: 0.032) -Merge (3 -> 1 leaves) took w.c.t. of 3.409e-05s, ~1.097e-05s of computing time (of which 3.099e-06s sorting, 5.007e-06s analysis) -3 iterations (20 th.) took 1.478e-05s; avg 4.927e-06s ( +/- 80.65/161.29 %); best 9.537e-07s; worst 1.287e-05s; std dev. 5.62e-06 (taking best). -Reference operation time is 9.53674e-07 s (402.7 Mflops) with 20 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:20 -Applying merge (3 -> 1 leaves, 20 th.) yielded SPEEDUP of 12564.000x: 0.01198s -> 9.537e-07s, so taking this instance. +3 iterations (42 th.) took 0.01147s; avg 0.003824s ( +/- 1.94/ 2.91 %); best 0.00375s; worst 0.003935s; std dev. 8.002e-05 (taking best). +Reference operation time is 0.00374985 s (0.1024 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.00375 Mflops: 0.102) +Merge (3 -> 1 leaves) took w.c.t. of 1.287e-05s, ~5.007e-06s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) +3 iterations (42 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 98.31/152.00 %); best 3.35e-08s; worst 5.007e-06s; std dev. 2.171e-06 (taking best). +Reference operation time is 3.34978e-08 s (1.146e+04 Mflops) with 42 threads. +After merge step 1: tpop: 3.35e-08 s ~Mflops: 11463.436 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 111943.060x: 0.00375s -> 3.35e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06s (of which 3.91e-05s partitioning, 0s I/O); computing times: 1.097e-05s in par. loops, 3.099e-06s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.06s, equivalent to 62913.5/5.0 new/old ops (0.1198s for 2 clones -- as 125584.0/10.0 ops, or 62792.0/5.0 ops per clone), SPEEDUP of 12564.000x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 12564.000x (0.01198s -> 9.537e-07s), will amortize in 5.0 ops by saving 0.01198s per op. -In 1 tuning rounds (tot. 0.18s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 1256300.0% (1.256e+04x) (from 0.03205 to 402.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.0115s (of which 1.693e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.907e-06s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.0115s, equivalent to 343302.5/3.1 new/old ops (0.02315s for 2 clones -- as 690968.0/6.2 ops, or 345484.0/3.1 ops per clone), SPEEDUP of 111943.060x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 111943.060x (0.00375s -> 3.35e-08s), will amortize in 3.1 ops by saving 0.00375s per op. +In 1 tuning rounds (tot. 0.035s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 11194206.0% (1.119e+05x) (from 0.1024 to 1.146e+04 Mflops). #pr: updating sample at index 15 (7^th of 16), 0^th touch for (0,0,0,0,1,3,0). -First run of RSB Autotuner took 0.179992 s (1.198e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0348589 s (3.750e-03 s -> 3.350e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 1.02743 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.195505 s and estimated a speedup of 1.000000 x (3.350e-08 s -> 3.350e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 20 3 3 6 0.000000 0.058994 0.045833 0.104827 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.104827 -%:RSB_SUBDIVISION_TIME:A.mtx S N 20 3 3 6 0.058994 -%:RSB_SHUFFLE_TIME:A.mtx S N 20 3 3 6 0.045833 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 20 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 20 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 20 3 3 6 0.104827 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 20 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 20 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 20 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 20 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 42 3 3 6 0.000000 0.011535 0.007652 0.019187 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.019187 +%:RSB_SUBDIVISION_TIME:A.mtx S N 42 3 3 6 0.011535 +%:RSB_SHUFFLE_TIME:A.mtx S N 42 3 3 6 0.007652 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 42 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 42 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 42 3 3 6 0.019187 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 42 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 42 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 42 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 42 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 20 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 20 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 20 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 20 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 20 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 42 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 42 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 42 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 42 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 42 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[20] SPMV[20] SPMV[20] -%operation:A.mtx 0.155043 1e+09 1e+09 -%constructor:matrix SORT[20] SCAN[20] SHUFFLE[20] INSERT[20] -%constructor:A.mtx 0 0.0589941 0 0.0458331 +%operation:matrix CONSTRUCTOR[42] SPMV[42] SPMV[42] +%operation:A.mtx 0.0266988 1e+09 1e+09 +%constructor:matrix SORT[42] SCAN[42] SHUFFLE[42] INSERT[42] +%constructor:A.mtx 0 0.0115349 0 0.00765204 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 17.023s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 9.784s/0.000s . +# so far, program took 8.417s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.793s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.1994s (system CPU time used) -ru_utime : 117.3s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.3514s (system CPU time used) +ru_utime : 86.46s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to type D: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 20 20 0 4.0000 4.6667 3 1 377.09 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.636e-01 2.33e+00 5.17e+00 1 2.40e-05 -pr: 9:R_R A 3 3 6 4 D S N 20 20 0 4.0000 4.6667 3 1 1508.35 2.398e-02 0.000e+00 6.365e-08 0.000e+00 2.045e-01 5.72e+00 2.79e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 42 42 0 4.0000 4.6667 3 1 716.46 3.853e-03 0.000e+00 3.350e-08 0.000e+00 3.460e-02 4.42e+00 5.17e+00 1 2.40e-05 +pr: 9:R_R A 3 3 6 4 D S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.660e-03 0.000e+00 3.350e-08 0.000e+00 3.421e-02 1.09e+01 2.79e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 31427695.5 % faster, avg. sp. ratio 314277.955x, max sp. ratio 376755.947x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2891758.8/2570410.2/3213107.3/5783517.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.4/ 8.5/ 10.2/ 18.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.4, min. 8.5, max. 10.2 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11214134.9 % faster, avg. sp. ratio 112142.349x, max sp. ratio 115017.794x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1027053.4/1021288.3/1032818.5/2054106.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.2/ 9.0/ 9.3/ 18.3 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.2, min. 9.0, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.080/ 1.948/ 4.211,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.045/ 2.325/ 5.719,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.851/ 3.702/ 8.001,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.285/ 4.418/ 10.866,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 3.979/ 2.792/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.37 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.37 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.427e+02, min 3.771e+02, max 1.508e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.751e-03, min 1.498e-03, max 4.004e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.365e-08s, min 6.365e-08s, max 6.365e-08s, tot 1.273e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 2.000e-02s, min 1.603e-02s, max 2.398e-02s, tot 4.000e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.292e+00 2.793e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.03 s, max 0.03 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.791e+03, min 7.165e+02, max 2.866e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.623e-02, min 6.229e-03, max 2.623e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 6.700e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.757e-03s, min 3.660e-03s, max 3.853e-03s, tot 7.513e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.005e+00 4.333e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type S: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 1 S S N 20 20 0 4.0000 4.6667 3 1 377.09 2.004e-02 0.000e+00 6.365e-08 0.000e+00 2.121e-01 1.38e+00 3.17e+00 1 2.40e-05 -pr: 11:R_R A 3 3 6 4 S S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.601e-02 0.000e+00 6.365e-08 0.000e+00 2.080e-01 3.08e+00 1.54e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 1 S S N 42 42 0 4.0000 4.6667 3 1 716.46 3.822e-03 0.000e+00 3.350e-08 0.000e+00 3.675e-02 2.63e+00 3.17e+00 1 2.40e-05 +pr: 11:R_R A 3 3 6 4 S S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.762e-03 0.000e+00 3.350e-08 0.000e+00 3.437e-02 5.85e+00 1.54e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 28319997.4 % faster, avg. sp. ratio 283200.974x, max sp. ratio 314823.001x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3300490.7/3268728.2/3332253.2/6600981.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.8/ 10.6/ 13.0/ 23.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.8, min. 10.6, max. 13.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11319828.8 % faster, avg. sp. ratio 113199.288x, max sp. ratio 114099.644x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1061544.5/1025914.6/1097174.4/2123089.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.4/ 9.1/ 9.6/ 18.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.4, min. 9.1, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 8/ 8/ 8) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 24/ 24/ 24) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.760/ 1.194/ 2.325,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.462/ 1.383/ 3.080,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.344/ 2.269/ 4.418,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.478/ 2.627/ 5.851,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.354/ 1.542/ 3.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.21 s, min 0.21 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.21 s, min 0.21 s, max 0.21 s, tot 0.42 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.427e+02, min 3.771e+02, max 1.508e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.597e-03, min 1.198e-03, max 5.996e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.365e-08s, min 6.365e-08s, max 6.365e-08s, tot 1.273e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.802e-02s, min 1.601e-02s, max 2.004e-02s, tot 3.605e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.340e+00 4.557e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.791e+03, min 7.165e+02, max 2.866e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.590e-02, min 6.279e-03, max 2.552e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 6.700e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.792e-03s, min 3.762e-03s, max 3.822e-03s, tot 7.584e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.631e+00 7.070e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type C: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 5:R_R A 3 3 6 1 C S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.756e-01 2.33e+00 1.29e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 5:R_R A 3 3 6 1 C S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.589e-03 0.000e+00 3.350e-08 0.000e+00 3.446e-02 4.42e+00 1.29e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 42 42 0 4.0000 4.6667 3 1 11463.44 4.030e-03 0.000e+00 3.350e-08 0.000e+00 3.593e-02 1.09e+01 6.98e-01 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 13221132.2 % faster, avg. sp. ratio 132212.322x, max sp. ratio 251863.645x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1473530.5/188720.5/2758340.5/2947061.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.0/ 11.0/ 15.0/ 26.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 13.0, min. 11.0, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11372142.0 % faster, avg. sp. ratio 113722.420x, max sp. ratio 120306.050x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1050661.9/1028839.9/1072484.0/2101323.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 8.9/ 9.6/ 18.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.3, min. 8.9, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.115/ 0.281/ 1.948,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 2.707/ 0.382/ 2.325,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.851/ 3.702/ 8.001,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.285/ 4.418/ 10.866,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 0.995/ 0.698/ 1.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.18 s, max 0.18 s, tot 0.36 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.18 s, max 0.18 s, tot 0.36 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.555e+02, min 4.027e+02, max 1.508e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.902e-02, min 5.989e-03, max 3.206e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.087e-07s, min 6.365e-08s, max 9.537e-07s, tot 1.017e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.400e-02s, min 1.198e-02s, max 1.603e-02s, tot 2.801e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.793e+00 1.936e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.670e-01 x, min 2.670e-01 x, max 2.670e-01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 7.165e+03, min 2.866e+03, max 1.146e+04 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 6.102e-02, min 2.675e-02, max 9.529e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 6.700e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.809e-03s, min 3.589e-03s, max 4.030e-03s, tot 7.619e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.005e+00 4.333e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type Z: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 7:R_R A 3 3 6 1 Z S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.597e-02 0.000e+00 6.365e-08 0.000e+00 1.720e-01 4.21e+00 2.29e+00 1 9.60e-05 -pr: 15:R_R A 3 3 6 4 Z S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 7.34e-01 1.32e+00 1 3.84e-04 +pr: 7:R_R A 3 3 6 1 Z S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.847e-03 0.000e+00 3.350e-08 0.000e+00 3.523e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 15:R_R A 3 3 6 4 Z S N 42 42 0 4.0000 4.6667 3 1 11463.44 3.750e-03 0.000e+00 3.350e-08 0.000e+00 3.486e-02 2.09e+01 1.32e+00 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 13175768.1 % faster, avg. sp. ratio 131758.681x, max sp. ratio 250953.362x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1445306.0/188735.2/2701876.8/2890612.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.9/ 10.8/ 15.0/ 25.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 12.9, min. 10.8, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11339401.8 % faster, avg. sp. ratio 113395.018x, max sp. ratio 114846.975x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1046213.5/1040633.5/1051793.6/2092427.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.2/ 9.2/ 9.3/ 18.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.2, min. 9.2, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.995/ 0.533/ 3.457,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.945/ 0.734/ 4.211,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 10.866/ 6.568/ 15.165,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 28.897/ 8.001/ 20.897,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.807/ 1.323/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.17 s, max 0.18 s, tot 0.35 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.17 s, max 0.18 s, tot 0.35 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.555e+02, min 4.027e+02, max 1.508e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.903e-02, min 6.010e-03, max 3.205e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.087e-07s, min 6.365e-08s, max 9.537e-07s, tot 1.017e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.398e-02s, min 1.198e-02s, max 1.597e-02s, tot 2.795e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.574e+00 1.022e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.670e-01 x, min 2.670e-01 x, max 2.670e-01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.07 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 7.165e+03, min 2.866e+03, max 1.146e+04 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 6.368e-02, min 2.495e-02, max 1.024e-01 (2 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 6.700e-08s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.798e-03s, min 3.750e-03s, max 3.847e-03s, tot 7.597e-03s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.058e+00 2.442e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 20 20 0 4.0000 4.6667 3 1 377.09 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.636e-01 2.33e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 20 20 0 4.0000 4.6667 3 1 377.09 2.004e-02 0.000e+00 6.365e-08 0.000e+00 2.121e-01 1.38e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.756e-01 2.33e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.597e-02 0.000e+00 6.365e-08 0.000e+00 1.720e-01 4.21e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 42 42 0 4.0000 4.6667 3 1 716.46 3.853e-03 0.000e+00 3.350e-08 0.000e+00 3.460e-02 4.42e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 42 42 0 4.0000 4.6667 3 1 716.46 3.822e-03 0.000e+00 3.350e-08 0.000e+00 3.675e-02 2.63e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.589e-03 0.000e+00 3.350e-08 0.000e+00 3.446e-02 4.42e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.847e-03 0.000e+00 3.350e-08 0.000e+00 3.523e-02 8.00e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 26735899.3 % faster, avg. sp. ratio 267359.993x, max sp. ratio 314823.001x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2840720.2/2570410.2/3332253.2/11362880.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.6/ 10.2/ 11.0/ 42.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.6, min. 10.2, max. 11.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11277480.1 % faster, avg. sp. ratio 112775.801x, max sp. ratio 115017.794x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1052656.6/1028839.9/1097174.4/4210626.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 9.6/ 37.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.137/ 1.194/ 3.457,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.244/ 1.383/ 4.211,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.060/ 2.269/ 6.568,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 19.464/ 2.627/ 8.001,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.16 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.16 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.427e+02, min 3.771e+02, max 1.508e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.674e-03, min 1.198e-03, max 6.010e-03 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.365e-08s, min 6.365e-08s, max 6.365e-08s, tot 2.546e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.702e-02s, min 1.597e-02s, max 2.004e-02s, tot 6.807e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.574e+00 4.557e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.791e+03, min 7.165e+02, max 2.866e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.605e-02, min 6.229e-03, max 2.675e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.778e-03s, min 3.589e-03s, max 3.853e-03s, tot 1.511e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.442e+00 7.070e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 #pr: ======== Limiting to nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 20 20 0 4.0000 4.6667 3 1 1508.35 2.398e-02 0.000e+00 6.365e-08 0.000e+00 2.045e-01 5.72e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.601e-02 0.000e+00 6.365e-08 0.000e+00 2.080e-01 3.08e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 7.34e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.660e-03 0.000e+00 3.350e-08 0.000e+00 3.421e-02 1.09e+01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.762e-03 0.000e+00 3.350e-08 0.000e+00 3.437e-02 5.85e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 42 42 0 4.0000 4.6667 3 1 11463.44 4.030e-03 0.000e+00 3.350e-08 0.000e+00 3.593e-02 1.09e+01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 42 42 0 4.0000 4.6667 3 1 11463.44 3.750e-03 0.000e+00 3.350e-08 0.000e+00 3.486e-02 2.09e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 16336397.4 % faster, avg. sp. ratio 163364.974x, max sp. ratio 376755.947x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1714822.8/188720.5/3268728.2/6859291.3 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.9/ 8.5/ 15.0/ 51.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 12.9, min. 8.5, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11345273.7 % faster, avg. sp. ratio 113453.737x, max sp. ratio 120306.050x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1040080.1/1021288.3/1072484.0/4160320.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.2/ 8.9/ 9.3/ 36.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.2, min. 8.9, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.837/ 0.281/ 4.211,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 9.914/ 0.382/ 5.719,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 8.896/ 4.418/ 15.165,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 48.481/ 5.851/ 20.897,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.18 s, max 0.21 s, tot 0.77 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.18 s, max 0.21 s, tot 0.77 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.555e+02, min 4.027e+02, max 1.508e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.853e-02, min 4.004e-03, max 3.206e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.087e-07s, min 6.365e-08s, max 9.537e-07s, tot 2.035e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.198e-02s, max 2.398e-02s, tot 6.395e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.292e+00 1.936e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.133e+00 x, min 2.670e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 7.165e+03, min 2.866e+03, max 1.146e+04 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 6.236e-02, min 2.552e-02, max 1.024e-01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.800e-03s, min 3.660e-03s, max 4.030e-03s, tot 1.520e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.058e+00 3.631e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 20 20 0 4.0000 4.6667 3 1 377.09 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.636e-01 2.33e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 20 20 0 4.0000 4.6667 3 1 377.09 2.004e-02 0.000e+00 6.365e-08 0.000e+00 2.121e-01 1.38e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.756e-01 2.33e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.597e-02 0.000e+00 6.365e-08 0.000e+00 1.720e-01 4.21e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 20 20 0 4.0000 4.6667 3 1 1508.35 2.398e-02 0.000e+00 6.365e-08 0.000e+00 2.045e-01 5.72e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.601e-02 0.000e+00 6.365e-08 0.000e+00 2.080e-01 3.08e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 7.34e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 42 42 0 4.0000 4.6667 3 1 716.46 3.853e-03 0.000e+00 3.350e-08 0.000e+00 3.460e-02 4.42e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 42 42 0 4.0000 4.6667 3 1 716.46 3.822e-03 0.000e+00 3.350e-08 0.000e+00 3.675e-02 2.63e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.589e-03 0.000e+00 3.350e-08 0.000e+00 3.446e-02 4.42e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.847e-03 0.000e+00 3.350e-08 0.000e+00 3.523e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.660e-03 0.000e+00 3.350e-08 0.000e+00 3.421e-02 1.09e+01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.762e-03 0.000e+00 3.350e-08 0.000e+00 3.437e-02 5.85e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 42 42 0 4.0000 4.6667 3 1 11463.44 4.030e-03 0.000e+00 3.350e-08 0.000e+00 3.593e-02 1.09e+01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 42 42 0 4.0000 4.6667 3 1 11463.44 3.750e-03 0.000e+00 3.350e-08 0.000e+00 3.486e-02 2.09e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 21536148.3 % faster, avg. sp. ratio 215362.483x, max sp. ratio 376755.947x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2277771.5/188720.5/3332253.2/18222172.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.8/ 8.5/ 15.0/ 94.1 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.8, min. 8.5, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11311376.9 % faster, avg. sp. ratio 113114.769x, max sp. ratio 120306.050x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1046368.3/1021288.3/1097174.4/8370946.6 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 8.9/ 9.6/ 74.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.3, min. 8.9, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.987/ 0.281/ 4.211,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 20.159/ 0.382/ 5.719,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.478/ 2.269/ 15.165,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 67.945/ 2.627/ 20.897,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.16 s, max 0.21 s, tot 1.50 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.16 s, max 0.21 s, tot 1.50 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.491e+02, min 3.771e+02, max 1.508e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.110e-02, min 1.198e-03, max 3.206e-02 (8 samples) -#pr: best tun. rsb operation time was: on avg. 2.862e-07s, min 6.365e-08s, max 9.537e-07s, tot 2.289e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.650e-02s, min 1.198e-02s, max 2.398e-02s, tot 1.320e-01s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.292e+00 1.936e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.133e+00 x, min 2.670e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.28 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.28 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.478e+03, min 7.165e+02, max 1.146e+04 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.921e-02, min 6.229e-03, max 1.024e-01 (8 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 2.680e-07s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.789e-03s, min 3.589e-03s, max 4.030e-03s, tot 3.031e-02s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.058e+00 7.070e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 20 20 0 4.0000 4.6667 3 1 377.09 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.636e-01 2.33e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 20 20 0 4.0000 4.6667 3 1 377.09 2.004e-02 0.000e+00 6.365e-08 0.000e+00 2.121e-01 1.38e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.756e-01 2.33e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.597e-02 0.000e+00 6.365e-08 0.000e+00 1.720e-01 4.21e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 42 42 0 4.0000 4.6667 3 1 716.46 3.853e-03 0.000e+00 3.350e-08 0.000e+00 3.460e-02 4.42e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 42 42 0 4.0000 4.6667 3 1 716.46 3.822e-03 0.000e+00 3.350e-08 0.000e+00 3.675e-02 2.63e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.589e-03 0.000e+00 3.350e-08 0.000e+00 3.446e-02 4.42e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.847e-03 0.000e+00 3.350e-08 0.000e+00 3.523e-02 8.00e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 26735899.3 % faster, avg. sp. ratio 267359.993x, max sp. ratio 314823.001x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2840720.2/2570410.2/3332253.2/11362880.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.6/ 10.2/ 11.0/ 42.5 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.6, min. 10.2, max. 11.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11277480.1 % faster, avg. sp. ratio 112775.801x, max sp. ratio 115017.794x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1052656.6/1028839.9/1097174.4/4210626.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 9.6/ 37.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.137/ 1.194/ 3.457,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.244/ 1.383/ 4.211,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.060/ 2.269/ 6.568,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 19.464/ 2.627/ 8.001,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.16 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.16 s, max 0.21 s, tot 0.72 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.427e+02, min 3.771e+02, max 1.508e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.674e-03, min 1.198e-03, max 6.010e-03 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.365e-08s, min 6.365e-08s, max 6.365e-08s, tot 2.546e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.702e-02s, min 1.597e-02s, max 2.004e-02s, tot 6.807e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.574e+00 4.557e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.791e+03, min 7.165e+02, max 2.866e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.605e-02, min 6.229e-03, max 2.675e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.778e-03s, min 3.589e-03s, max 3.853e-03s, tot 1.511e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.442e+00 7.070e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 #pr: ======== Limiting to both transA=N and nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 20 20 0 4.0000 4.6667 3 1 1508.35 2.398e-02 0.000e+00 6.365e-08 0.000e+00 2.045e-01 5.72e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.601e-02 0.000e+00 6.365e-08 0.000e+00 2.080e-01 3.08e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 7.34e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.660e-03 0.000e+00 3.350e-08 0.000e+00 3.421e-02 1.09e+01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.762e-03 0.000e+00 3.350e-08 0.000e+00 3.437e-02 5.85e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 42 42 0 4.0000 4.6667 3 1 11463.44 4.030e-03 0.000e+00 3.350e-08 0.000e+00 3.593e-02 1.09e+01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 42 42 0 4.0000 4.6667 3 1 11463.44 3.750e-03 0.000e+00 3.350e-08 0.000e+00 3.486e-02 2.09e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 16336397.4 % faster, avg. sp. ratio 163364.974x, max sp. ratio 376755.947x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1714822.8/188720.5/3268728.2/6859291.3 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.9/ 8.5/ 15.0/ 51.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 12.9, min. 8.5, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11345273.7 % faster, avg. sp. ratio 113453.737x, max sp. ratio 120306.050x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1040080.1/1021288.3/1072484.0/4160320.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.2/ 8.9/ 9.3/ 36.7 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.2, min. 8.9, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.837/ 0.281/ 4.211,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 9.914/ 0.382/ 5.719,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 8.896/ 4.418/ 15.165,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 48.481/ 5.851/ 20.897,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.18 s, max 0.21 s, tot 0.77 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.18 s, max 0.21 s, tot 0.77 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.555e+02, min 4.027e+02, max 1.508e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.853e-02, min 4.004e-03, max 3.206e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.087e-07s, min 6.365e-08s, max 9.537e-07s, tot 2.035e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.198e-02s, max 2.398e-02s, tot 6.395e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.292e+00 1.936e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.133e+00 x, min 2.670e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.03 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.03 s, min 0.03 s, max 0.04 s, tot 0.14 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 7.165e+03, min 2.866e+03, max 1.146e+04 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 6.236e-02, min 2.552e-02, max 1.024e-01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 1.340e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.800e-03s, min 3.660e-03s, max 4.030e-03s, tot 1.520e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.058e+00 3.631e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 8) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -5062,56 +5123,56 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 20 20 0 4.0000 4.6667 3 1 377.09 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.636e-01 2.33e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 20 20 0 4.0000 4.6667 3 1 377.09 2.004e-02 0.000e+00 6.365e-08 0.000e+00 2.121e-01 1.38e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.603e-02 0.000e+00 6.365e-08 0.000e+00 1.756e-01 2.33e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.597e-02 0.000e+00 6.365e-08 0.000e+00 1.720e-01 4.21e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 20 20 0 4.0000 4.6667 3 1 1508.35 2.398e-02 0.000e+00 6.365e-08 0.000e+00 2.045e-01 5.72e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 20 20 0 4.0000 4.6667 3 1 1508.35 1.601e-02 0.000e+00 6.365e-08 0.000e+00 2.080e-01 3.08e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 20 20 0 4.0000 4.6667 3 1 402.65 1.198e-02 0.000e+00 9.537e-07 0.000e+00 1.800e-01 7.34e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 42 42 0 4.0000 4.6667 3 1 716.46 3.853e-03 0.000e+00 3.350e-08 0.000e+00 3.460e-02 4.42e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 42 42 0 4.0000 4.6667 3 1 716.46 3.822e-03 0.000e+00 3.350e-08 0.000e+00 3.675e-02 2.63e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.589e-03 0.000e+00 3.350e-08 0.000e+00 3.446e-02 4.42e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.847e-03 0.000e+00 3.350e-08 0.000e+00 3.523e-02 8.00e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.660e-03 0.000e+00 3.350e-08 0.000e+00 3.421e-02 1.09e+01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 42 42 0 4.0000 4.6667 3 1 2865.86 3.762e-03 0.000e+00 3.350e-08 0.000e+00 3.437e-02 5.85e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 42 42 0 4.0000 4.6667 3 1 11463.44 4.030e-03 0.000e+00 3.350e-08 0.000e+00 3.593e-02 1.09e+01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 42 42 0 4.0000 4.6667 3 1 11463.44 3.750e-03 0.000e+00 3.350e-08 0.000e+00 3.486e-02 2.09e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 21536148.3 % faster, avg. sp. ratio 215362.483x, max sp. ratio 376755.947x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2277771.5/188720.5/3332253.2/18222172.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 11.8/ 8.5/ 15.0/ 94.1 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 11.8, min. 8.5, max. 15.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 11311376.9 % faster, avg. sp. ratio 113114.769x, max sp. ratio 120306.050x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1046368.3/1021288.3/1097174.4/8370946.6 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 8.9/ 9.6/ 74.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.3, min. 8.9, max. 9.6 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.987/ 0.281/ 4.211,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 20.159/ 0.382/ 5.719,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.478/ 2.269/ 15.165,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 67.945/ 2.627/ 20.897,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.16 s, max 0.21 s, tot 1.50 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.16 s, max 0.21 s, tot 1.50 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.491e+02, min 3.771e+02, max 1.508e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.110e-02, min 1.198e-03, max 3.206e-02 (8 samples) -#pr: best tun. rsb operation time was: on avg. 2.862e-07s, min 6.365e-08s, max 9.537e-07s, tot 2.289e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.650e-02s, min 1.198e-02s, max 2.398e-02s, tot 1.320e-01s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.292e+00 1.936e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.425e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.133e+00 x, min 2.670e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) -#pr: Record collection took 10.71 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.28 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.04 s, min 0.03 s, max 0.04 s, tot 0.28 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 4.478e+03, min 7.165e+02, max 1.146e+04 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.921e-02, min 6.229e-03, max 1.024e-01 (8 samples) +#pr: best tun. rsb operation time was: on avg. 3.350e-08s, min 3.350e-08s, max 3.350e-08s, tot 2.680e-07s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.789e-03s, min 3.589e-03s, max 4.030e-03s, tot 3.031e-02s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.058e+00 7.070e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.516e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: Record collection took 1.97 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 100 environment variables in 5265 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 100 environment variables in 5318 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1740166919_gcc-14.2.rpr -# Removing the temporary record file rsbench_pr__1740166919_gcc-14.2.rpr.tmp. -# terminating run at 1740166936 (after 17.0s of w.c.t.) -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash ./scripts/doc-tests.sh +#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1774579345_gcc-14.2.rpr +# Removing the temporary record file rsbench_pr__1774579345_gcc-14.2.rpr.tmp. +# terminating run at 1774579353 (after 8.4s of w.c.t.) +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh ./scripts/doc-tests.sh + set -o pipefail + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/autotune.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/io-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/power.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/snippets.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/transpose.c + grep '^.\{71,\}' + true -+ grep '^[^ ].\{80,\}' + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/README ++ grep '^[^ ].\{80,\}' + true ++ /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench -h ++ wc -l @@ -5119,8 +5180,8 @@ ++ wc -c ++ /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench -h + test 2014 -ge 1966 -++ wc -l ++ /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench -oa -Ob -h +++ wc -l + test 182 -ge 157 ++ /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench -oa -Ob -h ++ wc -c @@ -5158,7 +5219,7 @@ type char codes:D S C Z gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' if test /build/reproducible-path/librsb-1.3.0.2+dfsg != /build/reproducible-path/librsb-1.3.0.2+dfsg ; then cp /build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/vf.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/examples ; fi -( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) +( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) + which rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench + BRF=test.rpr @@ -5168,15 +5229,15 @@ Will invoke autotuning for ~10.000000 s x 1 rounds, specifying verbosity=0 and threads=0. (>0 means no structure tuning; 0 means only structure tuning, <0 means tuning of both with (negated) thread count suggestion). # Requested no transposition. # performance record file set to: test.rpr -# beginning run at 1740166937 +# beginning run at 1774579353 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench --lower 100 --as-symmetric --types : -n 1 --notranspose --compare-competitors --verbose --verbose --write-performance-record=test.rpr # compiled with: CC=gcc CFLAGS=-g -O2 -Werror=implicit-function-declaration -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -O3 -std=c99 -# average timer granularity: 6.44e-08 s +# average timer granularity: 3.41e-08 s # Will write a final performance record to file test.rpr and periodic checkpoints to test.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -5215,15 +5276,15 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos1-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 2 (each --verbose occurrence counts +1) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5235,40 +5296,40 @@ # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 6.304s +# Memory benchmark took 6.245s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 4 samples (1120 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 26214 bytes -# so far, program took 6.308s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 4194304 bytes, per-thread 99864 bytes +# so far, program took 6.247s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.614e-01 s (100.00 %) - analyzed arrays in 4.147e-02 s (25.70 %) - cleaned-up arrays in 1.693e-05 s (0.01 %) - deduplicated arrays in 1.502e-05 s (0.01 %) + converted COO to RSB in 3.110e-02 s (100.00 %) + analyzed arrays in 6.408e-03 s (20.60 %) + cleaned-up arrays in 1.097e-05 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.03 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.382e-02 s (51.94 %) - memory allocations took 3.123e-05 s (0.02 %) - leafs setup took 1.979e-05 s (0.01 %) - halfword conversion took 3.599e-02 s (22.30 %) -Built (100 x 100)[0x55b8856958c0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.162s): (100 x 100)[0x55b8856958c0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 1.506e-02 s (48.43 %) + memory allocations took 2.098e-05 s (0.07 %) + leafs setup took 5.007e-06 s (0.02 %) + halfword conversion took 9.581e-03 s (30.81 %) +Built (100 x 100)[0x559bedcc48f0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.031s): (100 x 100)[0x559bedcc48f0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5279,11 +5340,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5294,11 +5355,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5308,16 +5369,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04789s; avg 0.01596s ( +/- 24.83/ 49.54 %); best 0.012s; worst 0.02387s; std dev. 0.005592 (taking best). -Reference operation time is 0.0119991 s (1.683 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.012 Mflops: 1.683) -Merge (3 -> 1 leaves) took w.c.t. of 0.000138s, ~0.0001001s of computing time (of which 5.221e-05s sorting, 1.216e-05s analysis) +3 iterations (1 th.) took 0.01159s; avg 0.003863s ( +/- 2.18/ 3.03 %); best 0.003779s; worst 0.00398s; std dev. 8.541e-05 (taking best). +Reference operation time is 0.00377893 s (5.345 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.003779 Mflops: 5.345) +Merge (3 -> 1 leaves) took w.c.t. of 5.698e-05s, ~4.506e-05s of computing time (of which 1.788e-05s sorting, 4.053e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5328,11 +5389,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5342,14 +5403,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.697e-05s; avg 1.566e-05s ( +/- 23.86/ 47.72 %); best 1.192e-05s; worst 2.313e-05s; std dev. 5.282e-06 (taking best). -Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. +3 iterations (1 th.) took 3.195e-05s; avg 1.065e-05s ( +/- 5.97/ 2.99 %); best 1.001e-05s; worst 1.097e-05s; std dev. 4.496e-07 (taking best). +Reference operation time is 1.00136e-05 s (2017 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5359,25 +5420,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.192e-05 s ~Mflops: 1694.499 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1006.560x: 0.012s -> 1.192e-05s, so taking this instance. +After merge step 1: tpop: 1.001e-05 s ~Mflops: 2017.260 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 377.381x: 0.003779s -> 1.001e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1921s (of which 0.0001428s partitioning, 0.1201s I/O); computing times: 0.0001001s in par. loops, 5.221e-05s sorting, 1.216e-05s analyzing) -Total merge + benchmarking process took 0.1921s, equivalent to 16116.8/16.0 new/old ops (0.1314s for 2 clones -- as 11018.7/10.9 ops, or 5509.3/5.5 ops per clone), SPEEDUP of 1006.560x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1006.560x (0.012s -> 1.192e-05s), will amortize in 16.0 ops by saving 0.01199s per op. -In 1 tuning rounds (tot. 0.3s, 0.13s for constructor, 2 clones) obtained a SPEEDUP of 100556.0% (1007x) (from 1.683 to 1694 Mflops). Employed 0.11s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.03551s (of which 6.199e-05s partitioning, 0.02387s I/O); computing times: 4.506e-05s in par. loops, 1.788e-05s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.03551s, equivalent to 3546.4/9.4 new/old ops (0.02282s for 2 clones -- as 2279.2/6.0 ops, or 1139.6/3.0 ops per clone), SPEEDUP of 377.381x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 377.381x (0.003779s -> 1.001e-05s), will amortize in 9.4 ops by saving 0.003769s per op. +In 1 tuning rounds (tot. 0.058s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 37638.1% (377.4x) (from 5.345 to 2017 Mflops). Employed 0.022s for I/O of matrix plots. #pr: updating sample at index 1 (0^th of 4), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.408061 s (1.200e-02 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0803709 s (3.779e-03 s -> 1.001e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5388,11 +5449,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5403,13 +5464,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 20. +Starting with requested 0 threads ; current default 1 ; at most 42. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5420,11 +5481,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5434,27 +5495,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 6.104e-05s; avg 2.035e-05s ( +/- 30.86/ 61.72 %); best 1.407e-05s; worst 3.29e-05s; std dev. 8.879e-06 (taking best). -Reference operation time is 1.40667e-05 s (1436 Mflops) with 1 threads. +3 iterations (1 th.) took 4.792e-05s; avg 1.597e-05s ( +/- 25.37/ 49.25 %); best 1.192e-05s; worst 2.384e-05s; std dev. 5.564e-06 (taking best). +Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.144e-02 s (100.00 %) - analyzed arrays in 2.345e-02 s (32.83 %) - cleaned-up arrays in 1.597e-05 s (0.02 %) - deduplicated arrays in 1.407e-05 s (0.02 %) + converted COO to RSB in 1.187e-02 s (100.00 %) + analyzed arrays in 5.769e-03 s (48.61 %) + cleaned-up arrays in 9.060e-06 s (0.08 %) + deduplicated arrays in 1.311e-05 s (0.11 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.792e-02 s (67.08 %) - memory allocations took 9.775e-06 s (0.01 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 2.003e-05 s (0.03 %) -Built (100 x 100)[0x55b8856a1310]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 20). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 6.065e-03 s (51.10 %) + memory allocations took 3.815e-06 s (0.03 %) + leafs setup took 1.907e-06 s (0.02 %) + halfword conversion took 5.960e-06 s (0.05 %) +Built (100 x 100)[0x559bedcd3640]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 42). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5465,12 +5526,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5481,22 +5542,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.079e-01 s (100.00 %) - analyzed arrays in 3.586e-02 s (33.24 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + converted COO to RSB in 2.108e-02 s (100.00 %) + analyzed arrays in 5.659e-03 s (26.84 %) + cleaned-up arrays in 2.289e-05 s (0.11 %) + deduplicated arrays in 1.001e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.403e-02 s (22.28 %) - memory allocations took 9.060e-06 s (0.01 %) - leafs setup took 7.153e-06 s (0.01 %) - halfword conversion took 4.792e-02 s (44.43 %) -Built (100 x 100)[0x55b885695c10]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.838e-03 s (37.18 %) + memory allocations took 5.245e-06 s (0.02 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 7.544e-03 s (35.79 %) +Built (100 x 100)[0x559bedcc4c20]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5507,11 +5568,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5522,11 +5583,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5536,16 +5597,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05972s; avg 0.01991s ( +/- 39.97/ 20.82 %); best 0.01195s; worst 0.02405s; std dev. 0.005627 (taking best). -Reference operation time is 0.01195 s (1.69 Mflops) with 1 threads. -Challenging best inner round reference (1.40667e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.01195 s/0 threads (speedup 0.00117713 x), same?n. +3 iterations (1 th.) took 0.01338s; avg 0.004461s ( +/- 14.76/ 29.35 %); best 0.003802s; worst 0.00577s; std dev. 0.0009258 (taking best). +Reference operation time is 0.00380206 s (5.313 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.00380206 s/0 threads (speedup 0.00313539 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5556,12 +5617,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5572,22 +5633,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.198e-01 s (100.00 %) - analyzed arrays in 4.783e-02 s (39.92 %) - cleaned-up arrays in 1.693e-05 s (0.01 %) - deduplicated arrays in 1.502e-05 s (0.01 %) + converted COO to RSB in 1.934e-02 s (100.00 %) + analyzed arrays in 3.905e-03 s (20.19 %) + cleaned-up arrays in 5.007e-06 s (0.03 %) + deduplicated arrays in 6.914e-06 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.600e-02 s (30.04 %) - memory allocations took 1.621e-05 s (0.01 %) - leafs setup took 1.097e-05 s (0.01 %) - halfword conversion took 3.594e-02 s (29.99 %) -Built (100 x 100)[0x55b885696720]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 12, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.717e-03 s (39.90 %) + memory allocations took 3.099e-06 s (0.02 %) + leafs setup took 1.907e-06 s (0.01 %) + halfword conversion took 7.701e-03 s (39.82 %) +Built (100 x 100)[0x559bedcc5610]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5598,11 +5659,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5613,11 +5674,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5627,16 +5688,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04785s; avg 0.01595s ( +/- 24.47/ 47.60 %); best 0.01205s; worst 0.02354s; std dev. 0.005369 (taking best). -Reference operation time is 0.0120471 s (1.677 Mflops) with 1 threads. -Challenging best inner round reference (1.40667e-05 s/1 threads) with: subdivision 0.5, 12 leaves, 2.217 bytes/nz, 0.0120471 s/0 threads (speedup 0.00116765 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 17 subms, 12 lsubms, 2.2170 bpnz +3 iterations (1 th.) took 0.01314s; avg 0.00438s ( +/- 15.79/ 27.80 %); best 0.003688s; worst 0.005597s; std dev. 0.0008635 (taking best). +Reference operation time is 0.0036881 s (5.477 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.0036881 s/0 threads (speedup 0.00323227 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5647,12 +5708,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5663,22 +5724,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.227e-01 s (100.00 %) - analyzed arrays in 3.574e-02 s (29.11 %) - cleaned-up arrays in 1.597e-05 s (0.01 %) - deduplicated arrays in 1.502e-05 s (0.01 %) + converted COO to RSB in 1.933e-02 s (100.00 %) + analyzed arrays in 5.794e-03 s (29.98 %) + cleaned-up arrays in 8.106e-06 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.802e-02 s (39.12 %) - memory allocations took 1.717e-05 s (0.01 %) - leafs setup took 1.907e-05 s (0.02 %) - halfword conversion took 3.893e-02 s (31.71 %) -Built (100 x 100)[0x55b8856a1430]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 27, symflags:'LS' + shuffled partitions in 5.744e-03 s (29.72 %) + memory allocations took 3.815e-06 s (0.02 %) + leafs setup took 1.907e-06 s (0.01 %) + halfword conversion took 7.766e-03 s (40.18 %) +Built (100 x 100)[0x559bedcd3760]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 22, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5689,11 +5750,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5704,11 +5765,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5718,16 +5779,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.07192s; avg 0.02397s ( +/- 0.28/ 0.16 %); best 0.02391s; worst 0.02401s; std dev. 4.726e-05 (taking best). -Reference operation time is 0.023905 s (0.845 Mflops) with 1 threads. -Challenging best inner round reference (1.40667e-05 s/1 threads) with: subdivision 1, 27 leaves, 2.328 bytes/nz, 0.023905 s/0 threads (speedup 0.000588441 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 37 subms, 27 lsubms, 2.3279 bpnz +3 iterations (1 th.) took 0.01147s; avg 0.003823s ( +/- 2.77/ 2.04 %); best 0.003717s; worst 0.003901s; std dev. 7.77e-05 (taking best). +Reference operation time is 0.00371695 s (5.435 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 1, 22 leaves, 2.295 bytes/nz, 0.00371695 s/0 threads (speedup 0.00320718 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 30 subms, 22 lsubms, 2.2947 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5738,12 +5799,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5754,22 +5815,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.478e-01 s (100.00 %) - analyzed arrays in 4.775e-02 s (32.30 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.287e-05 s (0.01 %) + converted COO to RSB in 2.507e-02 s (100.00 %) + analyzed arrays in 1.141e-02 s (45.52 %) + cleaned-up arrays in 8.106e-06 s (0.03 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 6.400e-02 s (43.29 %) - memory allocations took 2.003e-05 s (0.01 %) - leafs setup took 1.597e-05 s (0.01 %) - halfword conversion took 3.600e-02 s (24.36 %) -Built (100 x 100)[0x55b8856c80a0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 58, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.732e-03 s (30.84 %) + memory allocations took 5.722e-06 s (0.02 %) + leafs setup took 5.007e-06 s (0.02 %) + halfword conversion took 5.899e-03 s (23.53 %) +Built (100 x 100)[0x559bedcfa400]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 48, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5780,11 +5841,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5795,11 +5856,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5809,16 +5870,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05978s; avg 0.01993s ( +/- 19.77/ 38.74 %); best 0.01599s; worst 0.02765s; std dev. 0.005459 (taking best). -Reference operation time is 0.0159879 s (1.263 Mflops) with 1 threads. -Challenging best inner round reference (1.40667e-05 s/1 threads) with: subdivision 2, 58 leaves, 2.472 bytes/nz, 0.0159879 s/0 threads (speedup 0.000879835 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 79 subms, 58 lsubms, 2.4721 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 0.01124s; avg 0.003747s ( +/- 2.54/ 3.43 %); best 0.003652s; worst 0.003876s; std dev. 9.441e-05 (taking best). +Reference operation time is 0.0036521 s (5.531 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 2, 48 leaves, 2.434 bytes/nz, 0.0036521 s/0 threads (speedup 0.00326413 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 66 subms, 48 lsubms, 2.4341 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5829,12 +5890,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5845,22 +5906,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.437e-01 s (100.00 %) - analyzed arrays in 5.159e-02 s (35.91 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.407e-05 s (0.01 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 6.003e-02 s (41.78 %) - memory allocations took 3.386e-05 s (0.02 %) - leafs setup took 2.313e-05 s (0.02 %) - halfword conversion took 3.195e-02 s (22.24 %) -Built (100 x 100)[0x55b8856c80a0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 120, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + converted COO to RSB in 2.496e-02 s (100.00 %) + analyzed arrays in 9.512e-03 s (38.11 %) + cleaned-up arrays in 7.868e-06 s (0.03 %) + deduplicated arrays in 1.216e-05 s (0.05 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 7.599e-03 s (30.45 %) + memory allocations took 5.007e-06 s (0.02 %) + leafs setup took 6.914e-06 s (0.03 %) + halfword conversion took 7.812e-03 s (31.30 %) +Built (100 x 100)[0x559bedcfa400]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 103, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5871,11 +5932,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5886,11 +5947,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5900,17 +5961,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06385s; avg 0.02128s ( +/- 24.85/ 49.07 %); best 0.01599s; worst 0.03173s; std dev. 0.007384 (taking best). -Reference operation time is 0.0159941 s (1.263 Mflops) with 1 threads. -Challenging best inner round reference (1.40667e-05 s/1 threads) with: subdivision 4, 120 leaves, 2.726 bytes/nz, 0.0159941 s/0 threads (speedup 0.000879494 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 162 subms, 120 lsubms, 2.7255 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1436.02 Mflops. +3 iterations (1 th.) took 0.01365s; avg 0.004549s ( +/- 13.68/ 27.31 %); best 0.003927s; worst 0.005792s; std dev. 0.0008786 (taking best). +Reference operation time is 0.00392699 s (5.144 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 4, 103 leaves, 2.653 bytes/nz, 0.00392699 s/0 threads (speedup 0.00303564 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 140 subms, 103 lsubms, 2.6535 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1694.5 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5920,23 +5981,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 1.02789 s (eq. to 7e+04/ 7e+04 old/new op.times), gained local/global speedup 1 x (1.40667e-05 : 1.40667e-05) / 1 x (1.40667e-05 : 1.40667e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.185201 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (1.19209e-05 : 1.19209e-05) / 1 x (1.19209e-05 : 1.19209e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 1s, 0.72s for constructor, 0 clones) obtained NO speedup (best stays 1436 Mflops). -Second run of RSB Autotuner took 1.028 s and estimated a speedup of 1.000000 x (1.407e-05 s -> 1.407e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.19s, 0.12s for constructor, 0 clones) obtained NO speedup (best stays 1694 Mflops). +Second run of RSB Autotuner took 0.185268 s and estimated a speedup of 1.000000 x (1.192e-05 s -> 1.192e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.041470 0.083822 0.125292 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.125292 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.041470 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.083822 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.006408 0.015061 0.021469 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.021469 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.006408 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015061 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.125292 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.021469 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -5951,45 +6012,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.161371 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.031101 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.0414698 0 0.083822 -# so far, program took 8.020s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.436s/0.000s . +%constructor:lower-100x100-5050nz 0 0.00640798 0 0.0150609 +# so far, program took 6.570s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.266s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.07914s (system CPU time used) -ru_utime : 22.68s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.1675s (system CPU time used) +ru_utime : 18s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 8.020s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.436s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 6.570s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.266s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.418e-01 s (100.00 %) - analyzed arrays in 2.978e-02 s (21.01 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + converted COO to RSB in 2.797e-02 s (100.00 %) + analyzed arrays in 5.315e-03 s (19.00 %) + cleaned-up arrays in 8.106e-06 s (0.03 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 6.902e-02 s (48.69 %) - memory allocations took 1.192e-05 s (0.01 %) - leafs setup took 5.007e-06 s (0.00 %) - halfword conversion took 4.292e-02 s (30.27 %) -Built (100 x 100)[0x55b8856a1430]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.142s): (100 x 100)[0x55b8856a1430]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 1.506e-02 s (53.85 %) + memory allocations took 3.815e-06 s (0.01 %) + leafs setup took 2.146e-06 s (0.01 %) + halfword conversion took 7.566e-03 s (27.05 %) +Built (100 x 100)[0x559bedcd3760]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.028s): (100 x 100)[0x559bedcd3760]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6000,11 +6061,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6015,11 +6076,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6029,16 +6090,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05195s; avg 0.01732s ( +/- 30.37/ 15.23 %); best 0.01206s; worst 0.01995s; std dev. 0.003718 (taking best). -Reference operation time is 0.012058 s (1.675 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01206 Mflops: 1.675) -Merge (3 -> 1 leaves) took w.c.t. of 9.394e-05s, ~7.2e-05s of computing time (of which 3.099e-05s sorting, 6.914e-06s analysis) +3 iterations (1 th.) took 0.009489s; avg 0.003163s ( +/- 37.18/ 20.74 %); best 0.001987s; worst 0.003819s; std dev. 0.0008334 (taking best). +Reference operation time is 0.00198698 s (10.17 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.001987 Mflops: 10.166) +Merge (3 -> 1 leaves) took w.c.t. of 5.412e-05s, ~4.482e-05s of computing time (of which 1.693e-05s sorting, 2.146e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6049,11 +6110,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6063,14 +6124,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.791e-05s; avg 1.264e-05s ( +/- 13.21/ 26.42 %); best 1.097e-05s; worst 1.597e-05s; std dev. 2.36e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 3.314e-05s; avg 1.105e-05s ( +/- 9.35/ 10.07 %); best 1.001e-05s; worst 1.216e-05s; std dev. 8.778e-07 (taking best). +Reference operation time is 1.00136e-05 s (2017 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6080,25 +6141,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.097e-05 s ~Mflops: 1841.847 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1099.457x: 0.01206s -> 1.097e-05s, so taking this instance. +After merge step 1: tpop: 1.001e-05 s ~Mflops: 2017.260 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 198.429x: 0.001987s -> 1.001e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1806s (of which 9.799e-05s partitioning, 0.1321s I/O); computing times: 7.2e-05s in par. loops, 3.099e-05s sorting, 6.914e-06s analyzing) -Total merge + benchmarking process took 0.1806s, equivalent to 16463.1/15.0 new/old ops (0.1075s for 2 clones -- as 9799.2/8.9 ops, or 4899.6/4.5 ops per clone), SPEEDUP of 1099.457x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1099.457x (0.01206s -> 1.097e-05s), will amortize in 15.0 ops by saving 0.01205s per op. -In 1 tuning rounds (tot. 0.29s, 0.11s for constructor, 2 clones) obtained a SPEEDUP of 109845.7% (1099x) (from 1.675 to 1842 Mflops). Employed 0.12s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.03538s (of which 5.817e-05s partitioning, 0.02388s I/O); computing times: 4.482e-05s in par. loops, 1.693e-05s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.03538s, equivalent to 3533.6/17.8 new/old ops (0.02485s for 2 clones -- as 2481.6/12.5 ops, or 1240.8/6.3 ops per clone), SPEEDUP of 198.429x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 198.429x (0.001987s -> 1.001e-05s), will amortize in 17.9 ops by saving 0.001977s per op. +In 1 tuning rounds (tot. 0.059s, 0.025s for constructor, 2 clones) obtained a SPEEDUP of 19742.9% (198.4x) (from 10.17 to 2017 Mflops). Employed 0.049s for I/O of matrix plots. #pr: updating sample at index 2 (1^th of 4), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.408572 s (1.206e-02 s -> 1.097e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.107312 s (1.987e-03 s -> 1.001e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6109,11 +6170,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6124,13 +6185,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 20. +Starting with requested 0 threads ; current default 1 ; at most 42. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6141,11 +6202,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6155,27 +6216,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 5.221e-05s; avg 1.74e-05s ( +/- 36.99/ 67.12 %); best 1.097e-05s; worst 2.909e-05s; std dev. 8.275e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 3.409e-05s; avg 1.136e-05s ( +/- 11.89/ 15.38 %); best 1.001e-05s; worst 1.311e-05s; std dev. 1.296e-06 (taking best). +Reference operation time is 1.00136e-05 s (2017 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.916e-02 s (100.00 %) - analyzed arrays in 4.308e-02 s (54.42 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.287e-05 s (0.02 %) + converted COO to RSB in 1.322e-02 s (100.00 %) + analyzed arrays in 5.443e-03 s (41.18 %) + cleaned-up arrays in 8.106e-06 s (0.06 %) + deduplicated arrays in 1.192e-05 s (0.09 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.601e-02 s (45.49 %) - memory allocations took 8.106e-06 s (0.01 %) - leafs setup took 1.311e-05 s (0.02 %) - halfword conversion took 1.979e-05 s (0.02 %) -Built (100 x 100)[0x55b8856a1310]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 20). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.610e-03 s (57.58 %) + memory allocations took 1.371e-04 s (1.04 %) + leafs setup took 1.192e-06 s (0.01 %) + halfword conversion took 3.815e-06 s (0.03 %) +Built (100 x 100)[0x559bedcd3640]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 42). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6186,12 +6247,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6202,22 +6263,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.073e-01 s (100.00 %) - analyzed arrays in 3.136e-02 s (29.22 %) - cleaned-up arrays in 1.597e-05 s (0.01 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + converted COO to RSB in 9.557e-03 s (100.00 %) + analyzed arrays in 5.703e-03 s (59.67 %) + cleaned-up arrays in 7.868e-06 s (0.08 %) + deduplicated arrays in 1.097e-05 s (0.11 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.393e-02 s (40.93 %) - memory allocations took 1.597e-05 s (0.01 %) - leafs setup took 8.106e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (29.80 %) -Built (100 x 100)[0x55b885696a60]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 3.828e-03 s (40.05 %) + memory allocations took 3.338e-06 s (0.03 %) + leafs setup took 1.192e-06 s (0.01 %) + halfword conversion took 2.861e-06 s (0.03 %) +Built (100 x 100)[0x559bedcc5610]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6228,11 +6289,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6243,11 +6304,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6257,16 +6318,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05546s; avg 0.01849s ( +/- 37.48/ 29.46 %); best 0.01156s; worst 0.02393s; std dev. 0.00516 (taking best). -Reference operation time is 0.011559 s (1.748 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.011559 s/0 threads (speedup 0.000948806 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 3.386e-05s; avg 1.129e-05s ( +/- 11.27/ 14.08 %); best 1.001e-05s; worst 1.287e-05s; std dev. 1.189e-06 (taking best). +Reference operation time is 1.00136e-05 s (2017 Mflops) with 1 threads. +Challenging best inner round reference (1.00136e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.00136e-05 s/0 threads (speedup 1 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6277,12 +6338,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6293,22 +6354,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 9.984e-02 s (100.00 %) - analyzed arrays in 2.391e-02 s (23.94 %) - cleaned-up arrays in 1.597e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 2.094e-02 s (100.00 %) + analyzed arrays in 5.503e-03 s (26.28 %) + cleaned-up arrays in 7.868e-06 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.598e-02 s (36.04 %) - memory allocations took 1.001e-05 s (0.01 %) - leafs setup took 8.106e-06 s (0.01 %) - halfword conversion took 3.990e-02 s (39.97 %) -Built (100 x 100)[0x55b885696a60]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.853e-03 s (37.50 %) + memory allocations took 3.099e-06 s (0.01 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 7.563e-03 s (36.12 %) +Built (100 x 100)[0x559bedcc5610]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6319,11 +6380,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6334,11 +6395,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6348,16 +6409,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0519s; avg 0.0173s ( +/- 30.53/ 38.61 %); best 0.01202s; worst 0.02398s; std dev. 0.004983 (taking best). -Reference operation time is 0.0120199 s (1.681 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.5, 8 leaves, 2.185 bytes/nz, 0.0120199 s/0 threads (speedup 0.000912427 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 11 subms, 8 lsubms, 2.1846 bpnz +3 iterations (1 th.) took 0.01145s; avg 0.003817s ( +/- 4.26/ 3.89 %); best 0.003654s; worst 0.003965s; std dev. 0.0001274 (taking best). +Reference operation time is 0.003654 s (5.528 Mflops) with 1 threads. +Challenging best inner round reference (1.00136e-05 s/1 threads) with: subdivision 0.5, 6 leaves, 2.163 bytes/nz, 0.003654 s/0 threads (speedup 0.00274044 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6368,12 +6429,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6384,22 +6445,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.438e-01 s (100.00 %) - analyzed arrays in 5.976e-02 s (41.55 %) - cleaned-up arrays in 1.597e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 1.898e-02 s (100.00 %) + analyzed arrays in 5.673e-03 s (29.89 %) + cleaned-up arrays in 9.060e-06 s (0.05 %) + deduplicated arrays in 1.001e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.803e-02 s (33.40 %) - memory allocations took 1.478e-05 s (0.01 %) - leafs setup took 1.407e-05 s (0.01 %) - halfword conversion took 3.596e-02 s (25.01 %) -Built (100 x 100)[0x55b8856a1430]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 19, symflags:'LS' + shuffled partitions in 5.854e-03 s (30.85 %) + memory allocations took 9.537e-07 s (0.01 %) + leafs setup took 2.861e-06 s (0.02 %) + halfword conversion took 7.427e-03 s (39.13 %) +Built (100 x 100)[0x559bedcd3760]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6410,11 +6471,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6425,11 +6486,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6439,16 +6500,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04792s; avg 0.01597s ( +/- 25.35/ 42.29 %); best 0.01192s; worst 0.02273s; std dev. 0.004808 (taking best). -Reference operation time is 0.0119238 s (1.694 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 1, 19 leaves, 2.272 bytes/nz, 0.0119238 s/0 threads (speedup 0.000919779 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 26 subms, 19 lsubms, 2.2725 bpnz +3 iterations (1 th.) took 0.0114s; avg 0.0038s ( +/- 2.03/ 3.19 %); best 0.003723s; worst 0.003921s; std dev. 8.665e-05 (taking best). +Reference operation time is 0.00372291 s (5.426 Mflops) with 1 threads. +Challenging best inner round reference (1.00136e-05 s/1 threads) with: subdivision 1, 16 leaves, 2.25 bytes/nz, 0.00372291 s/0 threads (speedup 0.00268972 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6459,12 +6520,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6475,22 +6536,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.080e-01 s (100.00 %) - analyzed arrays in 4.750e-02 s (43.97 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.287e-05 s (0.01 %) + converted COO to RSB in 3.356e-02 s (100.00 %) + analyzed arrays in 1.838e-02 s (54.76 %) + cleaned-up arrays in 7.868e-06 s (0.02 %) + deduplicated arrays in 1.097e-05 s (0.03 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.603e-02 s (33.35 %) - memory allocations took 1.502e-05 s (0.01 %) - leafs setup took 1.192e-05 s (0.01 %) - halfword conversion took 2.444e-02 s (22.62 %) -Built (100 x 100)[0x55b8856b44e0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 43, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.775e-03 s (23.17 %) + memory allocations took 6.437e-06 s (0.02 %) + leafs setup took 3.099e-06 s (0.01 %) + halfword conversion took 7.377e-03 s (21.98 %) +Built (100 x 100)[0x559bedcd3760]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6501,11 +6562,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6516,11 +6577,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6530,16 +6591,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05946s; avg 0.01982s ( +/- 39.43/ 20.58 %); best 0.012s; worst 0.0239s; std dev. 0.005527 (taking best). -Reference operation time is 0.0120049 s (1.683 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 2, 43 leaves, 2.401 bytes/nz, 0.0120049 s/0 threads (speedup 0.000913568 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 59 subms, 43 lsubms, 2.4008 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 0.01128s; avg 0.00376s ( +/- 5.55/ 4.77 %); best 0.003551s; worst 0.003939s; std dev. 0.0001597 (taking best). +Reference operation time is 0.00355101 s (5.689 Mflops) with 1 threads. +Challenging best inner round reference (1.00136e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.00355101 s/0 threads (speedup 0.00281993 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6550,12 +6611,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6566,22 +6627,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.316e-01 s (100.00 %) - analyzed arrays in 5.953e-02 s (45.25 %) - cleaned-up arrays in 1.621e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 2.518e-02 s (100.00 %) + analyzed arrays in 9.653e-03 s (38.33 %) + cleaned-up arrays in 1.001e-05 s (0.04 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.604e-02 s (27.39 %) - memory allocations took 2.670e-05 s (0.02 %) - leafs setup took 1.812e-05 s (0.01 %) - halfword conversion took 3.592e-02 s (27.30 %) -Built (100 x 100)[0x55b8856b44e0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 90, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.752e-03 s (30.78 %) + memory allocations took 4.768e-06 s (0.02 %) + leafs setup took 5.960e-06 s (0.02 %) + halfword conversion took 7.747e-03 s (30.76 %) +Built (100 x 100)[0x559bedce6840]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 78, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6592,11 +6653,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6607,11 +6668,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6621,17 +6682,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04791s; avg 0.01597s ( +/- 24.85/ 49.62 %); best 0.012s; worst 0.0239s; std dev. 0.005604 (taking best). -Reference operation time is 0.012001 s (1.683 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 4, 90 leaves, 2.605 bytes/nz, 0.012001 s/0 threads (speedup 0.000913859 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 122 subms, 90 lsubms, 2.6051 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1841.85 Mflops. +3 iterations (1 th.) took 0.01132s; avg 0.003775s ( +/- 1.69/ 1.60 %); best 0.003711s; worst 0.003835s; std dev. 5.076e-05 (taking best). +Reference operation time is 0.00371099 s (5.443 Mflops) with 1 threads. +Challenging best inner round reference (1.00136e-05 s/1 threads) with: subdivision 4, 78 leaves, 2.555 bytes/nz, 0.00371099 s/0 threads (speedup 0.00269836 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 106 subms, 78 lsubms, 2.5552 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 2017.26 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6641,23 +6702,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.935813 s (eq. to 9e+04/ 9e+04 old/new op.times), gained local/global speedup 1 x (1.09673e-05 : 1.09673e-05) / 1 x (1.09673e-05 : 1.09673e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.168082 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (1.00136e-05 : 1.00136e-05) / 1 x (1.00136e-05 : 1.00136e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.94s, 0.67s for constructor, 0 clones) obtained NO speedup (best stays 1842 Mflops). -Second run of RSB Autotuner took 0.935942 s and estimated a speedup of 1.000000 x (1.097e-05 s -> 1.097e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.17s, 0.12s for constructor, 0 clones) obtained NO speedup (best stays 2017 Mflops). +Second run of RSB Autotuner took 0.16813 s and estimated a speedup of 1.000000 x (1.001e-05 s -> 1.001e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.029783 0.069023 0.098806 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.098806 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.029783 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.069023 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.005315 0.015060 0.020375 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.020375 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.005315 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015060 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.098806 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.020375 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -6672,45 +6733,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.141772 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.027967 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.029783 0 0.0690229 -# so far, program took 9.603s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.781s/0.000s . +%constructor:lower-100x100-5050nz 0 0.00531507 0 0.0150599 +# so far, program took 6.895s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.541s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.08987s (system CPU time used) -ru_utime : 38.17s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.1807s (system CPU time used) +ru_utime : 29.08s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 9.603s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.781s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 6.895s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.541s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.351e-01 s (100.00 %) - analyzed arrays in 3.503e-02 s (25.94 %) - cleaned-up arrays in 1.788e-05 s (0.01 %) - deduplicated arrays in 1.502e-05 s (0.01 %) + converted COO to RSB in 2.847e-02 s (100.00 %) + analyzed arrays in 5.511e-03 s (19.36 %) + cleaned-up arrays in 7.868e-06 s (0.03 %) + deduplicated arrays in 1.001e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.996e-02 s (44.40 %) - memory allocations took 1.168e-05 s (0.01 %) - leafs setup took 9.060e-06 s (0.01 %) - halfword conversion took 4.001e-02 s (29.63 %) -Built (100 x 100)[0x55b8856a1430]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.135s): (100 x 100)[0x55b8856a1430]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 1.513e-02 s (53.15 %) + memory allocations took 4.053e-06 s (0.01 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 7.803e-03 s (27.41 %) +Built (100 x 100)[0x559bedcd3760]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.029s): (100 x 100)[0x559bedcd3760]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6721,11 +6782,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6736,11 +6797,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6750,16 +6811,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05597s; avg 0.01866s ( +/- 14.39/ 28.54 %); best 0.01597s; worst 0.02398s; std dev. 0.003766 (taking best). -Reference operation time is 0.0159729 s (5.059 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01597 Mflops: 5.059) -Merge (3 -> 1 leaves) took w.c.t. of 9.608e-05s, ~7.606e-05s of computing time (of which 3.719e-05s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 0.01136s; avg 0.003788s ( +/- 3.51/ 3.00 %); best 0.003655s; worst 0.003902s; std dev. 0.0001017 (taking best). +Reference operation time is 0.0036552 s (22.11 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.003655 Mflops: 22.106) +Merge (3 -> 1 leaves) took w.c.t. of 5.388e-05s, ~4.601e-05s of computing time (of which 1.907e-05s sorting, 1.907e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6770,11 +6831,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6784,14 +6845,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000113s; avg 3.767e-05s ( +/- 10.13/ 17.09 %); best 3.386e-05s; worst 4.411e-05s; std dev. 4.578e-06 (taking best). -Reference operation time is 3.38554e-05 s (2387 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001371s; avg 4.57e-05s ( +/- 12.35/ 20.52 %); best 4.005e-05s; worst 5.507e-05s; std dev. 6.677e-06 (taking best). +Reference operation time is 4.00543e-05 s (2017 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6801,25 +6862,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 3.386e-05 s ~Mflops: 2386.618 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 471.796x: 0.01597s -> 3.386e-05s, so taking this instance. +After merge step 1: tpop: 4.005e-05 s ~Mflops: 2017.260 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 91.256x: 0.003655s -> 4.005e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.2281s (of which 0.0001009s partitioning, 0.1367s I/O); computing times: 7.606e-05s in par. loops, 3.719e-05s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.2281s, equivalent to 6738.6/14.3 new/old ops (0.1545s for 2 clones -- as 4563.3/9.7 ops, or 2281.7/4.8 ops per clone), SPEEDUP of 471.796x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 471.796x (0.01597s -> 3.386e-05s), will amortize in 14.3 ops by saving 0.01594s per op. -In 1 tuning rounds (tot. 0.35s, 0.15s for constructor, 2 clones) obtained a SPEEDUP of 47079.6% (471.8x) (from 5.059 to 2387 Mflops). Employed 0.12s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.03573s (of which 0.0002279s partitioning, 0.02421s I/O); computing times: 4.601e-05s in par. loops, 1.907e-05s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.03573s, equivalent to 892.2/9.8 new/old ops (0.02246s for 2 clones -- as 560.8/6.1 ops, or 280.4/3.1 ops per clone), SPEEDUP of 91.256x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 91.256x (0.003655s -> 4.005e-05s), will amortize in 9.9 ops by saving 0.003615s per op. +In 1 tuning rounds (tot. 0.058s, 0.022s for constructor, 2 clones) obtained a SPEEDUP of 9025.6% (91.26x) (from 22.11 to 2017 Mflops). Employed 0.024s for I/O of matrix plots. #pr: updating sample at index 3 (2^th of 4), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.468045 s (1.597e-02 s -> 3.386e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.082109 s (3.655e-03 s -> 4.005e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6830,11 +6891,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6845,13 +6906,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 20. +Starting with requested 0 threads ; current default 1 ; at most 42. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6862,11 +6923,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6876,27 +6937,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001202s; avg 4.005e-05s ( +/- 10.12/ 20.24 %); best 3.6e-05s; worst 4.816e-05s; std dev. 5.732e-06 (taking best). -Reference operation time is 3.60012e-05 s (2244 Mflops) with 1 threads. +3 iterations (1 th.) took 0.000123s; avg 4.101e-05s ( +/- 2.33/ 4.65 %); best 4.005e-05s; worst 4.292e-05s; std dev. 1.349e-06 (taking best). +Reference operation time is 4.00543e-05 s (2017 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.552e-02 s (100.00 %) - analyzed arrays in 3.269e-02 s (43.29 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.192e-05 s (0.02 %) + converted COO to RSB in 1.305e-02 s (100.00 %) + analyzed arrays in 5.432e-03 s (41.62 %) + cleaned-up arrays in 8.106e-06 s (0.06 %) + deduplicated arrays in 1.192e-05 s (0.09 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.277e-02 s (56.63 %) - memory allocations took 6.914e-06 s (0.01 %) - leafs setup took 7.868e-06 s (0.01 %) - halfword conversion took 1.717e-05 s (0.02 %) -Built (100 x 100)[0x55b8856a1310]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 20). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.590e-03 s (58.16 %) + memory allocations took 1.907e-06 s (0.01 %) + leafs setup took 9.537e-07 s (0.01 %) + halfword conversion took 5.007e-06 s (0.04 %) +Built (100 x 100)[0x559bedcd3640]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 42). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6907,12 +6968,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6923,22 +6984,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.198e-01 s (100.00 %) - analyzed arrays in 3.579e-02 s (29.87 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.420e-02 s (36.90 %) - memory allocations took 1.192e-05 s (0.01 %) - leafs setup took 7.868e-06 s (0.01 %) - halfword conversion took 3.977e-02 s (33.19 %) -Built (100 x 100)[0x55b8856a1430]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + converted COO to RSB in 2.074e-02 s (100.00 %) + analyzed arrays in 5.528e-03 s (26.65 %) + cleaned-up arrays in 9.060e-06 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.05 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 7.603e-03 s (36.66 %) + memory allocations took 6.413e-05 s (0.31 %) + leafs setup took 1.907e-06 s (0.01 %) + halfword conversion took 7.522e-03 s (36.27 %) +Built (100 x 100)[0x559bedcc5610]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6949,11 +7010,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6964,11 +7025,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6978,16 +7039,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05575s; avg 0.01858s ( +/- 37.63/ 52.96 %); best 0.01159s; worst 0.02843s; std dev. 0.007162 (taking best). -Reference operation time is 0.011591 s (6.971 Mflops) with 1 threads. -Challenging best inner round reference (3.60012e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.011591 s/0 threads (speedup 0.00310597 x), same?n. +3 iterations (1 th.) took 0.01135s; avg 0.003785s ( +/- 2.63/ 1.93 %); best 0.003685s; worst 0.003858s; std dev. 7.301e-05 (taking best). +Reference operation time is 0.003685 s (21.93 Mflops) with 1 threads. +Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.003685 s/0 threads (speedup 0.0108696 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6998,12 +7059,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7014,22 +7075,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.600e-01 s (100.00 %) - analyzed arrays in 6.483e-02 s (40.51 %) - cleaned-up arrays in 1.597e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 2.074e-02 s (100.00 %) + analyzed arrays in 5.663e-03 s (27.31 %) + cleaned-up arrays in 8.106e-06 s (0.04 %) + deduplicated arrays in 1.097e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.519e-02 s (21.99 %) - memory allocations took 1.597e-05 s (0.01 %) - leafs setup took 8.821e-06 s (0.01 %) - halfword conversion took 5.996e-02 s (37.47 %) -Built (100 x 100)[0x55b8856a1430]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 12, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.436e-03 s (35.86 %) + memory allocations took 7.010e-05 s (0.34 %) + leafs setup took 2.146e-06 s (0.01 %) + halfword conversion took 7.548e-03 s (36.40 %) +Built (100 x 100)[0x559bedcc5610]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7040,11 +7101,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7055,11 +7116,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7069,16 +7130,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06323s; avg 0.02108s ( +/- 21.25/ 13.26 %); best 0.0166s; worst 0.02387s; std dev. 0.003199 (taking best). -Reference operation time is 0.016597 s (4.868 Mflops) with 1 threads. -Challenging best inner round reference (3.60012e-05 s/1 threads) with: subdivision 0.5, 12 leaves, 2.217 bytes/nz, 0.016597 s/0 threads (speedup 0.00216914 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 17 subms, 12 lsubms, 2.2170 bpnz +3 iterations (1 th.) took 0.01131s; avg 0.00377s ( +/- 1.42/ 1.15 %); best 0.003716s; worst 0.003813s; std dev. 4.027e-05 (taking best). +Reference operation time is 0.00371599 s (21.74 Mflops) with 1 threads. +Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.00371599 s/0 threads (speedup 0.0107789 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7089,12 +7150,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7105,22 +7166,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.479e-01 s (100.00 %) - analyzed arrays in 6.068e-02 s (41.03 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 2.065e-02 s (100.00 %) + analyzed arrays in 5.692e-03 s (27.57 %) + cleaned-up arrays in 7.868e-06 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.112e-02 s (34.57 %) - memory allocations took 2.193e-05 s (0.01 %) - leafs setup took 1.001e-05 s (0.01 %) - halfword conversion took 3.602e-02 s (24.36 %) -Built (100 x 100)[0x55b8856a1430]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 27, symflags:'LS' + shuffled partitions in 7.409e-03 s (35.88 %) + memory allocations took 1.526e-05 s (0.07 %) + leafs setup took 2.146e-06 s (0.01 %) + halfword conversion took 7.512e-03 s (36.38 %) +Built (100 x 100)[0x559bedcd3760]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 22, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7131,11 +7192,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7146,11 +7207,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7160,16 +7221,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06344s; avg 0.02115s ( +/- 26.41/ 44.07 %); best 0.01556s; worst 0.03047s; std dev. 0.006633 (taking best). -Reference operation time is 0.0155621 s (5.192 Mflops) with 1 threads. -Challenging best inner round reference (3.60012e-05 s/1 threads) with: subdivision 1, 27 leaves, 2.328 bytes/nz, 0.0155621 s/0 threads (speedup 0.0023134 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 37 subms, 27 lsubms, 2.3279 bpnz +3 iterations (1 th.) took 0.01132s; avg 0.003772s ( +/- 1.49/ 1.06 %); best 0.003716s; worst 0.003812s; std dev. 4.082e-05 (taking best). +Reference operation time is 0.00371599 s (21.74 Mflops) with 1 threads. +Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 1, 22 leaves, 2.295 bytes/nz, 0.00371599 s/0 threads (speedup 0.0107789 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 30 subms, 22 lsubms, 2.2947 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7180,12 +7241,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7196,22 +7257,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.208e-01 s (100.00 %) - analyzed arrays in 4.941e-02 s (40.91 %) - cleaned-up arrays in 1.407e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 2.509e-02 s (100.00 %) + analyzed arrays in 9.643e-03 s (38.43 %) + cleaned-up arrays in 9.060e-06 s (0.04 %) + deduplicated arrays in 1.001e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.221e-02 s (26.67 %) - memory allocations took 2.384e-05 s (0.02 %) - leafs setup took 1.407e-05 s (0.01 %) - halfword conversion took 3.910e-02 s (32.37 %) -Built (100 x 100)[0x55b8856c80a0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 57, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.754e-03 s (30.90 %) + memory allocations took 2.861e-06 s (0.01 %) + leafs setup took 4.053e-06 s (0.02 %) + halfword conversion took 7.671e-03 s (30.57 %) +Built (100 x 100)[0x559bedcfa400]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 48, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7222,11 +7283,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7237,11 +7298,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7251,16 +7312,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.07058s; avg 0.02353s ( +/- 49.12/ 64.13 %); best 0.01197s; worst 0.03861s; std dev. 0.01116 (taking best). -Reference operation time is 0.0119691 s (6.751 Mflops) with 1 threads. -Challenging best inner round reference (3.60012e-05 s/1 threads) with: subdivision 2, 57 leaves, 2.47 bytes/nz, 0.0119691 s/0 threads (speedup 0.00300785 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 78 subms, 57 lsubms, 2.4705 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 0.01125s; avg 0.003751s ( +/- 2.75/ 1.91 %); best 0.003648s; worst 0.003823s; std dev. 7.486e-05 (taking best). +Reference operation time is 0.00364804 s (22.15 Mflops) with 1 threads. +Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 2, 48 leaves, 2.434 bytes/nz, 0.00364804 s/0 threads (speedup 0.0109797 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 66 subms, 48 lsubms, 2.4341 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7271,12 +7332,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7287,22 +7348,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.718e-01 s (100.00 %) - analyzed arrays in 7.979e-02 s (46.43 %) - cleaned-up arrays in 1.407e-05 s (0.01 %) - deduplicated arrays in 1.287e-05 s (0.01 %) + converted COO to RSB in 2.482e-02 s (100.00 %) + analyzed arrays in 9.697e-03 s (39.07 %) + cleaned-up arrays in 8.106e-06 s (0.03 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.810e-02 s (27.99 %) - memory allocations took 2.384e-05 s (0.01 %) - leafs setup took 4.292e-05 s (0.02 %) - halfword conversion took 4.386e-02 s (25.52 %) -Built (100 x 100)[0x55b8856c80a0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 120, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.539e-03 s (30.37 %) + memory allocations took 5.960e-06 s (0.02 %) + leafs setup took 7.153e-06 s (0.03 %) + halfword conversion took 7.554e-03 s (30.43 %) +Built (100 x 100)[0x559bedcfa400]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 102, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7313,11 +7374,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7328,11 +7389,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7342,17 +7403,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.07147s; avg 0.02382s ( +/- 20.66/ 34.28 %); best 0.0189s; worst 0.03199s; std dev. 0.005816 (taking best). -Reference operation time is 0.0189021 s (4.275 Mflops) with 1 threads. -Challenging best inner round reference (3.60012e-05 s/1 threads) with: subdivision 4, 120 leaves, 2.726 bytes/nz, 0.0189021 s/0 threads (speedup 0.00190462 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 162 subms, 120 lsubms, 2.7255 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 2244.37 Mflops. +3 iterations (1 th.) took 0.01126s; avg 0.003753s ( +/- 0.91/ 1.49 %); best 0.003719s; worst 0.003809s; std dev. 3.993e-05 (taking best). +Reference operation time is 0.00371885 s (21.73 Mflops) with 1 threads. +Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 4, 102 leaves, 2.647 bytes/nz, 0.00371885 s/0 threads (speedup 0.0107706 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 139 subms, 102 lsubms, 2.6471 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 2017.26 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7362,23 +7423,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 1.1238 s (eq. to 3e+04/ 3e+04 old/new op.times), gained local/global speedup 1 x (3.60012e-05 : 3.60012e-05) / 1 x (3.60012e-05 : 3.60012e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.182542 s (eq. to 5e+03/ 5e+03 old/new op.times), gained local/global speedup 1 x (4.00543e-05 : 4.00543e-05) / 1 x (4.00543e-05 : 4.00543e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 1.1s, 0.8s for constructor, 0 clones) obtained NO speedup (best stays 2244 Mflops). -Second run of RSB Autotuner took 1.1239 s and estimated a speedup of 1.000000 x (3.600e-05 s -> 3.600e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.18s, 0.13s for constructor, 0 clones) obtained NO speedup (best stays 2017 Mflops). +Second run of RSB Autotuner took 0.182632 s and estimated a speedup of 1.000000 x (4.005e-05 s -> 4.005e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.035031 0.059960 0.094991 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.094991 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.035031 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.059960 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.005511 0.015131 0.020642 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.020642 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.005511 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015131 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.094991 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.020642 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -7393,45 +7454,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.13506 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.0284691 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.0350311 0 0.0599601 -# so far, program took 11.454s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.373s/0.000s . +%constructor:lower-100x100-5050nz 0 0.00551105 0 0.015131 +# so far, program took 7.212s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.806s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.09382s (system CPU time used) -ru_utime : 54.52s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.1894s (system CPU time used) +ru_utime : 41.03s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was D). -# Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 11.454s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.373s/0.000s . +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# so far, program took 7.212s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.806s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.796e-01 s (100.00 %) - analyzed arrays in 2.762e-02 s (15.38 %) - cleaned-up arrays in 1.407e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 2.899e-02 s (100.00 %) + analyzed arrays in 5.664e-03 s (19.54 %) + cleaned-up arrays in 7.868e-06 s (0.03 %) + deduplicated arrays in 1.216e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 9.595e-02 s (53.43 %) - memory allocations took 1.574e-05 s (0.01 %) - leafs setup took 7.153e-06 s (0.00 %) - halfword conversion took 5.597e-02 s (31.16 %) -Built (100 x 100)[0x55b8856a1430]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.180s): (100 x 100)[0x55b8856a1430]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 1.577e-02 s (54.40 %) + memory allocations took 6.914e-06 s (0.02 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 7.525e-03 s (25.96 %) +Built (100 x 100)[0x559bedcd3760]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.029s): (100 x 100)[0x559bedcd3760]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7442,11 +7503,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7457,11 +7518,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7471,16 +7532,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06777s; avg 0.02259s ( +/- 12.44/ 6.63 %); best 0.01978s; worst 0.02409s; std dev. 0.001988 (taking best). -Reference operation time is 0.0197802 s (4.085 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01978 Mflops: 4.085) -Merge (3 -> 1 leaves) took w.c.t. of 0.0001411s, ~0.000113s of computing time (of which 6.914e-05s sorting, 7.868e-06s analysis) +3 iterations (1 th.) took 0.01185s; avg 0.00395s ( +/- 10.13/ 18.21 %); best 0.00355s; worst 0.00467s; std dev. 0.0005099 (taking best). +Reference operation time is 0.00355005 s (22.76 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.00355 Mflops: 22.760) +Merge (3 -> 1 leaves) took w.c.t. of 0.0001249s, ~0.000108s of computing time (of which 7.892e-05s sorting, 4.053e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7491,11 +7552,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7505,14 +7566,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000114s; avg 3.799e-05s ( +/- 10.88/ 18.62 %); best 3.386e-05s; worst 4.506e-05s; std dev. 5.025e-06 (taking best). -Reference operation time is 3.38554e-05 s (2387 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001311s; avg 4.371e-05s ( +/- 4.00/ 5.27 %); best 4.196e-05s; worst 4.601e-05s; std dev. 1.701e-06 (taking best). +Reference operation time is 4.19617e-05 s (1926 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7522,25 +7583,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 3.386e-05 s ~Mflops: 2386.618 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 584.254x: 0.01978s -> 3.386e-05s, so taking this instance. +After merge step 1: tpop: 4.196e-05 s ~Mflops: 1925.567 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 84.602x: 0.00355s -> 4.196e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.2442s (of which 0.000145s partitioning, 0.1561s I/O); computing times: 0.000113s in par. loops, 6.914e-05s sorting, 7.868e-06s analyzing) -Total merge + benchmarking process took 0.2442s, equivalent to 7214.0/12.3 new/old ops (0.1637s for 2 clones -- as 4834.7/8.3 ops, or 2417.3/4.1 ops per clone), SPEEDUP of 584.254x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 584.254x (0.01978s -> 3.386e-05s), will amortize in 12.4 ops by saving 0.01975s per op. -In 1 tuning rounds (tot. 0.39s, 0.16s for constructor, 2 clones) obtained a SPEEDUP of 58325.4% (584.3x) (from 4.085 to 2387 Mflops). Employed 0.14s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04223s (of which 0.0001309s partitioning, 0.03013s I/O); computing times: 0.000108s in par. loops, 7.892e-05s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.04223s, equivalent to 1006.3/11.9 new/old ops (0.02257s for 2 clones -- as 538.0/6.4 ops, or 269.0/3.2 ops per clone), SPEEDUP of 84.602x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 84.602x (0.00355s -> 4.196e-05s), will amortize in 12.0 ops by saving 0.003508s per op. +In 1 tuning rounds (tot. 0.065s, 0.023s for constructor, 2 clones) obtained a SPEEDUP of 8360.2% (84.6x) (from 22.76 to 1926 Mflops). Employed 0.027s for I/O of matrix plots. #pr: updating sample at index 4 (3^th of 4), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.528252 s (1.978e-02 s -> 3.386e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.0917048 s (3.550e-03 s -> 4.196e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7551,11 +7612,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7566,13 +7627,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 20. +Starting with requested 0 threads ; current default 1 ; at most 42. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7583,11 +7644,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7597,27 +7658,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001411s; avg 4.705e-05s ( +/- 14.86/ 29.73 %); best 4.005e-05s; worst 6.104e-05s; std dev. 9.89e-06 (taking best). -Reference operation time is 4.00543e-05 s (2017 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001252s; avg 4.172e-05s ( +/- 4.57/ 3.43 %); best 3.982e-05s; worst 4.315e-05s; std dev. 1.404e-06 (taking best). +Reference operation time is 3.98159e-05 s (2029 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.072e-01 s (100.00 %) - analyzed arrays in 3.522e-02 s (32.84 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 1.289e-02 s (100.00 %) + analyzed arrays in 5.353e-03 s (41.51 %) + cleaned-up arrays in 9.775e-06 s (0.08 %) + deduplicated arrays in 1.001e-05 s (0.08 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 7.195e-02 s (67.10 %) - memory allocations took 8.821e-06 s (0.01 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 1.788e-05 s (0.02 %) -Built (100 x 100)[0x55b8856a1310]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 20). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.510e-03 s (58.24 %) + memory allocations took 4.292e-06 s (0.03 %) + leafs setup took 9.537e-07 s (0.01 %) + halfword conversion took 4.053e-06 s (0.03 %) +Built (100 x 100)[0x559bedcd3640]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 42). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7628,12 +7689,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7644,22 +7705,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.437e-01 s (100.00 %) - analyzed arrays in 4.378e-02 s (30.46 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 2.103e-02 s (100.00 %) + analyzed arrays in 5.664e-03 s (26.94 %) + cleaned-up arrays in 8.106e-06 s (0.04 %) + deduplicated arrays in 1.192e-05 s (0.06 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.594e-02 s (38.92 %) - memory allocations took 1.025e-05 s (0.01 %) - leafs setup took 5.007e-06 s (0.00 %) - halfword conversion took 4.398e-02 s (30.60 %) -Built (100 x 100)[0x55b8856a1430]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.765e-03 s (36.93 %) + memory allocations took 1.907e-06 s (0.01 %) + leafs setup took 1.907e-06 s (0.01 %) + halfword conversion took 7.574e-03 s (36.02 %) +Built (100 x 100)[0x559bedcd3760]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7670,11 +7731,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7685,11 +7746,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7699,16 +7760,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06793s; avg 0.02264s ( +/- 46.53/ 40.93 %); best 0.01211s; worst 0.03191s; std dev. 0.008134 (taking best). -Reference operation time is 0.0121062 s (6.674 Mflops) with 1 threads. -Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 0.25, 8 leaves, 2.185 bytes/nz, 0.0121062 s/0 threads (speedup 0.00330858 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 11 subms, 8 lsubms, 2.1846 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 0.01115s; avg 0.003718s ( +/- 1.64/ 0.97 %); best 0.003657s; worst 0.003754s; std dev. 4.328e-05 (taking best). +Reference operation time is 0.0036571 s (22.09 Mflops) with 1 threads. +Challenging best inner round reference (3.98159e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 0.0036571 s/0 threads (speedup 0.0108873 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7719,12 +7780,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7735,22 +7796,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.396e-01 s (100.00 %) - analyzed arrays in 5.553e-02 s (39.79 %) - cleaned-up arrays in 1.311e-05 s (0.01 %) - deduplicated arrays in 1.097e-05 s (0.01 %) + converted COO to RSB in 2.101e-02 s (100.00 %) + analyzed arrays in 5.821e-03 s (27.71 %) + cleaned-up arrays in 8.106e-06 s (0.04 %) + deduplicated arrays in 1.097e-05 s (0.05 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.607e-02 s (40.17 %) - memory allocations took 1.287e-05 s (0.01 %) - leafs setup took 8.106e-06 s (0.01 %) - halfword conversion took 2.793e-02 s (20.01 %) -Built (100 x 100)[0x55b8856a1430]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 19, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.689e-03 s (36.60 %) + memory allocations took 3.815e-06 s (0.02 %) + leafs setup took 1.907e-06 s (0.01 %) + halfword conversion took 7.474e-03 s (35.58 %) +Built (100 x 100)[0x559bedcd3760]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7761,11 +7822,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7776,11 +7837,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7790,16 +7851,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.07191s; avg 0.02397s ( +/- 15.95/ 16.29 %); best 0.02015s; worst 0.02788s; std dev. 0.003156 (taking best). -Reference operation time is 0.0201468 s (4.011 Mflops) with 1 threads. -Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 0.5, 19 leaves, 2.272 bytes/nz, 0.0201468 s/0 threads (speedup 0.00198812 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 26 subms, 19 lsubms, 2.2725 bpnz +3 iterations (1 th.) took 0.01123s; avg 0.003745s ( +/- 4.74/ 3.24 %); best 0.003567s; worst 0.003866s; std dev. 0.0001283 (taking best). +Reference operation time is 0.00356722 s (22.65 Mflops) with 1 threads. +Challenging best inner round reference (3.98159e-05 s/1 threads) with: subdivision 0.5, 16 leaves, 2.25 bytes/nz, 0.00356722 s/0 threads (speedup 0.0111616 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7810,12 +7871,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7826,22 +7887,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.153e-01 s (100.00 %) - analyzed arrays in 5.127e-02 s (44.45 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 2.510e-02 s (100.00 %) + analyzed arrays in 9.616e-03 s (38.32 %) + cleaned-up arrays in 8.106e-06 s (0.03 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.205e-02 s (27.79 %) - memory allocations took 1.526e-05 s (0.01 %) - leafs setup took 1.287e-05 s (0.01 %) - halfword conversion took 3.196e-02 s (27.71 %) -Built (100 x 100)[0x55b8856ef7f0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 39, symflags:'LS' + shuffled partitions in 7.787e-03 s (31.03 %) + memory allocations took 5.960e-06 s (0.02 %) + leafs setup took 4.053e-06 s (0.02 %) + halfword conversion took 7.665e-03 s (30.54 %) +Built (100 x 100)[0x559bedcd3760]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7852,11 +7913,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7867,11 +7928,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7881,16 +7942,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05594s; avg 0.01865s ( +/- 34.98/ 28.31 %); best 0.01212s; worst 0.02393s; std dev. 0.004898 (taking best). -Reference operation time is 0.0121229 s (6.665 Mflops) with 1 threads. -Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 1, 39 leaves, 2.377 bytes/nz, 0.0121229 s/0 threads (speedup 0.00330403 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 54 subms, 39 lsubms, 2.3770 bpnz +3 iterations (1 th.) took 0.01219s; avg 0.004062s ( +/- 9.84/ 16.67 %); best 0.003662s; worst 0.004739s; std dev. 0.0004814 (taking best). +Reference operation time is 0.00366211 s (22.06 Mflops) with 1 threads. +Challenging best inner round reference (3.98159e-05 s/1 threads) with: subdivision 1, 36 leaves, 2.383 bytes/nz, 0.00366211 s/0 threads (speedup 0.0108724 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7901,12 +7962,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7917,22 +7978,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.114e-01 s (100.00 %) - analyzed arrays in 4.342e-02 s (38.97 %) - cleaned-up arrays in 1.192e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 2.515e-02 s (100.00 %) + analyzed arrays in 9.712e-03 s (38.62 %) + cleaned-up arrays in 7.153e-06 s (0.03 %) + deduplicated arrays in 1.097e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.832e-02 s (25.41 %) - memory allocations took 2.408e-05 s (0.02 %) - leafs setup took 1.907e-05 s (0.02 %) - halfword conversion took 3.962e-02 s (35.56 %) -Built (100 x 100)[0x55b8856ef7f0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 84, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.809e-03 s (31.06 %) + memory allocations took 4.768e-06 s (0.02 %) + leafs setup took 5.960e-06 s (0.02 %) + halfword conversion took 7.596e-03 s (30.21 %) +Built (100 x 100)[0x559bedd21b50]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 76, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7943,11 +8004,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7958,11 +8019,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7972,16 +8033,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.06787s; avg 0.02262s ( +/- 46.88/ 41.22 %); best 0.01202s; worst 0.03195s; std dev. 0.008187 (taking best). -Reference operation time is 0.0120192 s (6.723 Mflops) with 1 threads. -Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 2, 84 leaves, 2.58 bytes/nz, 0.0120192 s/0 threads (speedup 0.00333254 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 114 subms, 84 lsubms, 2.5798 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +3 iterations (1 th.) took 0.01121s; avg 0.003737s ( +/- 2.56/ 2.36 %); best 0.003641s; worst 0.003825s; std dev. 7.533e-05 (taking best). +Reference operation time is 0.00364089 s (22.19 Mflops) with 1 threads. +Challenging best inner round reference (3.98159e-05 s/1 threads) with: subdivision 2, 76 leaves, 2.545 bytes/nz, 0.00364089 s/0 threads (speedup 0.0109358 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 103 subms, 76 lsubms, 2.5450 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7992,12 +8053,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 26214 bytes +# Cache block size total 4194304 bytes, per-thread 99864 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 -# RSB_IO_WANT_EXECUTING_THREADS: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 +# RSB_IO_WANT_EXECUTING_THREADS: 42 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -8008,22 +8069,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.674e-01 s (100.00 %) - analyzed arrays in 6.726e-02 s (40.19 %) - cleaned-up arrays in 1.407e-05 s (0.01 %) - deduplicated arrays in 1.097e-05 s (0.01 %) + converted COO to RSB in 2.508e-02 s (100.00 %) + analyzed arrays in 9.681e-03 s (38.59 %) + cleaned-up arrays in 8.106e-06 s (0.03 %) + deduplicated arrays in 1.001e-05 s (0.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.603e-02 s (33.48 %) - memory allocations took 2.694e-05 s (0.02 %) - leafs setup took 2.480e-05 s (0.01 %) - halfword conversion took 4.400e-02 s (26.29 %) -Built (100 x 100)[0x55b8856ef7f0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 120, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 + shuffled partitions in 7.673e-03 s (30.59 %) + memory allocations took 7.868e-06 s (0.03 %) + leafs setup took 7.868e-06 s (0.03 %) + halfword conversion took 7.697e-03 s (30.68 %) +Built (100 x 100)[0x559bedd21b50]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 136, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8034,11 +8095,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8049,11 +8110,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8063,17 +8124,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.07585s; avg 0.02528s ( +/- 52.45/ 26.48 %); best 0.01202s; worst 0.03198s; std dev. 0.009378 (taking best). -Reference operation time is 0.012022 s (6.721 Mflops) with 1 threads. -Challenging best inner round reference (4.00543e-05 s/1 threads) with: subdivision 4, 120 leaves, 2.726 bytes/nz, 0.012022 s/0 threads (speedup 0.00333175 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 162 subms, 120 lsubms, 2.7255 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 2017.26 Mflops. +3 iterations (1 th.) took 0.01143s; avg 0.00381s ( +/- 3.83/ 3.44 %); best 0.003664s; worst 0.003941s; std dev. 0.0001136 (taking best). +Reference operation time is 0.00366402 s (22.05 Mflops) with 1 threads. +Challenging best inner round reference (3.98159e-05 s/1 threads) with: subdivision 4, 136 leaves, 2.776 bytes/nz, 0.00366402 s/0 threads (speedup 0.0108667 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 186 subms, 136 lsubms, 2.7762 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 2029.34 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 524288 bytes +# Cache block size total 4194304 bytes, per-thread 4194304 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 65536 -# avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 20 +# min_leaf_matrix_bytes : 32768 +# avg_leaf_matrix_bytes : 8388608 +# rsb_g_threads: 42 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -8083,23 +8144,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 1.12797 s (eq. to 3e+04/ 3e+04 old/new op.times), gained local/global speedup 1 x (4.00543e-05 : 4.00543e-05) / 1 x (4.00543e-05 : 4.00543e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.188889 s (eq. to 5e+03/ 5e+03 old/new op.times), gained local/global speedup 1 x (3.98159e-05 : 3.98159e-05) / 1 x (3.98159e-05 : 3.98159e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 1.1s, 0.79s for constructor, 0 clones) obtained NO speedup (best stays 2017 Mflops). -Second run of RSB Autotuner took 1.12808 s and estimated a speedup of 1.000000 x (4.005e-05 s -> 4.005e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.19s, 0.13s for constructor, 0 clones) obtained NO speedup (best stays 2029 Mflops). +Second run of RSB Autotuner took 0.188966 s and estimated a speedup of 1.000000 x (3.982e-05 s -> 3.982e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.027623 0.095955 0.123578 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.123578 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.027623 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.095955 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.005664 0.015769 0.021433 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.021433 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.005664 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015769 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.123578 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.021433 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -8114,68 +8175,68 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.179605 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.028986 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.0276229 0 0.0959549 -# so far, program took 13.391s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 6.029s/0.000s . +%constructor:lower-100x100-5050nz 0 0.00566411 0 0.015769 +# so far, program took 7.544s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.086s/0.000s . getrusage() stats: -ru_maxrss: 11 (maximum resident set size -- MB) -ru_stime : 0.094s (system CPU time used) -ru_utime : 72.09s (user CPU time used) +ru_maxrss: 26 (maximum resident set size -- MB) +ru_stime : 0.2222s (system CPU time used) +ru_utime : 53.14s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 4) ordered by (1,1,1,1,1,4,1) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1694.50 1.200e-02 0.000e+00 1.192e-05 0.000e+00 4.081e-01 4.47e+00 2.60e+00 1 2.02e-02 -pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1841.85 1.206e-02 0.000e+00 1.097e-05 0.000e+00 4.086e-01 2.91e+00 1.56e+00 1 2.02e-02 -pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 2386.62 1.597e-02 0.000e+00 3.386e-05 0.000e+00 4.680e-01 1.57e+00 6.50e-01 1 8.08e-02 -pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 2386.62 1.978e-02 0.000e+00 3.386e-05 0.000e+00 5.283e-01 2.84e+00 1.17e+00 1 8.08e-02 +pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 2017.26 3.779e-03 0.000e+00 1.001e-05 0.000e+00 8.037e-02 5.32e+00 2.60e+00 1 2.02e-02 +pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 2017.26 1.987e-03 0.000e+00 1.001e-05 0.000e+00 1.073e-01 3.19e+00 1.56e+00 1 2.02e-02 +pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 2017.26 3.655e-03 0.000e+00 4.005e-05 0.000e+00 8.211e-02 1.33e+00 6.50e-01 1 8.08e-02 +pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 1925.57 3.550e-03 0.000e+00 4.196e-05 0.000e+00 9.170e-02 2.29e+00 1.17e+00 1 8.08e-02 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 78951.6 % faster, avg. sp. ratio 790.516x, max sp. ratio 1099.457x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 25228.1/13824.8/37253.8/100912.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 31.0/ 26.7/ 34.0/123.9 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 31.0, min. 26.8, max. 34.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 18691.7 % faster, avg. sp. ratio 187.917x, max sp. ratio 377.381x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 5744.6/2049.9/10716.6/22978.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 30.9/ 21.3/ 54.0/123.6 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 31.1, min. 21.3, max. 54.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 1683/ 1683/ 1683) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 5050/ 5050/ 5050) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 15150/ 6733/ 26933) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 45450/ 20200/ 80800) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 2.121/ 2.121/ 2.121) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.905/ 1.551/ 4.404,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 11.794/ 1.574/ 4.471,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.988/ 1.311/ 5.243,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 12.130/ 1.331/ 5.323,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.495/ 0.650/ 2.599) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 2.080/ 2.080/ 2.080) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.45 s, min 0.41 s, max 0.53 s, tot 1.81 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.45 s, min 0.41 s, max 0.53 s, tot 1.81 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.077e+03, min 1.694e+03, max 2.387e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.126e+00, min 1.675e+00, max 5.059e+00 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.265e-05s, min 1.097e-05s, max 3.386e-05s, tot 9.060e-05s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.495e-02s, min 1.200e-02s, max 1.978e-02s, tot 5.981e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.649e+00 4.682e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.784e+00 -#pr: Record collection took 5.85 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.09 s, min 0.08 s, max 0.11 s, tot 0.36 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.09 s, min 0.08 s, max 0.11 s, tot 0.36 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.994e+03, min 1.926e+03, max 2.017e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.509e+01, min 5.345e+00, max 2.276e+01 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.551e-05s, min 1.001e-05s, max 4.196e-05s, tot 1.020e-04s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 3.243e-03s, min 1.987e-03s, max 3.779e-03s, tot 1.297e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.075e+00 1.230e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 5.469e+00 +#pr: Record collection took 1.09 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 99 environment variables in 5311 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 99 environment variables in 5364 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. #pr: ======== Saved a performance record of 4 samples to test.rpr # Removing the temporary record file test.rpr.tmp. -# terminating run at 1740166951 (after 13.4s of w.c.t.) +# terminating run at 1774579361 (after 7.5s of w.c.t.) + ls -ltr test-tuning-lower-100x100-5050nz--C-N-1--base.eps test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--D-N-1--base.eps test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--S-N-1--base.eps test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--Z-N-1--base.eps test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85632 Feb 21 07:42 test-tuning-lower-100x100-5050nz--D-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84554 Feb 21 07:42 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85632 Feb 21 07:42 test-tuning-lower-100x100-5050nz--S-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84554 Feb 21 07:42 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85632 Feb 21 07:42 test-tuning-lower-100x100-5050nz--C-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84554 Feb 21 07:42 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85632 Feb 21 07:42 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84554 Feb 21 07:42 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Mar 27 16:42 test-tuning-lower-100x100-5050nz--D-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84565 Mar 27 16:42 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Mar 27 16:42 test-tuning-lower-100x100-5050nz--S-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84565 Mar 27 16:42 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Mar 27 16:42 test-tuning-lower-100x100-5050nz--C-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84565 Mar 27 16:42 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Mar 27 16:42 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84564 Mar 27 16:42 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps + rsbench --read-performance-record test.rpr + ls -ltr test.txt --rw-r--r-- 1 pbuilder1 pbuilder1 4101 Feb 21 07:42 test.txt +-rw-r--r-- 1 pbuilder2 pbuilder2 4097 Mar 27 16:42 test.txt + RSB_PR_WLTC=2 + RSB_PR_SR=0 + rsbench --read-performance-record test.rpr @@ -8185,29 +8246,29 @@ /usr/bin/kpsepath ++ kpsepath tex ++ sed 's/!!//g;s/:/\n/g;' -+ find . /nonexistent/first-build/.texlive2024/texmf-config/tex/kpsewhich// /nonexistent/first-build/.texlive2024/texmf-var/tex/kpsewhich// /nonexistent/first-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/first-build/.texlive2024/texmf-config/tex/generic// /nonexistent/first-build/.texlive2024/texmf-var/tex/generic// /nonexistent/first-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/first-build/.texlive2024/texmf-config/tex/latex// /nonexistent/first-build/.texlive2024/texmf-var/tex/latex// /nonexistent/first-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/first-build/.texlive2024/texmf-config/tex/// /nonexistent/first-build/.texlive2024/texmf-var/tex/// /nonexistent/first-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/kpsewhich//': No such file or directory ++ find . /nonexistent/second-build/.texlive2024/texmf-config/tex/kpsewhich// /nonexistent/second-build/.texlive2024/texmf-var/tex/kpsewhich// /nonexistent/second-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/second-build/.texlive2024/texmf-config/tex/generic// /nonexistent/second-build/.texlive2024/texmf-var/tex/generic// /nonexistent/second-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/second-build/.texlive2024/texmf-config/tex/latex// /nonexistent/second-build/.texlive2024/texmf-var/tex/latex// /nonexistent/second-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/second-build/.texlive2024/texmf-config/tex/// /nonexistent/second-build/.texlive2024/texmf-var/tex/// /nonexistent/second-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/kpsewhich//': No such file or directory find: '/usr/local/share/texmf/tex/kpsewhich//': No such file or directory find: '/etc/texmf/tex/kpsewhich//': No such file or directory find: '/var/lib/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texlive/texmf-dist/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/generic//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/generic//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/generic//': No such file or directory find: '/usr/local/share/texmf/tex/generic//': No such file or directory find: '/usr/share/texmf/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex/latex//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex/latex//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/latex//': No such file or directory find: '/usr/local/share/texmf/tex/latex//': No such file or directory find: '/etc/texmf/tex/latex//': No such file or directory find: '/var/lib/texmf/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-config/tex///': No such file or directory -find: '/nonexistent/first-build/.texlive2024/texmf-var/tex///': No such file or directory -find: '/nonexistent/first-build/texmf/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-config/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2024/texmf-var/tex///': No such file or directory +find: '/nonexistent/second-build/texmf/tex///': No such file or directory find: '/usr/local/share/texmf/tex///': No such file or directory + exit 0 for mf in pd.mtx vf.mtx ; do if test -f /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; then true; else cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/$mf /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; fi; done @@ -8222,7 +8283,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x5645f5ae44a0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x55f0ffdc04a0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8237,7 +8298,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x562b46bd24a0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x5597f39224a0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8301,51 +8362,51 @@ Done. Building a matrix with 5 nnz, 5 x 5 Duplicates check: 5 - 0 = 5 - converted COO to RSB in 1.383e-01 s (100.00 %) - analyzed arrays in 5.200e-02 s (37.61 %) + converted COO to RSB in 2.798e-02 s (100.00 %) + analyzed arrays in 9.745e-03 s (34.83 %) cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 1.024e-02 s (7.41 %) - shuffled partitions in 4.402e-02 s (31.84 %) - memory allocations took 2.122e-05 s (0.02 %) - leafs setup took 8.106e-06 s (0.01 %) - halfword conversion took 3.196e-02 s (23.12 %) -Built (5 x 5)[0x562b46bd5b20]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' + deduplicated arrays in 0.000e+00 s (0.00 %) + sorted arrays in 2.677e-03 s (9.57 %) + shuffled partitions in 7.635e-03 s (27.29 %) + memory allocations took 8.106e-06 s (0.03 %) + leafs setup took 1.192e-06 s (0.00 %) + halfword conversion took 7.911e-03 s (28.27 %) +Built (5 x 5)[0x5597f3928f60]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' Allocated matrix of 5 nonzeroes: -(5 x 5)[0x562b46bd5b20]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(5 x 5)[0x5597f3928f60]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -Before auto-tuning, 100 multiplications took 1.915961s. +Before auto-tuning, 100 multiplications took 0.386547s. Threads autotuning (may take more than 1.500000s)... Will use autotuning routine to sample matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.99027e-08), 20 suggested as starting thread count(default). -3 iterations (20 th.) took 0.05671s; avg 0.0189s ( +/- 15.91/ 29.57 %); best 0.0159s; worst 0.02449s; std dev. 0.003956 (taking best). -Reference operation time is 0.0158958 s (0.001258 Mflops) with 20 threads. -3 iterations (20 th.) took 0.05116s; avg 0.01705s ( +/- 10.44/ 16.92 %); best 0.01527s; worst 0.01994s; std dev. 0.002059 (taking best). -Reference operation time is 0.0152721 s (0.00131 Mflops) with 20 threads. -After 0.108064s, autotuning routine did not find a better threads count configuration. -(5 x 5)[0x562b46bd5b20]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -After threads auto-tuning, 100 multiplications took 1.803897s -- effective speedup of 1.06212 x -Matrix autotuning (may take more than 1.500000s; using 20 threads )... +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.30005e-08), 42 suggested as starting thread count(default). +3 iterations (42 th.) took 0.01153s; avg 0.003843s ( +/- 0.99/ 1.25 %); best 0.003805s; worst 0.003891s; std dev. 3.575e-05 (taking best). +Reference operation time is 0.00380516 s (0.005256 Mflops) with 42 threads. +3 iterations (42 th.) took 0.01145s; avg 0.003818s ( +/- 3.06/ 1.58 %); best 0.003701s; worst 0.003878s; std dev. 8.251e-05 (taking best). +Reference operation time is 0.00370097 s (0.005404 Mflops) with 42 threads. +After 0.023062s, autotuning routine did not find a better threads count configuration. +(5 x 5)[0x5597f3928f60]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +After threads auto-tuning, 100 multiplications took 0.381619s -- effective speedup of 1.01291 x +Matrix autotuning (may take more than 1.500000s; using 42 threads )... Will autotune matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.99027e-08), 20 suggested as starting thread count. -3 iterations (20 th.) took 0.05999s; avg 0.02s ( +/- 20.02/ 40.00 %); best 0.01599s; worst 0.02799s; std dev. 0.005656 (taking best). -Reference operation time is 0.0159922 s (0.001251 Mflops) with 20 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.001) -Merge (2 -> 1 leaves) took w.c.t. of 5.102e-05s, ~8.106e-06s of computing time (of which 1.192e-06s sorting, 9.06e-06s analysis) -3 iterations (20 th.) took 1.216e-05s; avg 4.053e-06s ( +/- 98.52/170.59 %); best 5.99e-08s; worst 1.097e-05s; std dev. 4.913e-06 (taking best). -Reference operation time is 5.99027e-08 s (333.9 Mflops) with 20 threads. -After merge step 1: tpop: 5.99e-08 s ~Mflops: 333.875 nsubm:1 otn:20 -Applying merge (2 -> 1 leaves, 20 th.) yielded SPEEDUP of 266969.154x: 0.01599s -> 5.99e-08s, so taking this instance. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.30005e-08), 42 suggested as starting thread count. +3 iterations (42 th.) took 0.01151s; avg 0.003835s ( +/- 3.24/ 3.30 %); best 0.003711s; worst 0.003962s; std dev. 0.0001024 (taking best). +Reference operation time is 0.00371099 s (0.005389 Mflops) with 42 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 0.003711 Mflops: 0.005) +Merge (2 -> 1 leaves) took w.c.t. of 2.503e-05s, ~5.007e-06s of computing time (of which 0s sorting, 3.099e-06s analysis) +3 iterations (42 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 96.08/129.41 %); best 5.3e-08s; worst 3.099e-06s; std dev. 1.296e-06 (taking best). +Reference operation time is 5.30005e-08 s (377.4 Mflops) with 42 threads. +After merge step 1: tpop: 5.3e-08 s ~Mflops: 377.355 nsubm:1 otn:42 +Applying merge (2 -> 1 leaves, 42 th.) yielded SPEEDUP of 70017.994x: 0.003711s -> 5.3e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.05202s (of which 0.0002749s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 1.192e-06s sorting, 9.06e-06s analyzing) -Total merge + benchmarking process took 0.05202s, equivalent to 868338.3/3.3 new/old ops (0.09962s for 2 clones -- as 1663084.6/6.2 ops, or 831542.3/3.1 ops per clone), SPEEDUP of 266969.154x -Applying multi-merge (2 -> 1 leaves, 1 steps, 20 -> 20 th.sp.) yielded SPEEDUP of 266969.154x (0.01599s -> 5.99e-08s), will amortize in 3.3 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.16s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 26696815.4% (2.67e+05x) (from 0.001251 to 333.9 Mflops). -After 0.160054s, autotuning routine declared speedup of 266969 x, when using threads count of 20. -(5 x 5)[0x562b46bd91e0]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' -After threads auto-tuning, 100 multiplications took 0.000033s -- further speedup of 54826.8 x +A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.01144s (of which 0.0002689s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 0s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.01144s, equivalent to 215906.4/3.1 new/old ops (0.02244s for 2 clones -- as 423369.3/6.0 ops, or 211684.7/3.0 ops per clone), SPEEDUP of 70017.994x +Applying multi-merge (2 -> 1 leaves, 1 steps, 42 -> 42 th.sp.) yielded SPEEDUP of 70017.994x (0.003711s -> 5.3e-08s), will amortize in 3.1 ops by saving 0.003711s per op. +In 1 tuning rounds (tot. 0.034s, 0.022s for constructor, 2 clones) obtained a SPEEDUP of 7001699.4% (7.002e+04x) (from 0.005389 to 377.4 Mflops). +After 0.034294s, autotuning routine declared speedup of 70018 x, when using threads count of 42. +(5 x 5)[0x5597f392c590]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +After threads auto-tuning, 100 multiplications took 0.000028s -- further speedup of 13564.6 x 0/2 0 0 -> 0 1/2 1 0 -> 5 0/2 0 3 -> 0 @@ -8356,7 +8417,7 @@ Correctly initialized the library. Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x562b46bd5b20]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x5597f3928f60]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8507,61 +8568,68 @@ Creating 500 x 500 matrix with 62500 nonzeroes. Building a matrix with 62500 nnz, 500 x 500 Duplicates check: 62500 - 0 = 62500 - converted COO to RSB in 2.413e-01 s (100.00 %) - analyzed arrays in 4.804e-02 s (19.91 %) - cleaned-up arrays in 1.550e-04 s (0.06 %) - deduplicated arrays in 1.860e-04 s (0.08 %) - sorted arrays in 8.889e-02 s (36.84 %) - shuffled partitions in 7.296e-02 s (30.23 %) - memory allocations took 9.441e-05 s (0.04 %) - leafs setup took 2.122e-05 s (0.01 %) - halfword conversion took 3.095e-02 s (12.82 %) -Built (500 x 500)[0x55f7590072d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' + converted COO to RSB in 5.462e-02 s (100.00 %) + analyzed arrays in 9.305e-03 s (17.04 %) + cleaned-up arrays in 6.580e-05 s (0.12 %) + deduplicated arrays in 1.101e-04 s (0.20 %) + sorted arrays in 2.244e-02 s (41.08 %) + shuffled partitions in 1.514e-02 s (27.71 %) + memory allocations took 3.386e-05 s (0.06 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 7.522e-03 s (13.77 %) +Built (500 x 500)[0x561e1f0642d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 37, symflags:'' Allocated matrix of 62500 nonzeroes: -(500 x 500)[0x55f7590072d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' +(500 x 500)[0x561e1f0642d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 37, symflags:'' -Before auto-tuning, 100 multiplications took 1.894724s. +Before auto-tuning, 100 multiplications took 0.402964s. Threads autotuning (may take more than 1.500000s)... -Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz. +Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 51 subms, 37 lsubms, 2.0945 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.80907e-08), 20 suggested as starting thread count(default). -3 iterations (20 th.) took 0.07581s; avg 0.02527s ( +/- 20.88/ 25.81 %); best 0.01999s; worst 0.0318s; std dev. 0.004897 (taking best). -Reference operation time is 0.019995 s (12.5 Mflops) with 20 threads. -3 iterations (20 th.) took 0.08796s; avg 0.02932s ( +/- 18.11/ 9.29 %); best 0.02401s; worst 0.03205s; std dev. 0.003755 (taking best). -Reference operation time is 0.0240111 s (10.41 Mflops) with 20 threads. -After 0.163975s, autotuning routine did not find a better threads count configuration. -(500 x 500)[0x55f7590072d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' -After threads auto-tuning, 100 multiplications took 2.307119s -- effective speedup of 0.821251 x -Matrix autotuning (may take more than 1.500000s; using 20 threads )... -Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz. +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=3.35574e-08), 42 suggested as starting thread count(default). +3 iterations (42 th.) took 0.0118s; avg 0.003933s ( +/- 0.26/ 0.18 %); best 0.003923s; worst 0.00394s; std dev. 7.326e-06 (taking best). +Reference operation time is 0.00392294 s (63.73 Mflops) with 42 threads. +3 iterations (42 th.) took 0.01194s; avg 0.003981s ( +/- 3.69/ 5.55 %); best 0.003834s; worst 0.004202s; std dev. 0.0001591 (taking best). +Reference operation time is 0.00383401 s (65.21 Mflops) with 42 threads. +After 0.023809s, autotuning routine did not find a better threads count configuration. +(500 x 500)[0x561e1f0642d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 37, symflags:'' +After threads auto-tuning, 100 multiplications took 0.403354s -- effective speedup of 0.999034 x +Matrix autotuning (may take more than 1.500000s; using 42 threads )... +Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 51 subms, 37 lsubms, 2.0945 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.80907e-08), 20 suggested as starting thread count. -3 iterations (20 th.) took 0.04797s; avg 0.01599s ( +/- 0.11/ 0.11 %); best 0.01597s; worst 0.01601s; std dev. 1.382e-05 (taking best). -Reference operation time is 0.0159721 s (15.65 Mflops) with 20 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz (tpop: 0.01597 Mflops: 15.652) -Merge (64 -> 40 leaves) took w.c.t. of 0.08011s, ~0.4542s of computing time (of which 0.04805s sorting, 1.907e-05s analysis) -3 iterations (20 th.) took 0.07175s; avg 0.02392s ( +/- 47.89/ 31.72 %); best 0.01246s; worst 0.0315s; std dev. 0.00824 (taking best). -Reference operation time is 0.0124631 s (20.06 Mflops) with 20 threads. -After merge step 1: tpop: 0.01246 s ~Mflops: 20.059 nsubm:40 otn:20 -Applying merge (64 -> 40 leaves, 20 th.) yielded SPEEDUP of 1.282x: 0.01597s -> 0.01246s, so taking this instance. -Merge (40 -> 28 leaves) took w.c.t. of 0.01609s, ~0.004628s of computing time (of which 0.0002773s sorting, 2.313e-05s analysis) -3 iterations (20 th.) took 0.08376s; avg 0.02792s ( +/- 57.24/114.12 %); best 0.01194s; worst 0.05978s; std dev. 0.02253 (taking best). -Reference operation time is 0.0119381 s (20.94 Mflops) with 20 threads. -After merge step 2: tpop: 0.01194 s ~Mflops: 20.941 nsubm:28 otn:20 -Applying merge (40 -> 28 leaves, 20 th.) yielded SPEEDUP of 1.044x: 0.01246s -> 0.01194s, so taking this instance. -Merge (28 -> 22 leaves) took w.c.t. of 0.01611s, ~0.0002401s of computing time (of which 9.894e-05s sorting, 2.003e-05s analysis) -3 iterations (20 th.) took 0.1198s; avg 0.03994s ( +/- 48.03/ 87.94 %); best 0.02076s; worst 0.07506s; std dev. 0.02487 (taking best). -Reference operation time is 0.020757 s (12.04 Mflops) with 20 threads. -After merge step 3: tpop: 0.02076 s ~Mflops: 12.044 nsubm:22 otn:20 -Applying merge (28 -> 22 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.739x: 0.01194s -> 0.02076s. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=3.35574e-08), 42 suggested as starting thread count. +3 iterations (42 th.) took 0.009821s; avg 0.003274s ( +/- 35.52/ 21.03 %); best 0.002111s; worst 0.003962s; std dev. 0.0008268 (taking best). +Reference operation time is 0.00211096 s (118.4 Mflops) with 42 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 51 subms, 37 lsubms, 2.0945 bpnz (tpop: 0.002111 Mflops: 118.430) +Merge (37 -> 28 leaves) took w.c.t. of 0.00256s, ~0.001288s of computing time (of which 9.584e-05s sorting, 8.106e-06s analysis) +3 iterations (42 th.) took 0.01293s; avg 0.004308s ( +/- 12.59/ 23.66 %); best 0.003766s; worst 0.005328s; std dev. 0.0007214 (taking best). +Reference operation time is 0.00376606 s (66.38 Mflops) with 42 threads. +After merge step 1: tpop: 0.003766 s ~Mflops: 66.382 nsubm:28 otn:42 +Applying merge (37 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.784x: 0.002111s -> 0.003766s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (64 -> 22 subms) took 0.5121s (of which 0.1124s partitioning, 0s I/O); computing times: 0.4591s in par. loops, 0.04843s sorting, 6.223e-05s analyzing) -Total merge + benchmarking process took 0.5121s, equivalent to 42.9/32.1 new/old ops (0.1688s for 3 clones -- as 14.1/10.6 ops, or 4.7/3.5 ops per clone), SPEEDUP of 1.338x -Applying multi-merge (64 -> 28 leaves, 2 steps, 20 -> 20 th.sp.) yielded SPEEDUP of 1.338x (0.01597s -> 0.01194s), will amortize in 126.9 ops by saving 0.004034s per op. -In 1 tuning rounds (tot. 0.61s, 0.17s for constructor, 3 clones) obtained a SPEEDUP of 33.8% (1.338x) (from 15.65 to 20.94 Mflops). -After 0.605245s, autotuning routine declared speedup of 1.33791 x, when using threads count of 20. -(500 x 500)[0x55f75910fbf0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' -After threads auto-tuning, 100 multiplications took 2.127457s -- further speedup of 1.08445 x +A total of 1 merge steps (of max 6) (37 -> 28 subms) took 0.01551s (of which 0.002569s partitioning, 0s I/O); computing times: 0.001288s in par. loops, 9.584e-05s sorting, 8.106e-06s analyzing) +Total merge + benchmarking process took 0.01551s, equivalent to 7.3/7.3 new/old ops (0.01344s for 1 clones -- as 6.4/6.4 ops, or 6.4/6.4 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (42 th.) took 0.01414s; avg 0.004712s ( +/- 13.82/ 25.83 %); best 0.004061s; worst 0.005929s; std dev. 0.0008613 (taking best). +Reference operation time is 0.00406098 s (61.56 Mflops) with 42 threads. +Starting split (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 51 subms, 37 lsubms, 2.0945 bpnz (tpop: 0.004061 Mflops: 61.561) +Split (37 -> 91 leaves, 51 -> 123 subms) took 0.003151s (of which: 7.153e-06s analysis, -3.194e+10s mem.mgmt); compute time: 0.04982s overall, 0.0004046s searches, 0.04942s shuffle, 0.04416s switch, 0.0002069s quadrants. +3 iterations (42 th.) took 0.01387s; avg 0.004623s ( +/- 47.50/ 45.75 %); best 0.002427s; worst 0.006738s; std dev. 0.001761 (taking best). +Reference operation time is 0.00242686 s (103 Mflops) with 42 threads. +After split step 1: tpop: 0.002427 s ~Mflops: 103.014 nsubm:91 otn:42 +Applying split (37 -> 91 leaves, 42 th.) yielded SPEEDUP of 1.673x: 0.004061s -> 0.002427s, so taking this instance. +Split (91 -> 226 leaves, 123 -> 303 subms) took 0.006469s (of which: 1.407e-05s analysis, -7.986e+10s mem.mgmt); compute time: 0.03098s overall, 0.000258s searches, 0.03072s shuffle, 0.01866s switch, 0.002011s quadrants. +3 iterations (42 th.) took 0.01873s; avg 0.006245s ( +/- 2.29/ 2.66 %); best 0.006102s; worst 0.006411s; std dev. 0.0001273 (taking best). +Reference operation time is 0.00610185 s (40.97 Mflops) with 42 threads. +After split step 2: tpop: 0.006102 s ~Mflops: 40.971 nsubm:226 otn:42 +Applying split (91 -> 226 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 2.514x: 0.002427s -> 0.006102s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 2 split steps (of max 6) (37 -> 226 subms) took 0.05437s (of which 0.0101s partitioning, 0s I/O); computing times: 0.0808s in par. loops, 0.0006626s sorting, 2.122e-05s analyzing) +Total split + benchmarking process took 0.05437s, equivalent to 22.4/13.4 new/old ops (0.023s for 2 clones -- as 9.5/5.7 ops, or 4.7/2.8 ops per clone), SPEEDUP of 1.673x +Applying multi-split (37 -> 91 leaves, 1 steps, 42 -> 42 th.sp.) yielded SPEEDUP of 1.673x (0.004061s -> 0.002427s), will amortize in 33.3 ops by saving 0.001634s per op. +In 1 tuning rounds (tot. 0.12s, 0.036s for constructor, 3 clones) obtained a SPEEDUP of 67.3% (1.673x) (from 61.56 to 103 Mflops). +After 0.119074s, autotuning routine declared speedup of 1.67335 x, when using threads count of 42. +(500 x 500)[0x561e1f16a0a0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 91, symflags:'' +After threads auto-tuning, 100 multiplications took 0.482201s -- further speedup of 0.836485 x librsb timer-based profiling is not supported in this build. If you wish to have it, re-configure librsb with its support. So you can safely ignore the error you might just have seen printed out on screen. /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve Hello, RSB! @@ -8569,19 +8637,19 @@ Correctly initialized the library. Building a matrix with 7 nnz, 6 x 6 Duplicates check: 1 - 0 = 1 - converted COO to RSB in 9.970e-02 s (100.00 %) - analyzed arrays in 7.158e-02 s (71.80 %) - cleaned-up arrays in 1.907e-05 s (0.02 %) - deduplicated arrays in 2.146e-06 s (0.00 %) - sorted arrays in 5.007e-06 s (0.01 %) - shuffled partitions in 2.797e-02 s (28.06 %) - memory allocations took 5.770e-05 s (0.06 %) - leafs setup took 1.979e-05 s (0.02 %) - halfword conversion took 2.718e-05 s (0.03 %) -Built (6 x 6)[0x5569fc7154d0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' + converted COO to RSB in 2.725e-02 s (100.00 %) + analyzed arrays in 2.155e-02 s (79.07 %) + cleaned-up arrays in 5.007e-06 s (0.02 %) + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 2.146e-06 s (0.01 %) + shuffled partitions in 5.656e-03 s (20.75 %) + memory allocations took 1.693e-05 s (0.06 %) + leafs setup took 5.007e-06 s (0.02 %) + halfword conversion took 1.597e-05 s (0.06 %) +Built (6 x 6)[0x556dab15f4d0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x5569fc7154d0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x556dab15f4d0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8607,20 +8675,55 @@ 1 1 Will autotune matrix: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:6.58e-08 -3 iterations (20 th.) took 5.507e-05s; avg 1.836e-05s ( +/- 94.81/188.31 %); best 9.537e-07s; worst 5.293e-05s; std dev. 2.445e-05 (taking best). -Reference operation time is 9.53674e-07 s (2.097 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 9.537e-07 Mflops: 2.097) +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3.355e-08 +3 iterations (42 th.) took 4.506e-05s; avg 1.502e-05s ( +/- 99.78/200.00 %); best 3.355e-08s; worst 4.506e-05s; std dev. 2.124e-05 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 3.355e-08 Mflops: 59.621) Merge (1 -> 1 leaves) took w.c.t. of 0s, ~0s of computing time (of which 0s sorting, 0s analysis) -3 iterations (20 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 89.65/ 50.00 %); best 6.58e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.58035e-08 s (30.39 Mflops) with 20 threads. -After merge step 1: tpop: 6.58e-08 s ~Mflops: 30.394 nsubm:1 otn:20 -Applying merge (1 -> 1 leaves, 20 th.) yielded SPEEDUP of 14.493x: 9.537e-07s -> 6.58e-08s, so taking this instance. +3 iterations (42 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 89.45/200.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After merge step 1: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying merge (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (1 -> 1 subms) took 0.07194s (of which 3.815e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) -Total merge + benchmarking process took 0.07194s, equivalent to 1093300.7/75437.8 new/old ops (0.1316s for 2 clones -- as 1999333.3/137954.0 ops, or 999666.7/68977.0 ops per clone), SPEEDUP of 14.493x -Applying multi-merge (1 -> 1 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 14.493x (9.537e-07s -> 6.58e-08s), will amortize in 81028.7 ops by saving 8.879e-07s per op. -In 1 tuning rounds (tot. 0.13s, 0.13s for constructor, 2 clones) obtained a SPEEDUP of 1349.3% (14.49x) (from 2.097 to 30.39 Mflops). +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 1.693e-05s (of which 4.053e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 1.693e-05s, equivalent to 504.6/504.6 new/old ops (0.01774s for 1 clones -- as 528742.0/528742.0 ops, or 528742.0/528742.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (42 th.) took 1.311e-05s; avg 4.371e-06s ( +/- 99.23/172.73 %); best 3.355e-08s; worst 1.192e-05s; std dev. 5.361e-06 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 3.355e-08 Mflops: 59.621) +Split (1 -> 1 leaves, 1 -> 1 subms) took 3.409e-05s (of which: 6.914e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 94.72/ 50.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 1: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 4.053e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 96.75/107.69 %); best 3.355e-08s; worst 2.146e-06s; std dev. 8.778e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 2: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 6.914e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 89.45/200.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 3: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.861e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 89.45/200.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 4: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.861e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 89.45/200.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 5: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.146e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 89.45/200.00 %); best 3.355e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.35455e-08 s (59.62 Mflops) with 42 threads. +After split step 6: tpop: 3.355e-08 s ~Mflops: 59.621 nsubm:1 otn:42 +Applying split (1 -> 1 leaves, 42 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00000x): 3.355e-08s -> 3.355e-08s, so IGNORING this instance. +A total of 6 split steps (of max 6) (1 -> 1 subms) took 0.0004449s (of which 0.0003791s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 9.775e-06s analyzing) +Total split + benchmarking process took 0.0004449s, equivalent to 13262.3/13262.3 new/old ops (0.03179s for 1 clones -- as 947704.3/947704.3 ops, or 947704.3/947704.3 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.05s, 0.05s for constructor, 2 clones) obtained NO speedup (best stays 59.62 Mflops). Backsolving we should get a unitary vector: %%MatrixMarket matrix array real general @@ -8655,194 +8758,202 @@ /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran Building a matrix with 210 nnz, 20 x 20 Duplicates check: 210 - 0 = 210 - converted COO to RSB in 1.799e-01 s (100.00 %) - analyzed arrays in 4.799e-02 s (26.67 %) - cleaned-up arrays in 4.053e-06 s (0.00 %) - deduplicated arrays in 5.007e-06 s (0.00 %) - sorted arrays in 4.380e-02 s (24.34 %) - shuffled partitions in 4.396e-02 s (24.44 %) - memory allocations took 1.452e-04 s (0.08 %) - leafs setup took 1.717e-05 s (0.01 %) - halfword conversion took 4.395e-02 s (24.43 %) -Built (20 x 20)[0x563f3cb799f0]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' + converted COO to RSB in 1.506e-01 s (100.00 %) + analyzed arrays in 3.287e-02 s (21.83 %) + cleaned-up arrays in 2.861e-06 s (0.00 %) + deduplicated arrays in 2.861e-06 s (0.00 %) + sorted arrays in 2.746e-02 s (18.24 %) + shuffled partitions in 4.154e-02 s (27.59 %) + memory allocations took 4.697e-05 s (0.03 %) + leafs setup took 1.502e-05 s (0.01 %) + halfword conversion took 4.090e-02 s (27.16 %) +Built (20 x 20)[0x557de13929f0]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:1.062e-07 -Starting autotuning (16 x 1.06192e-07 s stages, transA=N, nrhs=1, timer gran.=1.06192e-07), 20 suggested as starting thread count(default). -3 iterations (20 th.) took 0.06796s; avg 0.02265s ( +/- 29.51/ 58.80 %); best 0.01597s; worst 0.03597s; std dev. 0.009418 (taking best). -Reference operation time is 0.0159671 s (0.05261 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.01597 Mflops: 0.053) -Merge (22 -> 16 leaves) took w.c.t. of 0.05252s, ~0.08049s of computing time (of which 6.914e-06s sorting, 1.311e-05s analysis) -3 iterations (20 th.) took 0.06742s; avg 0.02247s ( +/- 28.86/ 24.57 %); best 0.01599s; worst 0.02799s; std dev. 0.004949 (taking best). -Reference operation time is 0.0159881 s (0.05254 Mflops) with 20 threads. -After merge step 1: tpop: 0.01599 s ~Mflops: 0.053 nsubm:16 otn:20 -Applying merge (22 -> 16 leaves, 20 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99869x): 0.01597s -> 0.01599s, so IGNORING this instance. -Merge (16 -> 13 leaves) took w.c.t. of 5.198e-05s, ~1.812e-05s of computing time (of which 3.099e-06s sorting, 1.502e-05s analysis) -3 iterations (20 th.) took 0.06393s; avg 0.02131s ( +/- 20.56/ 30.95 %); best 0.01693s; worst 0.0279s; std dev. 0.004747 (taking best). -Reference operation time is 0.016927 s (0.04962 Mflops) with 20 threads. -After merge step 2: tpop: 0.01693 s ~Mflops: 0.050 nsubm:13 otn:20 -Applying merge (16 -> 13 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.060x: 0.01597s -> 0.01693s. -Merge (13 -> 10 leaves) took w.c.t. of 4.983e-05s, ~1.287e-05s of computing time (of which 2.146e-06s sorting, 1.383e-05s analysis) -3 iterations (20 th.) took 0.06396s; avg 0.02132s ( +/- 25.24/ 15.33 %); best 0.01594s; worst 0.02459s; std dev. 0.003835 (taking best). -Reference operation time is 0.015939 s (0.0527 Mflops) with 20 threads. -After merge step 3: tpop: 0.01594 s ~Mflops: 0.053 nsubm:10 otn:20 -Applying merge (13 -> 10 leaves, 20 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00177x): 0.01597s -> 0.01594s, so IGNORING this instance. -Merge (10 -> 8 leaves) took w.c.t. of 9.418e-05s, ~1.502e-05s of computing time (of which 3.099e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 0.06377s; avg 0.02126s ( +/- 25.77/ 31.54 %); best 0.01578s; worst 0.02796s; std dev. 0.005049 (taking best). -Reference operation time is 0.015779 s (0.05324 Mflops) with 20 threads. -After merge step 4: tpop: 0.01578 s ~Mflops: 0.053 nsubm:8 otn:20 -Applying merge (10 -> 8 leaves, 20 th.) yielded SPEEDUP of 1.012x: 0.01597s -> 0.01578s, so taking this instance. -Merge (8 -> 6 leaves) took w.c.t. of 5.102e-05s, ~1.192e-05s of computing time (of which 3.099e-06s sorting, 8.106e-06s analysis) -3 iterations (20 th.) took 0.06836s; avg 0.02279s ( +/- 30.22/ 38.81 %); best 0.0159s; worst 0.03163s; std dev. 0.00657 (taking best). -Reference operation time is 0.0158999 s (0.05283 Mflops) with 20 threads. -After merge step 5: tpop: 0.0159 s ~Mflops: 0.053 nsubm:6 otn:20 -Applying merge (8 -> 6 leaves, 20 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99240x): 0.01578s -> 0.0159s, so IGNORING this instance. -Merge (6 -> 3 leaves) took w.c.t. of 5.388e-05s, ~1.979e-05s of computing time (of which 5.96e-06s sorting, 9.06e-06s analysis) -3 iterations (20 th.) took 0.06395s; avg 0.02132s ( +/- 25.22/ 31.35 %); best 0.01594s; worst 0.028s; std dev. 0.005009 (taking best). -Reference operation time is 0.0159409 s (0.05269 Mflops) with 20 threads. -After merge step 6: tpop: 0.01594 s ~Mflops: 0.053 nsubm:3 otn:20 -Applying merge (6 -> 3 leaves, 20 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=0.98984x): 0.01578s -> 0.01594s, so IGNORING this instance. -A total of 6 merge steps (of max 6) (22 -> 3 subms) took 0.52s (of which 0.05287s partitioning, 0s I/O); computing times: 0.08057s in par. loops, 2.432e-05s sorting, 6.914e-05s analyzing) -Total merge + benchmarking process took 0.52s, equivalent to 33.0/32.6 new/old ops (0.1318s for 2 clones -- as 8.4/8.3 ops, or 4.2/4.1 ops per clone), SPEEDUP of 1.012x -Applying multi-merge (22 -> 8 leaves, 4 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 1.012x (0.01597s -> 0.01578s), will amortize in 2764.3 ops by saving 0.0001881s per op. -In 1 tuning rounds (tot. 0.65s, 0.13s for constructor, 2 clones) obtained a SPEEDUP of 1.2% (1.012x) (from 0.05261 to 0.05324 Mflops). - autotuner chose 20 threads -Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 11 subms, 8 lsubms, 3.0095 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:1.062e-07 -Starting autotuning (16 x 1.06192e-07 s stages, transA=N, nrhs=1, timer gran.=1.06192e-07), 20 suggested as starting thread count(default). -3 iterations (20 th.) took 0.07202s; avg 0.02401s ( +/- 33.39/ 33.54 %); best 0.01599s; worst 0.03206s; std dev. 0.00656 (taking best). -~ 20 threads: 0.01599s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.07194s; avg 0.02398s ( +/- 4.63/ 4.61 %); best 0.02287s; worst 0.02508s; std dev. 0.0009042 (taking best). - 19 threads: 0.02287s (0.037 Mflops) (1/2 degradations so far) - -3 iterations (18 th.) took 0.07197s; avg 0.02399s ( +/- 50.10/ 49.92 %); best 0.01197s; worst 0.03596s; std dev. 0.009795 (taking best). - 18 threads: 0.01197s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 0.07597s; avg 0.02532s ( +/- 5.23/ 10.41 %); best 0.024s; worst 0.02796s; std dev. 0.001864 (taking best). - 17 threads: 0.024s (0.035 Mflops) (1/2 degradations so far) - -3 iterations (16 th.) took 0.07243s; avg 0.02414s ( +/- 17.31/ 18.08 %); best 0.01997s; worst 0.02851s; std dev. 0.00349 (taking best). - 16 threads: 0.01997s (0.042 Mflops) (2/2 degradations so far) - -Best threads choice is 18; starting threads were 20; max speed gap is 2x; search took 0.36s. -Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 11 subms, 8 lsubms, 3.0095 bpnz (tpop: 0.01197 Mflops: 0.070) -Merge (8 -> 6 leaves) took w.c.t. of 3.6e-05s, ~1.192e-05s of computing time (of which 3.099e-06s sorting, 7.868e-06s analysis) -3 iterations (20 th.) took 0.08744s; avg 0.02915s ( +/- 19.56/ 37.22 %); best 0.02344s; worst 0.03999s; std dev. 0.007675 (taking best). -~ 20 threads: 0.02344s (0.036 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.05997s; avg 0.01999s ( +/- 32.81/ 20.10 %); best 0.01343s; worst 0.02401s; std dev. 0.004676 (taking best). - 19 threads: 0.01343s (0.063 Mflops) (0/2 degradations so far) - -3 iterations (18 th.) took 0.05998s; avg 0.01999s ( +/- 40.17/ 20.20 %); best 0.01196s; worst 0.02403s; std dev. 0.005679 (taking best). - 18 threads: 0.01196s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 0.06797s; avg 0.02266s ( +/- 47.20/ 59.02 %); best 0.01196s; worst 0.03603s; std dev. 0.01001 (taking best). - 17 threads: 0.01196s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (16 th.) took 0.09193s; avg 0.03064s ( +/- 8.63/ 4.41 %); best 0.028s; worst 0.03199s; std dev. 0.00187 (taking best). - 16 threads: 0.028s (0.03 Mflops) (1/2 degradations so far) - -3 iterations (15 th.) took 0.06397s; avg 0.02132s ( +/- 25.16/ 50.26 %); best 0.01596s; worst 0.03204s; std dev. 0.007578 (taking best). - 15 threads: 0.01596s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 18; starting threads were 20; max speed gap is 2.3x; search took 0.43s. -After merge step 1: tpop: 0.01196 s ~Mflops: 0.070 nsubm:6 otn:18 -Applying merge (8 -> 6 leaves, 18 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00066x): 0.01197s -> 0.01196s, so IGNORING this instance. -Merge (6 -> 3 leaves) took w.c.t. of 3.982e-05s, ~1.407e-05s of computing time (of which 4.053e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 0.08393s; avg 0.02798s ( +/- 28.52/ 14.37 %); best 0.02s; worst 0.032s; std dev. 0.005642 (taking best). -~ 20 threads: 0.02s (0.042 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.07999s; avg 0.02666s ( +/- 40.04/ 20.03 %); best 0.01599s; worst 0.032s; std dev. 0.007548 (taking best). - 19 threads: 0.01599s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (18 th.) took 0.06399s; avg 0.02133s ( +/- 25.04/ 50.06 %); best 0.01599s; worst 0.03201s; std dev. 0.00755 (taking best). - 18 threads: 0.01599s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (17 th.) took 0.07599s; avg 0.02533s ( +/- 36.70/ 26.20 %); best 0.01603s; worst 0.03197s; std dev. 0.006771 (taking best). - 17 threads: 0.01603s (0.052 Mflops) (2/2 degradations so far) - -Best threads choice is 19; starting threads were 20; max speed gap is 1.3x; search took 0.3s. -After merge step 2: tpop: 0.01599 s ~Mflops: 0.053 nsubm:3 otn:19 -Applying merge (6 -> 3 leaves, 19 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.335x: 0.01197s -> 0.01599s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 merge steps (of max 6) (8 -> 3 subms) took 0.7357s (of which 8.416e-05s partitioning, 0s I/O); computing times: 2.599e-05s in par. loops, 7.153e-06s sorting, 1.788e-05s analyzing) -Total merge + benchmarking process took 0.7357s, equivalent to 61.5/61.5 new/old ops (0.07561s for 1 clones -- as 6.3/6.3 ops, or 6.3/6.3 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:4.02e-08 +Starting autotuning (16 x 4.01974e-08 s stages, transA=N, nrhs=1, timer gran.=4.01974e-08), 42 suggested as starting thread count(default). +3 iterations (42 th.) took 0.05473s; avg 0.01824s ( +/- 8.37/ 7.23 %); best 0.01672s; worst 0.01956s; std dev. 0.001171 (taking best). +Reference operation time is 0.0167179 s (0.05025 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.01672 Mflops: 0.050) +Merge (22 -> 16 leaves) took w.c.t. of 0.004539s, ~0.0008948s of computing time (of which 4.292e-06s sorting, 1.097e-05s analysis) +3 iterations (42 th.) took 0.05092s; avg 0.01697s ( +/- 13.06/ 16.24 %); best 0.01476s; worst 0.01973s; std dev. 0.002066 (taking best). +Reference operation time is 0.014756 s (0.05693 Mflops) with 42 threads. +After merge step 1: tpop: 0.01476 s ~Mflops: 0.057 nsubm:16 otn:42 +Applying merge (22 -> 16 leaves, 42 th.) yielded SPEEDUP of 1.133x: 0.01672s -> 0.01476s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.006125s, ~2.789e-05s of computing time (of which 6.199e-06s sorting, 1.097e-05s analysis) +3 iterations (42 th.) took 0.0599s; avg 0.01997s ( +/- 26.30/ 15.35 %); best 0.01472s; worst 0.02303s; std dev. 0.00373 (taking best). +Reference operation time is 0.0147152 s (0.05708 Mflops) with 42 threads. +After merge step 2: tpop: 0.01472 s ~Mflops: 0.057 nsubm:10 otn:42 +Applying merge (16 -> 10 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00277x): 0.01476s -> 0.01472s, so IGNORING this instance. +Merge (10 -> 8 leaves) took w.c.t. of 3.6e-05s, ~1.216e-05s of computing time (of which 2.861e-06s sorting, 1.001e-05s analysis) +3 iterations (42 th.) took 0.04776s; avg 0.01592s ( +/- 28.82/ 29.69 %); best 0.01133s; worst 0.02065s; std dev. 0.003804 (taking best). +Reference operation time is 0.0113311 s (0.07413 Mflops) with 42 threads. +After merge step 3: tpop: 0.01133 s ~Mflops: 0.074 nsubm:8 otn:42 +Applying merge (10 -> 8 leaves, 42 th.) yielded SPEEDUP of 1.302x: 0.01476s -> 0.01133s, so taking this instance. +Merge (8 -> 6 leaves) took w.c.t. of 3.91e-05s, ~1.407e-05s of computing time (of which 1.907e-06s sorting, 8.106e-06s analysis) +3 iterations (42 th.) took 0.05118s; avg 0.01706s ( +/- 23.38/ 13.14 %); best 0.01307s; worst 0.0193s; std dev. 0.002827 (taking best). +Reference operation time is 0.0130711 s (0.06426 Mflops) with 42 threads. +After merge step 4: tpop: 0.01307 s ~Mflops: 0.064 nsubm:6 otn:42 +Applying merge (8 -> 6 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.154x: 0.01133s -> 0.01307s. +Merge (6 -> 3 leaves) took w.c.t. of 3.695e-05s, ~1.597e-05s of computing time (of which 2.861e-06s sorting, 5.96e-06s analysis) +3 iterations (42 th.) took 0.04669s; avg 0.01556s ( +/- 19.71/ 11.44 %); best 0.0125s; worst 0.01734s; std dev. 0.002179 (taking best). +Reference operation time is 0.0124969 s (0.06722 Mflops) with 42 threads. +After merge step 5: tpop: 0.0125 s ~Mflops: 0.067 nsubm:3 otn:42 +Applying merge (6 -> 3 leaves, 42 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.103x: 0.01133s -> 0.0125s. +Merge (3 -> 1 leaves) took w.c.t. of 3.481e-05s, ~1.383e-05s of computing time (of which 3.099e-06s sorting, 8.106e-06s analysis) +3 iterations (42 th.) took 1.121e-05s; avg 3.735e-06s ( +/- 74.47/142.55 %); best 9.537e-07s; worst 9.06e-06s; std dev. 3.766e-06 (taking best). +Reference operation time is 9.53674e-07 s (880.8 Mflops) with 42 threads. +After merge step 6: tpop: 9.537e-07 s ~Mflops: 880.804 nsubm:1 otn:42 +Applying merge (3 -> 1 leaves, 42 th.) yielded SPEEDUP of 11881.500x: 0.01133s -> 9.537e-07s, so taking this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 6 merge steps (of max 6) (22 -> 1 subms) took 0.397s (of which 0.0109s partitioning, 0s I/O); computing times: 0.0009787s in par. loops, 2.122e-05s sorting, 5.412e-05s analyzing) +Total merge + benchmarking process took 0.397s, equivalent to 416263.8/23.7 new/old ops (0.1837s for 4 clones -- as 192669.5/11.0 ops, or 48167.4/2.7 ops per clone), SPEEDUP of 17530.000x +Applying multi-merge (22 -> 1 leaves, 6 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 17530.000x (0.01672s -> 9.537e-07s), will amortize in 23.7 ops by saving 0.01672s per op. +In 1 tuning rounds (tot. 0.51s, 0.18s for constructor, 4 clones) obtained a SPEEDUP of 1752900.0% (1.753e+04x) (from 0.05025 to 880.8 Mflops). + autotuner chose 42 threads +Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:4.02e-08 +Starting autotuning (16 x 4.01974e-08 s stages, transA=N, nrhs=1, timer gran.=4.01974e-08), 42 suggested as starting thread count(default). +3 iterations (42 th.) took 1.812e-05s; avg 6.04e-06s ( +/- 84.21/164.47 %); best 9.537e-07s; worst 1.597e-05s; std dev. 7.025e-06 (taking best). +~ 42 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (41 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 41 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (40 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 40 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (39 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 96.11/107.69 %); best 4.02e-08s; worst 2.146e-06s; std dev. 8.778e-07 (taking best). + 39 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (38 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 96.11/ 84.62 %); best 4.02e-08s; worst 1.907e-06s; std dev. 7.867e-07 (taking best). + 38 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (37 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 37 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (36 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 36 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (35 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 35 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (34 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 34 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (33 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 96.11/ 84.62 %); best 4.02e-08s; worst 1.907e-06s; std dev. 7.867e-07 (taking best). + 33 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (32 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 32 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (31 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 31 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (30 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 30 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (29 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 29 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (28 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 28 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (27 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 27 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (26 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 26 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (25 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 25 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (24 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 24 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (23 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 23 threads: 9.537e-07s (8.8e+02 Mflops) (2/2 degradations so far) - +Best threads choice is 40; starting threads were 42; max speed gap is 24x; search took 0.00022s. +Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz (tpop: 4.02e-08 Mflops: 20896.888) +Merge (1 -> 1 leaves) took w.c.t. of 0s, ~0s of computing time (of which 0s sorting, 0s analysis) +3 iterations (42 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). +~ 42 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (41 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 41 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (40 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 40 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (39 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 39 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (38 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 38 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (37 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 37 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (36 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 36 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (35 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 35 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (34 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 34 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (33 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 33 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (32 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 32 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (31 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 31 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (30 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 30 threads: 9.537e-07s (8.8e+02 Mflops) (2/2 degradations so far) - +Best threads choice is 41; starting threads were 42; max speed gap is 24x; search took 8.4e-05s. +After merge step 1: tpop: 4.02e-08 s ~Mflops: 20896.888 nsubm:1 otn:41 +Applying merge (1 -> 1 leaves, 41 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 4.02e-08s -> 4.02e-08s, so IGNORING this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 9.894e-05s (of which 3.099e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 9.894e-05s, equivalent to 2461.4/2461.4 new/old ops (0.0518s for 1 clones -- as 1288641.8/1288641.8 ops, or 1288641.8/1288641.8 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (20 th.) took 0.06401s; avg 0.02134s ( +/- 25.05/ 49.98 %); best 0.01599s; worst 0.032s; std dev. 0.007541 (taking best). -~ 20 threads: 0.01599s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.07196s; avg 0.02399s ( +/- 49.91/ 49.81 %); best 0.01202s; worst 0.03593s; std dev. 0.009764 (taking best). - 19 threads: 0.01202s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (18 th.) took 0.05994s; avg 0.01998s ( +/- 37.66/ 19.85 %); best 0.01246s; worst 0.02395s; std dev. 0.005323 (taking best). - 18 threads: 0.01246s (0.067 Mflops) (1/2 degradations so far) - -3 iterations (17 th.) took 0.04799s; avg 0.016s ( +/- 25.07/ 49.96 %); best 0.01199s; worst 0.02399s; std dev. 0.005651 (taking best). - 17 threads: 0.01199s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (16 th.) took 0.05996s; avg 0.01999s ( +/- 39.96/ 20.04 %); best 0.012s; worst 0.02399s; std dev. 0.005647 (taking best). - 16 threads: 0.012s (0.07 Mflops) (1/2 degradations so far) - -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 25.05/ 41.60 %); best 0.01199s; worst 0.02265s; std dev. 0.004738 (taking best). - 15 threads: 0.01199s (0.07 Mflops) (2/2 degradations so far) - -Best threads choice is 17; starting threads were 20; max speed gap is 1.3x; search took 0.35s. -Starting split (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 11 subms, 8 lsubms, 3.0095 bpnz (tpop: 0.01199 Mflops: 0.070) -Split (8 -> 19 leaves, 11 -> 26 subms) took 0.001737s (of which: 5.96e-06s analysis, -6.961e+09s mem.mgmt); compute time: 0.002469s overall, 1.168e-05s searches, 0.002458s shuffle, 0.001589s switch, 3.409e-05s quadrants. -3 iterations (20 th.) took 0.1302s; avg 0.04339s ( +/- 44.70/ 89.35 %); best 0.024s; worst 0.08217s; std dev. 0.02742 (taking best). -~ 20 threads: 0.024s (0.035 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.07594s; avg 0.02531s ( +/- 21.03/ 26.15 %); best 0.01999s; worst 0.03193s; std dev. 0.00496 (taking best). - 19 threads: 0.01999s (0.042 Mflops) (0/2 degradations so far) - -3 iterations (18 th.) took 0.05639s; avg 0.0188s ( +/- 33.83/ 27.54 %); best 0.01244s; worst 0.02397s; std dev. 0.004782 (taking best). - 18 threads: 0.01244s (0.068 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 0.05569s; avg 0.01856s ( +/- 41.31/ 26.84 %); best 0.0109s; worst 0.02355s; std dev. 0.005503 (taking best). - 17 threads: 0.0109s (0.077 Mflops) (0/2 degradations so far) - -3 iterations (16 th.) took 0.0404s; avg 0.01347s ( +/- 10.90/ 17.15 %); best 0.012s; worst 0.01578s; std dev. 0.001653 (taking best). - 16 threads: 0.012s (0.07 Mflops) (1/2 degradations so far) - -3 iterations (15 th.) took 0.08338s; avg 0.02779s ( +/- 29.82/ 16.88 %); best 0.01951s; worst 0.03249s; std dev. 0.005877 (taking best). - 15 threads: 0.01951s (0.043 Mflops) (2/2 degradations so far) - -Best threads choice is 17; starting threads were 20; max speed gap is 2.2x; search took 0.44s. -After split step 1: tpop: 0.0109 s ~Mflops: 0.077 nsubm:19 otn:17 -Applying split (8 -> 19 leaves, 17 th.) yielded SPEEDUP of 1.100x: 0.01199s -> 0.0109s, so taking this instance. -Split (19 -> 42 leaves, 26 -> 58 subms) took 0.04392s (of which: 6.914e-06s analysis, -1.566e+10s mem.mgmt); compute time: 0.1893s overall, 1.979e-05s searches, 0.1893s shuffle, 0.1637s switch, 6.39e-05s quadrants. -3 iterations (20 th.) took 0.0799s; avg 0.02663s ( +/- 54.80/ 34.82 %); best 0.01204s; worst 0.03591s; std dev. 0.01045 (taking best). -~ 20 threads: 0.01204s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.06798s; avg 0.02266s ( +/- 11.61/ 5.85 %); best 0.02003s; worst 0.02398s; std dev. 0.00186 (taking best). - 19 threads: 0.02003s (0.042 Mflops) (1/2 degradations so far) - -3 iterations (18 th.) took 0.06799s; avg 0.02266s ( +/- 46.12/ 41.45 %); best 0.01221s; worst 0.03206s; std dev. 0.008138 (taking best). - 18 threads: 0.01221s (0.069 Mflops) (2/2 degradations so far) - -Best threads choice is 20; starting threads were 20; max speed gap is 1.7x; search took 0.22s. -After split step 2: tpop: 0.01204 s ~Mflops: 0.070 nsubm:42 otn:20 -Applying split (19 -> 42 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.105x: 0.0109s -> 0.01204s. -Split (42 -> 102 leaves, 58 -> 139 subms) took 0.03215s (of which: 1.502e-05s analysis, -3.654e+10s mem.mgmt); compute time: 0.008393s overall, 2.337e-05s searches, 0.00837s shuffle, 0.004984s switch, 0.0007017s quadrants. -3 iterations (20 th.) took 0.07587s; avg 0.02529s ( +/- 20.68/ 26.31 %); best 0.02006s; worst 0.03195s; std dev. 0.004956 (taking best). -~ 20 threads: 0.02006s (0.042 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.048s; avg 0.016s ( +/- 26.09/ 49.56 %); best 0.01183s; worst 0.02393s; std dev. 0.00561 (taking best). - 19 threads: 0.01183s (0.071 Mflops) (0/2 degradations so far) - -3 iterations (18 th.) took 0.0637s; avg 0.02123s ( +/- 7.16/ 14.26 %); best 0.01971s; worst 0.02426s; std dev. 0.002141 (taking best). - 18 threads: 0.01971s (0.043 Mflops) (1/2 degradations so far) - -3 iterations (17 th.) took 0.0604s; avg 0.02013s ( +/- 34.31/ 18.95 %); best 0.01323s; worst 0.02395s; std dev. 0.004893 (taking best). - 17 threads: 0.01323s (0.064 Mflops) (2/2 degradations so far) - -Best threads choice is 19; starting threads were 20; max speed gap is 1.7x; search took 0.25s. -After split step 3: tpop: 0.01183 s ~Mflops: 0.071 nsubm:102 otn:19 -Applying split (42 -> 102 leaves, 19 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.085x: 0.0109s -> 0.01183s. -Split (102 -> 146 leaves, 139 -> 198 subms) took 0.03131s (of which: 2.098e-05s analysis, -2.61e+10s mem.mgmt); compute time: 0.005425s overall, 2.217e-05s searches, 0.005403s shuffle, 0.003196s switch, 0.0003247s quadrants. -3 iterations (20 th.) took 0.06386s; avg 0.02129s ( +/- 25.49/ 50.15 %); best 0.01586s; worst 0.03196s; std dev. 0.007549 (taking best). -~ 20 threads: 0.01586s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (19 th.) took 0.08389s; avg 0.02796s ( +/- 14.23/ 14.26 %); best 0.02399s; worst 0.03195s; std dev. 0.003252 (taking best). - 19 threads: 0.02399s (0.035 Mflops) (1/2 degradations so far) - -3 iterations (18 th.) took 0.06394s; avg 0.02131s ( +/- 25.20/ 12.64 %); best 0.01594s; worst 0.02401s; std dev. 0.003797 (taking best). - 18 threads: 0.01594s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 20; starting threads were 20; max speed gap is 1.5x; search took 0.21s. -After split step 4: tpop: 0.01586 s ~Mflops: 0.053 nsubm:146 otn:20 -Applying split (102 -> 146 leaves, 20 th.) yielded SLOWDOWN (3th of 3 tolerable) of 1.456x: 0.0109s -> 0.01586s. -Skipping further split based tests after 3 definite performance degradations in a row (and last exceeding limit). -A total of 4 split steps (of max 6) (8 -> 146 subms) took 1.308s (of which 0.1098s partitioning, 0s I/O); computing times: 0.2056s in par. loops, 7.701e-05s sorting, 4.888e-05s analyzing) -Total split + benchmarking process took 1.308s, equivalent to 120.0/109.1 new/old ops (0.1595s for 2 clones -- as 14.6/13.3 ops, or 7.3/6.7 ops per clone), SPEEDUP of 1.100x -Applying multi-split (8 -> 19 leaves, 1 steps, 17 -> 17 th.sp.) yielded SPEEDUP of 1.100x (0.01199s -> 0.0109s), will amortize in 1201.0 ops by saving 0.001089s per op. -In 1 tuning rounds (tot. 2.9s, 0.24s for constructor, 3 clones) obtained a SPEEDUP of 10.0% (1.1x) (from 0.07009 to 0.07709 Mflops). +3 iterations (42 th.) took 1.502e-05s; avg 5.007e-06s ( +/- 99.20/180.95 %); best 4.02e-08s; worst 1.407e-05s; std dev. 6.418e-06 (taking best). +~ 42 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (41 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 41 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (40 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 40 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (39 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 39 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (38 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 38 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (37 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 37 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (36 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 36 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (35 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 94.38/ 66.67 %); best 4.02e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 35 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (34 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 93.68/ 50.00 %); best 4.02e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 34 threads: 4.02e-08s (2.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (33 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 33 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (32 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 32 threads: 9.537e-07s (8.8e+02 Mflops) (2/2 degradations so far) - +Best threads choice is 42; starting threads were 42; max speed gap is 24x; search took 0.00012s. +Starting split (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz (tpop: 4.02e-08 Mflops: 20896.888) +Split (1 -> 3 leaves, 1 -> 4 subms) took 5.794e-05s (of which: 5.007e-06s analysis, -1.775e+09s mem.mgmt); compute time: 2.599e-05s overall, 5.96e-06s searches, 2.003e-05s shuffle, 6.914e-06s switch, 9.537e-07s quadrants. +3 iterations (42 th.) took 0.04836s; avg 0.01612s ( +/- 30.77/ 19.49 %); best 0.01116s; worst 0.01926s; std dev. 0.003549 (taking best). +~ 42 threads: 0.01116s (0.075 Mflops) (0/2 degradations so far) - +3 iterations (41 th.) took 0.04471s; avg 0.0149s ( +/- 19.96/ 11.86 %); best 0.01193s; worst 0.01667s; std dev. 0.002116 (taking best). + 41 threads: 0.01193s (0.07 Mflops) (1/2 degradations so far) - +3 iterations (40 th.) took 0.03924s; avg 0.01308s ( +/- 29.62/ 56.91 %); best 0.009206s; worst 0.02052s; std dev. 0.005265 (taking best). + 40 threads: 0.009206s (0.091 Mflops) (0/2 degradations so far) - +3 iterations (39 th.) took 0.05622s; avg 0.01874s ( +/- 4.08/ 6.48 %); best 0.01797s; worst 0.01996s; std dev. 0.0008687 (taking best). + 39 threads: 0.01797s (0.047 Mflops) (1/2 degradations so far) - +3 iterations (38 th.) took 0.04786s; avg 0.01595s ( +/- 21.78/ 14.29 %); best 0.01248s; worst 0.01823s; std dev. 0.002497 (taking best). + 38 threads: 0.01248s (0.067 Mflops) (2/2 degradations so far) - +Best threads choice is 40; starting threads were 42; max speed gap is 2x; search took 0.24s. +After split step 1: tpop: 0.009206 s ~Mflops: 0.091 nsubm:3 otn:40 +Applying split (1 -> 3 leaves, 40 th.) yielded SLOWDOWN (1th of 3 tolerable) of 229021.352x: 4.02e-08s -> 0.009206s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 1 split steps (of max 6) (1 -> 3 subms) took 0.2367s (of which 7.701e-05s partitioning, 0s I/O); computing times: 2.599e-05s in par. loops, 5.96e-06s sorting, 5.007e-06s analyzing) +Total split + benchmarking process took 0.2367s, equivalent to 5887378.4/5887378.4 new/old ops (0.05344s for 1 clones -- as 1329418.7/1329418.7 ops, or 1329418.7/1329418.7 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.34s, 0.11s for constructor, 2 clones) obtained NO speedup (best stays 2.09e+04 Mflops). check results are ok Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 7.100e-02 s (100.00 %) - analyzed arrays in 3.199e-02 s (45.05 %) - cleaned-up arrays in 1.907e-06 s (0.00 %) - deduplicated arrays in 9.537e-07 s (0.00 %) - sorted arrays in 2.301e-02 s (32.40 %) - shuffled partitions in 1.598e-02 s (22.51 %) - memory allocations took 9.298e-06 s (0.01 %) - leafs setup took 2.861e-06 s (0.00 %) - halfword conversion took 5.960e-06 s (0.01 %) -Built (6 x 6)[0x563f3cb78340]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' + converted COO to RSB in 3.504e-02 s (100.00 %) + analyzed arrays in 1.813e-02 s (51.75 %) + cleaned-up arrays in 1.192e-06 s (0.00 %) + deduplicated arrays in 1.192e-06 s (0.00 %) + sorted arrays in 9.952e-03 s (28.40 %) + shuffled partitions in 6.930e-03 s (19.78 %) + memory allocations took 1.574e-05 s (0.04 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 2.861e-06 s (0.01 %) +Built (6 x 6)[0x557de13965b0]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' Read matrix pd.mtx 6 x 6 : 36 Matrix has no symmetry Using NRHS=4 -Repeated USMV took 0.4601E-04 s +Repeated USMV took 0.7391E-04 s A single USMM took 0.2408E-04 s -USMM-to-USMV speed ratio is is 1.911 x +USMM-to-USMV speed ratio is is 3.069 x Call auto-tuning routine.. Repeat measurement. -Tuned USMM took 0.5007E-05 s -Tuned-to-untuned speed ratio is is 4.810 x +Tuned USMM took 0.3099E-05 s +Tuned-to-untuned speed ratio is is 7.769 x FAILED: 0 PASSED: 2 /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran_rsb_fi @@ -8891,230 +9002,281 @@ Loading matrix from file "/build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx". Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.829e-01 s (100.00 %) - analyzed arrays in 5.831e-02 s (31.88 %) - cleaned-up arrays in 3.099e-06 s (0.00 %) - deduplicated arrays in 2.861e-06 s (0.00 %) - sorted arrays in 3.656e-02 s (19.99 %) - shuffled partitions in 5.035e-02 s (27.53 %) - memory allocations took 6.294e-05 s (0.03 %) - leafs setup took 1.407e-05 s (0.01 %) - halfword conversion took 3.759e-02 s (20.55 %) -Built (6 x 6)[0x559c414c8a20]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + converted COO to RSB in 7.196e-02 s (100.00 %) + analyzed arrays in 1.924e-02 s (26.74 %) + cleaned-up arrays in 2.146e-06 s (0.00 %) + deduplicated arrays in 3.099e-06 s (0.00 %) + sorted arrays in 9.764e-03 s (13.57 %) + shuffled partitions in 1.775e-02 s (24.66 %) + memory allocations took 2.074e-05 s (0.03 %) + leafs setup took 1.311e-05 s (0.02 %) + halfword conversion took 2.515e-02 s (34.95 %) +Built (6 x 6)[0x5646fa7dba20]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Considering D clone. Base matrix: -(6 x 6)[0x559c414cde10]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x5646fa7e4060]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.07591s; avg 0.0253s ( +/- 6.82/ 11.93 %); best 0.02358s; worst 0.02832s; std dev. 0.002142 (taking best). -Reference operation time is 0.0235751 s (0.006108 Mflops) with 20 threads. -After 0.076067s, autotuning routine did not find a better threads count configuration. +3 iterations (42 th.) took 0.0218s; avg 0.007268s ( +/- 20.28/ 10.18 %); best 0.005794s; worst 0.008008s; std dev. 0.001042 (taking best). +Reference operation time is 0.00579405 s (0.02485 Mflops) with 42 threads. +After 0.021918s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.08016s; avg 0.02672s ( +/- 10.22/ 20.36 %); best 0.02399s; worst 0.03216s; std dev. 0.003847 (taking best). -Reference operation time is 0.023988 s (0.006003 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.02399 Mflops: 0.006) -Merge (22 -> 16 leaves) took w.c.t. of 0.0124s, ~0.0002198s of computing time (of which 9.06e-06s sorting, 1.097e-05s analysis) -3 iterations (20 th.) took 0.1113s; avg 0.0371s ( +/- 35.86/ 61.52 %); best 0.0238s; worst 0.05992s; std dev. 0.01621 (taking best). -Reference operation time is 0.023798 s (0.006051 Mflops) with 20 threads. -After merge step 1: tpop: 0.0238 s ~Mflops: 0.006 nsubm:16 otn:20 -Applying merge (22 -> 16 leaves, 20 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00798x): 0.02399s -> 0.0238s, so IGNORING this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.01206s, ~0.0001447s of computing time (of which 7.868e-06s sorting, 1.502e-05s analysis) -3 iterations (20 th.) took 0.1358s; avg 0.04527s ( +/- 60.52/ 42.63 %); best 0.01787s; worst 0.06457s; std dev. 0.01991 (taking best). -Reference operation time is 0.017874 s (0.008056 Mflops) with 20 threads. -After merge step 2: tpop: 0.01787 s ~Mflops: 0.008 nsubm:10 otn:20 -Applying merge (16 -> 10 leaves, 20 th.) yielded SPEEDUP of 1.342x: 0.02399s -> 0.01787s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 5.603e-05s, ~1.407e-05s of computing time (of which 3.099e-06s sorting, 1.693e-05s analysis) -3 iterations (20 th.) took 0.07603s; avg 0.02534s ( +/- 21.49/ 26.56 %); best 0.0199s; worst 0.03207s; std dev. 0.005054 (taking best). -Reference operation time is 0.019897 s (0.007237 Mflops) with 20 threads. -After merge step 3: tpop: 0.0199 s ~Mflops: 0.007 nsubm:7 otn:20 -Applying merge (10 -> 7 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.113x: 0.01787s -> 0.0199s. -Merge (7 -> 4 leaves) took w.c.t. of 4.387e-05s, ~1.311e-05s of computing time (of which 1.907e-06s sorting, 1.216e-05s analysis) -3 iterations (20 th.) took 0.06384s; avg 0.02128s ( +/- 7.08/ 13.56 %); best 0.01977s; worst 0.02416s; std dev. 0.002042 (taking best). -Reference operation time is 0.0197721 s (0.007283 Mflops) with 20 threads. -After merge step 4: tpop: 0.01977 s ~Mflops: 0.007 nsubm:4 otn:20 -Applying merge (7 -> 4 leaves, 20 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.106x: 0.01787s -> 0.01977s. -Merge (4 -> 1 leaves) took w.c.t. of 4.721e-05s, ~1.597e-05s of computing time (of which 3.099e-06s sorting, 6.914e-06s analysis) -3 iterations (20 th.) took 1.192e-05s; avg 3.974e-06s ( +/- 98.47/176.00 %); best 6.095e-08s; worst 1.097e-05s; std dev. 4.961e-06 (taking best). -Reference operation time is 6.09517e-08 s (2363 Mflops) with 20 threads. -After merge step 5: tpop: 6.095e-08 s ~Mflops: 2362.526 nsubm:1 otn:20 -Applying merge (4 -> 1 leaves, 20 th.) yielded SPEEDUP of 293248.582x: 0.01787s -> 6.095e-08s, so taking this instance. -Merged all the matrix leaves: no reason to continue merging. -A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.5678s (of which 0.02487s partitioning, 0s I/O); computing times: 0.0004077s in par. loops, 2.503e-05s sorting, 6.199e-05s analyzing) -Total merge + benchmarking process took 0.5678s, equivalent to 9315439.1/23.7 new/old ops (0.2155s for 3 clones -- as 3536244.9/9.0 ops, or 1178748.3/3.0 ops per clone), SPEEDUP of 393557.598x -Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 393557.598x (0.02399s -> 6.095e-08s), will amortize in 23.7 ops by saving 0.02399s per op. -In 1 tuning rounds (tot. 0.71s, 0.22s for constructor, 3 clones) obtained a SPEEDUP of 39355659.8% (3.936e+05x) (from 0.006003 to 2363 Mflops). -After 0.707969s, global autotuning declared speedup of 393558 x, when using threads count of 20 and a new matrix: -(6 x 6)[0x559c414d4810]{D} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +3 iterations (42 th.) took 0.02394s; avg 0.00798s ( +/- 17.22/ 18.72 %); best 0.006606s; worst 0.009474s; std dev. 0.001174 (taking best). +Reference operation time is 0.00660586 s (0.0218 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.006606 Mflops: 0.022) +Merge (22 -> 16 leaves) took w.c.t. of 0.00468s, ~0.004155s of computing time (of which 4.768e-06s sorting, 1.097e-05s analysis) +3 iterations (42 th.) took 0.04512s; avg 0.01504s ( +/- 48.47/ 52.64 %); best 0.00775s; worst 0.02296s; std dev. 0.006224 (taking best). +Reference operation time is 0.00775003 s (0.01858 Mflops) with 42 threads. +After merge step 1: tpop: 0.00775 s ~Mflops: 0.019 nsubm:16 otn:42 +Applying merge (22 -> 16 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.173x: 0.006606s -> 0.00775s. +Merge (16 -> 10 leaves) took w.c.t. of 0.002125s, ~4.292e-05s of computing time (of which 5.245e-06s sorting, 1.001e-05s analysis) +3 iterations (42 th.) took 0.05867s; avg 0.01956s ( +/- 16.52/ 28.64 %); best 0.01633s; worst 0.02516s; std dev. 0.003975 (taking best). +Reference operation time is 0.0163262 s (0.00882 Mflops) with 42 threads. +After merge step 2: tpop: 0.01633 s ~Mflops: 0.009 nsubm:10 otn:42 +Applying merge (16 -> 10 leaves, 42 th.) yielded SLOWDOWN (2th of 3 tolerable) of 2.471x: 0.006606s -> 0.01633s. +Skipping further merge based tests after 2 definite performance degradations in a row (and last exceeding limit). +A total of 2 merge steps (of max 6) (22 -> 10 subms) took 0.1107s (of which 0.006836s partitioning, 0s I/O); computing times: 0.004198s in par. loops, 1.001e-05s sorting, 2.098e-05s analyzing) +Total merge + benchmarking process took 0.1107s, equivalent to 16.8/16.8 new/old ops (0.02766s for 1 clones -- as 4.2/4.2 ops, or 4.2/4.2 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (42 th.) took 0.05224s; avg 0.01741s ( +/- 34.57/ 28.87 %); best 0.01139s; worst 0.02244s; std dev. 0.004564 (taking best). +Reference operation time is 0.0113931 s (0.01264 Mflops) with 42 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01139 Mflops: 0.013) +Split (22 -> 28 leaves, 29 -> 37 subms) took 0.009518s (of which: 6.914e-06s analysis, -3.549e+09s mem.mgmt); compute time: 0.001065s overall, 4.053e-06s searches, 0.001061s shuffle, 0.0009811s switch, 3.099e-06s quadrants. +3 iterations (42 th.) took 0.04237s; avg 0.01412s ( +/- 19.72/ 29.73 %); best 0.01134s; worst 0.01832s; std dev. 0.003021 (taking best). +Reference operation time is 0.0113389 s (0.0127 Mflops) with 42 threads. +After split step 1: tpop: 0.01134 s ~Mflops: 0.013 nsubm:28 otn:42 +Applying split (22 -> 28 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00477x): 0.01139s -> 0.01134s, so IGNORING this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01444s (of which: 8.821e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.04047s; avg 0.01349s ( +/- 24.07/ 38.26 %); best 0.01024s; worst 0.01865s; std dev. 0.00369 (taking best). +Reference operation time is 0.0102429 s (0.01406 Mflops) with 42 threads. +After split step 2: tpop: 0.01024 s ~Mflops: 0.014 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.112x: 0.01139s -> 0.01024s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.01347s (of which: 9.06e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.04832s; avg 0.01611s ( +/- 28.86/ 24.21 %); best 0.01146s; worst 0.02s; std dev. 0.003529 (taking best). +Reference operation time is 0.0114579 s (0.01257 Mflops) with 42 threads. +After split step 3: tpop: 0.01146 s ~Mflops: 0.013 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.119x: 0.01024s -> 0.01146s. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.007472s (of which: 7.868e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.03193s; avg 0.01064s ( +/- 45.13/ 55.42 %); best 0.005841s; worst 0.01654s; std dev. 0.004438 (taking best). +Reference operation time is 0.00584102 s (0.02465 Mflops) with 42 threads. +After split step 4: tpop: 0.005841 s ~Mflops: 0.025 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.754x: 0.01024s -> 0.005841s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.009444s (of which: 7.868e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 1.053s; avg 0.3509s ( +/- 96.93/192.68 %); best 0.01079s; worst 1.027s; std dev. 0.478 (taking best). +Reference operation time is 0.010787 s (0.01335 Mflops) with 42 threads. +After split step 5: tpop: 0.01079 s ~Mflops: 0.013 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.847x: 0.005841s -> 0.01079s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 5 split steps (of max 6) (22 -> 28 subms) took 1.344s (of which 0.05498s partitioning, 0s I/O); computing times: 0.001065s in par. loops, 4.053e-06s sorting, 4.053e-05s analyzing) +Total split + benchmarking process took 1.344s, equivalent to 230.1/118.0 new/old ops (0.1169s for 3 clones -- as 20.0/10.3 ops, or 6.7/3.4 ops per clone), SPEEDUP of 1.951x +Applying multi-split (22 -> 28 leaves, 4 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 1.951x (0.01139s -> 0.005841s), will amortize in 242.0 ops by saving 0.005552s per op. +In 1 tuning rounds (tot. 1.6s, 0.14s for constructor, 4 clones) obtained a SPEEDUP of 95.1% (1.951x) (from 0.01264 to 0.02465 Mflops). +After 1.602483s, global autotuning declared speedup of 1.95053 x, when using threads count of 42 and a new matrix: +(6 x 6)[0x5646fa7eb3e0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Considering S clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.599e-01 s (100.00 %) - analyzed arrays in 6.389e-02 s (39.95 %) - cleaned-up arrays in 1.907e-06 s (0.00 %) + converted COO to RSB in 7.742e-02 s (100.00 %) + analyzed arrays in 2.544e-02 s (32.86 %) + cleaned-up arrays in 9.537e-07 s (0.00 %) deduplicated arrays in 1.192e-06 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.799e-02 s (30.01 %) - memory allocations took 1.717e-05 s (0.01 %) - leafs setup took 9.060e-06 s (0.01 %) - halfword conversion took 4.802e-02 s (30.02 %) -Built (6 x 6)[0x559c414cde10]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' + shuffled partitions in 2.111e-02 s (27.26 %) + memory allocations took 1.001e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.085e-02 s (39.85 %) +Built (6 x 6)[0x5646fa7e4060]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Base matrix: -(6 x 6)[0x559c414cde10]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +(6 x 6)[0x5646fa7e4060]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.04792s; avg 0.01597s ( +/- 0.78/ 1.00 %); best 0.01585s; worst 0.01613s; std dev. 0.0001188 (taking best). -Reference operation time is 0.0158491 s (0.009086 Mflops) with 20 threads. -After 0.048007s, autotuning routine did not find a better threads count configuration. +3 iterations (42 th.) took 0.03754s; avg 0.01251s ( +/- 37.57/ 28.59 %); best 0.007812s; worst 0.01609s; std dev. 0.003472 (taking best). +Reference operation time is 0.00781202 s (0.01843 Mflops) with 42 threads. +After 0.037595s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.05132s; avg 0.01711s ( +/- 11.02/ 17.69 %); best 0.01522s; worst 0.02013s; std dev. 0.002161 (taking best). -Reference operation time is 0.0152202 s (0.009461 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.01522 Mflops: 0.009) -Merge (16 -> 10 leaves) took w.c.t. of 0.008287s, ~7.677e-05s of computing time (of which 1.216e-05s sorting, 1.097e-05s analysis) -3 iterations (20 th.) took 0.1035s; avg 0.03449s ( +/- 53.60/ 84.10 %); best 0.016s; worst 0.0635s; std dev. 0.02077 (taking best). -Reference operation time is 0.0160031 s (0.008998 Mflops) with 20 threads. -After merge step 1: tpop: 0.016 s ~Mflops: 0.009 nsubm:10 otn:20 -Applying merge (16 -> 10 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.051x: 0.01522s -> 0.016s. -Merge (10 -> 7 leaves) took w.c.t. of 5.484e-05s, ~1.502e-05s of computing time (of which 3.099e-06s sorting, 1.287e-05s analysis) -3 iterations (20 th.) took 0.07193s; avg 0.02398s ( +/- 33.09/ 33.44 %); best 0.01604s; worst 0.032s; std dev. 0.006513 (taking best). -Reference operation time is 0.0160429 s (0.008976 Mflops) with 20 threads. -After merge step 2: tpop: 0.01604 s ~Mflops: 0.009 nsubm:7 otn:20 -Applying merge (10 -> 7 leaves, 20 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.054x: 0.01522s -> 0.01604s. -Merge (7 -> 4 leaves) took w.c.t. of 4.005e-05s, ~1.216e-05s of computing time (of which 1.907e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 0.07943s; avg 0.02648s ( +/- 9.34/ 18.66 %); best 0.02401s; worst 0.03142s; std dev. 0.003494 (taking best). -Reference operation time is 0.0240052 s (0.005999 Mflops) with 20 threads. -After merge step 3: tpop: 0.02401 s ~Mflops: 0.006 nsubm:4 otn:20 -Applying merge (7 -> 4 leaves, 20 th.) yielded SLOWDOWN (3th of 3 tolerable) of 1.577x: 0.01522s -> 0.02401s. -Skipping further merge based tests after 3 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (16 -> 4 subms) took 0.2639s (of which 0.00842s partitioning, 0s I/O); computing times: 0.000104s in par. loops, 1.717e-05s sorting, 3.386e-05s analyzing) -Total merge + benchmarking process took 0.2639s, equivalent to 17.3/17.3 new/old ops (0.04854s for 1 clones -- as 3.2/3.2 ops, or 3.2/3.2 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +3 iterations (42 th.) took 0.04061s; avg 0.01354s ( +/- 42.06/ 27.81 %); best 0.007843s; worst 0.0173s; std dev. 0.004095 (taking best). +Reference operation time is 0.00784302 s (0.01836 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.007843 Mflops: 0.018) +Merge (16 -> 10 leaves) took w.c.t. of 0.001798s, ~3.481e-05s of computing time (of which 4.292e-06s sorting, 6.914e-06s analysis) +3 iterations (42 th.) took 0.04142s; avg 0.01381s ( +/- 18.66/ 27.51 %); best 0.01123s; worst 0.0176s; std dev. 0.002742 (taking best). +Reference operation time is 0.011229 s (0.01282 Mflops) with 42 threads. +After merge step 1: tpop: 0.01123 s ~Mflops: 0.013 nsubm:10 otn:42 +Applying merge (16 -> 10 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.432x: 0.007843s -> 0.01123s. +Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 1 merge steps (of max 6) (16 -> 10 subms) took 0.04327s (of which 0.001811s partitioning, 0s I/O); computing times: 3.481e-05s in par. loops, 4.292e-06s sorting, 6.914e-06s analyzing) +Total merge + benchmarking process took 0.04327s, equivalent to 5.5/5.5 new/old ops (0.03592s for 1 clones -- as 4.6/4.6 ops, or 4.6/4.6 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (20 th.) took 0.05615s; avg 0.01872s ( +/- 14.26/ 28.10 %); best 0.01605s; worst 0.02398s; std dev. 0.003719 (taking best). -Reference operation time is 0.0160489 s (0.008973 Mflops) with 20 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.01605 Mflops: 0.009) -Split (16 -> 28 leaves, 21 -> 37 subms) took 0.02375s (of which: 6.914e-06s analysis, -6.961e+09s mem.mgmt); compute time: 0.002416s overall, 9.06e-06s searches, 0.002407s shuffle, 0.002169s switch, 1.001e-05s quadrants. -3 iterations (20 th.) took 0.06398s; avg 0.02133s ( +/- 43.44/ 33.96 %); best 0.01206s; worst 0.02857s; std dev. 0.006889 (taking best). -Reference operation time is 0.012063 s (0.01194 Mflops) with 20 threads. -After split step 1: tpop: 0.01206 s ~Mflops: 0.012 nsubm:28 otn:20 -Applying split (16 -> 28 leaves, 20 th.) yielded SPEEDUP of 1.330x: 0.01605s -> 0.01206s, so taking this instance. -Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02793s (of which: 9.775e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (20 th.) took 0.07998s; avg 0.02666s ( +/- 9.90/ 19.71 %); best 0.02402s; worst 0.03191s; std dev. 0.003716 (taking best). -Reference operation time is 0.02402 s (0.005995 Mflops) with 20 threads. -After split step 2: tpop: 0.02402 s ~Mflops: 0.006 nsubm:28 otn:20 -Applying split (28 -> 28 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.991x: 0.01206s -> 0.02402s. +3 iterations (42 th.) took 0.036s; avg 0.012s ( +/- 33.18/ 24.57 %); best 0.008018s; worst 0.01495s; std dev. 0.002922 (taking best). +Reference operation time is 0.00801778 s (0.01796 Mflops) with 42 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.008018 Mflops: 0.018) +Split (16 -> 28 leaves, 21 -> 37 subms) took 0.009523s (of which: 6.199e-06s analysis, -7.098e+09s mem.mgmt); compute time: 3.29e-05s overall, 9.537e-07s searches, 3.195e-05s shuffle, 1.287e-05s switch, 2.861e-06s quadrants. +3 iterations (42 th.) took 0.02655s; avg 0.008849s ( +/- 12.57/ 22.56 %); best 0.007737s; worst 0.01085s; std dev. 0.001415 (taking best). +Reference operation time is 0.00773692 s (0.01861 Mflops) with 42 threads. +After split step 1: tpop: 0.007737 s ~Mflops: 0.019 nsubm:28 otn:42 +Applying split (16 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.036x: 0.008018s -> 0.007737s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.007982s (of which: 8.106e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.0355s; avg 0.01183s ( +/- 18.38/ 13.83 %); best 0.009658s; worst 0.01347s; std dev. 0.001602 (taking best). +Reference operation time is 0.0096581 s (0.01491 Mflops) with 42 threads. +After split step 2: tpop: 0.009658 s ~Mflops: 0.015 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.248x: 0.007737s -> 0.009658s. Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 split steps (of max 6) (16 -> 28 subms) took 0.2439s (of which 0.0519s partitioning, 0s I/O); computing times: 0.002416s in par. loops, 9.06e-06s sorting, 1.669e-05s analyzing) -Total split + benchmarking process took 0.2439s, equivalent to 20.2/15.2 new/old ops (0.1198s for 2 clones -- as 9.9/7.5 ops, or 5.0/3.7 ops per clone), SPEEDUP of 1.330x -Applying multi-split (16 -> 28 leaves, 1 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 1.330x (0.01605s -> 0.01206s), will amortize in 61.2 ops by saving 0.003986s per op. -In 1 tuning rounds (tot. 0.74s, 0.17s for constructor, 3 clones) obtained a SPEEDUP of 33.0% (1.33x) (from 0.008973 to 0.01194 Mflops). -After 0.735909s, global autotuning declared speedup of 1.33042 x, when using threads count of 20 and a new matrix: -(6 x 6)[0x559c414d5fb0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +A total of 2 split steps (of max 6) (16 -> 28 subms) took 0.1066s (of which 0.0176s partitioning, 0s I/O); computing times: 3.29e-05s in par. loops, 9.537e-07s sorting, 1.431e-05s analyzing) +Total split + benchmarking process took 0.1066s, equivalent to 13.8/13.3 new/old ops (0.06617s for 2 clones -- as 8.6/8.3 ops, or 4.3/4.1 ops per clone), SPEEDUP of 1.036x +Applying multi-split (16 -> 28 leaves, 1 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 1.036x (0.008018s -> 0.007737s), will amortize in 379.7 ops by saving 0.0002809s per op. +In 1 tuning rounds (tot. 0.3s, 0.1s for constructor, 3 clones) obtained a SPEEDUP of 3.6% (1.036x) (from 0.01796 to 0.01861 Mflops). +After 0.302004s, global autotuning declared speedup of 1.0363 x, when using threads count of 42 and a new matrix: +(6 x 6)[0x5646fa7eb3e0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Considering C clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.514e-01 s (100.00 %) - analyzed arrays in 5.543e-02 s (36.60 %) - cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 2.146e-06 s (0.00 %) + converted COO to RSB in 6.612e-02 s (100.00 %) + analyzed arrays in 1.452e-02 s (21.97 %) + cleaned-up arrays in 2.146e-06 s (0.00 %) + deduplicated arrays in 0.000e+00 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.808e-02 s (31.75 %) - memory allocations took 1.502e-05 s (0.01 %) - leafs setup took 8.821e-06 s (0.01 %) - halfword conversion took 4.791e-02 s (31.63 %) -Built (6 x 6)[0x559c414d5dd0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + shuffled partitions in 2.588e-02 s (39.14 %) + memory allocations took 1.001e-05 s (0.02 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 2.569e-02 s (38.86 %) +Built (6 x 6)[0x5646fa7f4ee0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Base matrix: -(6 x 6)[0x559c414d5dd0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x5646fa7f4ee0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.07515s; avg 0.02505s ( +/- 23.13/ 27.73 %); best 0.01926s; worst 0.032s; std dev. 0.005265 (taking best). -Reference operation time is 0.0192571 s (0.02991 Mflops) with 20 threads. -After 0.075215s, autotuning routine did not find a better threads count configuration. +3 iterations (42 th.) took 0.04034s; avg 0.01345s ( +/- 45.42/ 40.11 %); best 0.007339s; worst 0.01884s; std dev. 0.004721 (taking best). +Reference operation time is 0.007339 s (0.07848 Mflops) with 42 threads. +After 0.040393s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.06301s; avg 0.021s ( +/- 22.19/ 12.65 %); best 0.01634s; worst 0.02366s; std dev. 0.003306 (taking best). -Reference operation time is 0.0163448 s (0.03524 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01634 Mflops: 0.035) -Merge (22 -> 16 leaves) took w.c.t. of 0.004506s, ~5.984e-05s of computing time (of which 9.06e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 0.1114s; avg 0.03715s ( +/- 56.86/ 92.36 %); best 0.01602s; worst 0.07146s; std dev. 0.02448 (taking best). -Reference operation time is 0.0160241 s (0.03595 Mflops) with 20 threads. -After merge step 1: tpop: 0.01602 s ~Mflops: 0.036 nsubm:16 otn:20 -Applying merge (22 -> 16 leaves, 20 th.) yielded SPEEDUP of 1.020x: 0.01634s -> 0.01602s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.01207s, ~5.77e-05s of computing time (of which 5.96e-06s sorting, 1.097e-05s analysis) -3 iterations (20 th.) took 0.0879s; avg 0.0293s ( +/- 59.00/104.30 %); best 0.01201s; worst 0.05986s; std dev. 0.02167 (taking best). -Reference operation time is 0.0120132 s (0.04795 Mflops) with 20 threads. -After merge step 2: tpop: 0.01201 s ~Mflops: 0.048 nsubm:10 otn:20 -Applying merge (16 -> 10 leaves, 20 th.) yielded SPEEDUP of 1.334x: 0.01602s -> 0.01201s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 3.409e-05s, ~1.121e-05s of computing time (of which 1.907e-06s sorting, 6.914e-06s analysis) -3 iterations (20 th.) took 0.05827s; avg 0.01942s ( +/- 3.34/ 2.99 %); best 0.01877s; worst 0.02s; std dev. 0.0005045 (taking best). -Reference operation time is 0.0187731 s (0.03068 Mflops) with 20 threads. -After merge step 3: tpop: 0.01877 s ~Mflops: 0.031 nsubm:7 otn:20 -Applying merge (10 -> 7 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.563x: 0.01201s -> 0.01877s. +3 iterations (42 th.) took 0.03635s; avg 0.01212s ( +/- 38.48/ 37.06 %); best 0.007455s; worst 0.01661s; std dev. 0.003739 (taking best). +Reference operation time is 0.00745487 s (0.07726 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.007455 Mflops: 0.077) +Merge (22 -> 16 leaves) took w.c.t. of 0.003995s, ~3.195e-05s of computing time (of which 8.106e-06s sorting, 6.914e-06s analysis) +3 iterations (42 th.) took 0.04127s; avg 0.01376s ( +/- 45.66/ 25.12 %); best 0.007475s; worst 0.01721s; std dev. 0.004449 (taking best). +Reference operation time is 0.0074749 s (0.07706 Mflops) with 42 threads. +After merge step 1: tpop: 0.007475 s ~Mflops: 0.077 nsubm:16 otn:42 +Applying merge (22 -> 16 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99732x): 0.007455s -> 0.007475s, so IGNORING this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.007133s, ~2.599e-05s of computing time (of which 4.292e-06s sorting, 7.868e-06s analysis) +3 iterations (42 th.) took 0.05137s; avg 0.01712s ( +/- 33.75/ 38.05 %); best 0.01134s; worst 0.02364s; std dev. 0.005047 (taking best). +Reference operation time is 0.0113442 s (0.05077 Mflops) with 42 threads. +After merge step 2: tpop: 0.01134 s ~Mflops: 0.051 nsubm:10 otn:42 +Applying merge (16 -> 10 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.522x: 0.007455s -> 0.01134s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (22 -> 7 subms) took 0.4108s (of which 0.01665s partitioning, 0s I/O); computing times: 0.0001287s in par. loops, 1.693e-05s sorting, 2.789e-05s analyzing) -Total merge + benchmarking process took 0.4108s, equivalent to 34.2/25.1 new/old ops (0.182s for 3 clones -- as 15.1/11.1 ops, or 5.0/3.7 ops per clone), SPEEDUP of 1.361x -Applying multi-merge (22 -> 10 leaves, 2 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 1.361x (0.01634s -> 0.01201s), will amortize in 94.8 ops by saving 0.004332s per op. -In 1 tuning rounds (tot. 0.52s, 0.18s for constructor, 3 clones) obtained a SPEEDUP of 36.1% (1.361x) (from 0.03524 to 0.04795 Mflops). -After 0.519747s, global autotuning declared speedup of 1.36057 x, when using threads count of 20 and a new matrix: -(6 x 6)[0x559c414d8420]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 10, symflags:'' +A total of 2 merge steps (of max 6) (22 -> 10 subms) took 0.1042s (of which 0.01114s partitioning, 0s I/O); computing times: 5.794e-05s in par. loops, 1.24e-05s sorting, 1.478e-05s analyzing) +Total merge + benchmarking process took 0.1042s, equivalent to 14.0/14.0 new/old ops (0.03696s for 1 clones -- as 5.0/5.0 ops, or 5.0/5.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (42 th.) took 0.03532s; avg 0.01177s ( +/- 4.19/ 8.09 %); best 0.01128s; worst 0.01273s; std dev. 0.0006739 (taking best). +Reference operation time is 0.011281 s (0.05106 Mflops) with 42 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01128 Mflops: 0.051) +Split (22 -> 28 leaves, 29 -> 37 subms) took 0.0052s (of which: 8.106e-06s analysis, -3.549e+09s mem.mgmt); compute time: 2.813e-05s overall, 9.537e-07s searches, 2.718e-05s shuffle, 1.001e-05s switch, 2.146e-06s quadrants. +3 iterations (42 th.) took 0.04494s; avg 0.01498s ( +/- 20.82/ 14.30 %); best 0.01186s; worst 0.01712s; std dev. 0.002256 (taking best). +Reference operation time is 0.0118601 s (0.04857 Mflops) with 42 threads. +After split step 1: tpop: 0.01186 s ~Mflops: 0.049 nsubm:28 otn:42 +Applying split (22 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.051x: 0.01128s -> 0.01186s. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.005482s (of which: 7.868e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.04576s; avg 0.01525s ( +/- 49.68/ 38.88 %); best 0.007675s; worst 0.02118s; std dev. 0.005636 (taking best). +Reference operation time is 0.00767493 s (0.07505 Mflops) with 42 threads. +After split step 2: tpop: 0.007675 s ~Mflops: 0.075 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.470x: 0.01128s -> 0.007675s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.005868s (of which: 8.106e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.03676s; avg 0.01225s ( +/- 36.75/ 29.72 %); best 0.00775s; worst 0.01589s; std dev. 0.00338 (taking best). +Reference operation time is 0.00775003 s (0.07432 Mflops) with 42 threads. +After split step 3: tpop: 0.00775 s ~Mflops: 0.074 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99031x): 0.007675s -> 0.00775s, so IGNORING this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.00574s (of which: 6.914e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.04516s; avg 0.01505s ( +/- 18.97/ 23.43 %); best 0.0122s; worst 0.01858s; std dev. 0.002649 (taking best). +Reference operation time is 0.012198 s (0.04722 Mflops) with 42 threads. +After split step 4: tpop: 0.0122 s ~Mflops: 0.047 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.589x: 0.007675s -> 0.0122s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 4 split steps (of max 6) (22 -> 28 subms) took 0.232s (of which 0.02249s partitioning, 0s I/O); computing times: 2.813e-05s in par. loops, 9.537e-07s sorting, 3.099e-05s analyzing) +Total split + benchmarking process took 0.232s, equivalent to 30.2/20.6 new/old ops (0.07019s for 2 clones -- as 9.1/6.2 ops, or 4.6/3.1 ops per clone), SPEEDUP of 1.470x +Applying multi-split (22 -> 28 leaves, 2 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 1.470x (0.01128s -> 0.007675s), will amortize in 64.3 ops by saving 0.003606s per op. +In 1 tuning rounds (tot. 0.48s, 0.11s for constructor, 3 clones) obtained a SPEEDUP of 47.0% (1.47x) (from 0.05106 to 0.07505 Mflops). +After 0.478546s, global autotuning declared speedup of 1.46985 x, when using threads count of 42 and a new matrix: +(6 x 6)[0x5646fa7f7ae0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Considering Z clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.089e-01 s (100.00 %) - analyzed arrays in 4.093e-02 s (37.57 %) - cleaned-up arrays in 1.192e-06 s (0.00 %) + converted COO to RSB in 7.343e-02 s (100.00 %) + analyzed arrays in 2.863e-02 s (38.99 %) + cleaned-up arrays in 9.537e-07 s (0.00 %) deduplicated arrays in 9.537e-07 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.224e-02 s (29.59 %) - memory allocations took 1.788e-05 s (0.02 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.574e-02 s (32.81 %) -Built (6 x 6)[0x559c414d5dd0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' + shuffled partitions in 2.746e-02 s (37.40 %) + memory allocations took 1.526e-05 s (0.02 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 1.730e-02 s (23.57 %) +Built (6 x 6)[0x5646fa7eb3e0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Base matrix: -(6 x 6)[0x559c414d5dd0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +(6 x 6)[0x5646fa7eb3e0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.05267s; avg 0.01756s ( +/- 14.06/ 19.32 %); best 0.01509s; worst 0.02095s; std dev. 0.002481 (taking best). -Reference operation time is 0.01509 s (0.03817 Mflops) with 20 threads. -After 0.052739s, autotuning routine did not find a better threads count configuration. +3 iterations (42 th.) took 0.02954s; avg 0.009847s ( +/- 36.75/ 21.55 %); best 0.006228s; worst 0.01197s; std dev. 0.002572 (taking best). +Reference operation time is 0.00622797 s (0.09249 Mflops) with 42 threads. +After 0.029599s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (20 th.) took 0.04798s; avg 0.01599s ( +/- 6.65/ 6.73 %); best 0.01493s; worst 0.01707s; std dev. 0.0008732 (taking best). -Reference operation time is 0.0149291 s (0.03858 Mflops) with 20 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.01493 Mflops: 0.039) -Merge (28 -> 22 leaves) took w.c.t. of 0.008097s, ~6.39e-05s of computing time (of which 9.06e-06s sorting, 1.097e-05s analysis) -3 iterations (20 th.) took 0.104s; avg 0.03466s ( +/- 67.89/ 98.34 %); best 0.01113s; worst 0.06874s; std dev. 0.02468 (taking best). -Reference operation time is 0.0111279 s (0.05176 Mflops) with 20 threads. -After merge step 1: tpop: 0.01113 s ~Mflops: 0.052 nsubm:22 otn:20 -Applying merge (28 -> 22 leaves, 20 th.) yielded SPEEDUP of 1.342x: 0.01493s -> 0.01113s, so taking this instance. -Merge (22 -> 16 leaves) took w.c.t. of 0.005322s, ~6.008e-05s of computing time (of which 8.106e-06s sorting, 1.001e-05s analysis) -3 iterations (20 th.) took 0.09072s; avg 0.03024s ( +/- 73.07/107.03 %); best 0.008143s; worst 0.06261s; std dev. 0.02339 (taking best). -Reference operation time is 0.00814295 s (0.07074 Mflops) with 20 threads. -After merge step 2: tpop: 0.008143 s ~Mflops: 0.071 nsubm:16 otn:20 -Applying merge (22 -> 16 leaves, 20 th.) yielded SPEEDUP of 1.367x: 0.01113s -> 0.008143s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.02401s, ~8.512e-05s of computing time (of which 9.06e-06s sorting, 8.821e-06s analysis) -3 iterations (20 th.) took 0.1215s; avg 0.04052s ( +/- 60.60/116.97 %); best 0.01596s; worst 0.08791s; std dev. 0.03352 (taking best). -Reference operation time is 0.015964 s (0.03608 Mflops) with 20 threads. -After merge step 3: tpop: 0.01596 s ~Mflops: 0.036 nsubm:10 otn:20 -Applying merge (16 -> 10 leaves, 20 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.960x: 0.008143s -> 0.01596s. +3 iterations (42 th.) took 0.03151s; avg 0.0105s ( +/- 33.30/ 25.52 %); best 0.007005s; worst 0.01318s; std dev. 0.002588 (taking best). +Reference operation time is 0.00700498 s (0.08223 Mflops) with 42 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.007005 Mflops: 0.082) +Merge (28 -> 22 leaves) took w.c.t. of 0.003458s, ~3.195e-05s of computing time (of which 5.722e-06s sorting, 8.821e-06s analysis) +3 iterations (42 th.) took 0.03761s; avg 0.01254s ( +/- 9.13/ 7.87 %); best 0.01139s; worst 0.01352s; std dev. 0.0008772 (taking best). +Reference operation time is 0.01139 s (0.05057 Mflops) with 42 threads. +After merge step 1: tpop: 0.01139 s ~Mflops: 0.051 nsubm:22 otn:42 +Applying merge (28 -> 22 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.626x: 0.007005s -> 0.01139s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (28 -> 10 subms) took 0.4577s (of which 0.03752s partitioning, 0s I/O); computing times: 0.0002091s in par. loops, 2.623e-05s sorting, 2.98e-05s analyzing) -Total merge + benchmarking process took 0.4577s, equivalent to 56.2/30.7 new/old ops (0.1509s for 3 clones -- as 18.5/10.1 ops, or 6.2/3.4 ops per clone), SPEEDUP of 1.833x -Applying multi-merge (28 -> 16 leaves, 2 steps, 0 -> 20 th.sp.) yielded SPEEDUP of 1.833x (0.01493s -> 0.008143s), will amortize in 67.4 ops by saving 0.006786s per op. -In 1 tuning rounds (tot. 0.55s, 0.15s for constructor, 3 clones) obtained a SPEEDUP of 83.3% (1.833x) (from 0.03858 to 0.07074 Mflops). -After 0.553090s, global autotuning declared speedup of 1.83337 x, when using threads count of 20 and a new matrix: -(6 x 6)[0x559c414dbc80]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +A total of 1 merge steps (of max 6) (28 -> 22 subms) took 0.04112s (of which 0.003472s partitioning, 0s I/O); computing times: 3.195e-05s in par. loops, 5.722e-06s sorting, 8.821e-06s analyzing) +Total merge + benchmarking process took 0.04112s, equivalent to 5.9/5.9 new/old ops (0.02907s for 1 clones -- as 4.2/4.2 ops, or 4.2/4.2 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (42 th.) took 0.03911s; avg 0.01304s ( +/- 12.44/ 13.57 %); best 0.01142s; worst 0.01481s; std dev. 0.001389 (taking best). +Reference operation time is 0.011416 s (0.05046 Mflops) with 42 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.01142 Mflops: 0.050) +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.006069s (of which: 8.106e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.03278s; avg 0.01093s ( +/- 14.85/ 14.98 %); best 0.009302s; worst 0.01256s; std dev. 0.001331 (taking best). +Reference operation time is 0.00930214 s (0.06192 Mflops) with 42 threads. +After split step 1: tpop: 0.009302 s ~Mflops: 0.062 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.227x: 0.01142s -> 0.009302s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.007467s (of which: 6.914e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.03499s; avg 0.01166s ( +/- 25.22/ 28.73 %); best 0.008723s; worst 0.01502s; std dev. 0.002585 (taking best). +Reference operation time is 0.00872302 s (0.06603 Mflops) with 42 threads. +After split step 2: tpop: 0.008723 s ~Mflops: 0.066 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.066x: 0.009302s -> 0.008723s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.006113s (of which: 7.868e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.02961s; avg 0.009869s ( +/- 20.58/ 14.58 %); best 0.007838s; worst 0.01131s; std dev. 0.001477 (taking best). +Reference operation time is 0.00783801 s (0.07349 Mflops) with 42 threads. +After split step 3: tpop: 0.007838 s ~Mflops: 0.073 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SPEEDUP of 1.113x: 0.008723s -> 0.007838s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.004565s (of which: 6.914e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.04049s; avg 0.0135s ( +/- 30.48/ 50.36 %); best 0.009383s; worst 0.02029s; std dev. 0.004841 (taking best). +Reference operation time is 0.00938296 s (0.06139 Mflops) with 42 threads. +After split step 4: tpop: 0.009383 s ~Mflops: 0.061 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.197x: 0.007838s -> 0.009383s. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.008869s (of which: 9.06e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (42 th.) took 0.0355s; avg 0.01183s ( +/- 19.12/ 10.98 %); best 0.009571s; worst 0.01313s; std dev. 0.001605 (taking best). +Reference operation time is 0.00957084 s (0.06018 Mflops) with 42 threads. +After split step 5: tpop: 0.009571 s ~Mflops: 0.060 nsubm:28 otn:42 +Applying split (28 -> 28 leaves, 42 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.221x: 0.007838s -> 0.009571s. +Skipping further split based tests after 2 definite performance degradations in a row (and last exceeding limit). +A total of 5 split steps (of max 6) (28 -> 28 subms) took 0.308s (of which 0.03355s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 3.886e-05s analyzing) +Total split + benchmarking process took 0.308s, equivalent to 39.3/27.0 new/old ops (0.1364s for 4 clones -- as 17.4/11.9 ops, or 4.4/3.0 ops per clone), SPEEDUP of 1.456x +Applying multi-split (28 -> 28 leaves, 3 steps, 0 -> 42 th.sp.) yielded SPEEDUP of 1.456x (0.01142s -> 0.007838s), will amortize in 86.1 ops by saving 0.003578s per op. +In 1 tuning rounds (tot. 0.48s, 0.17s for constructor, 5 clones) obtained a SPEEDUP of 45.6% (1.456x) (from 0.05046 to 0.07349 Mflops). +After 0.484537s, global autotuning declared speedup of 1.45649 x, when using threads count of 42 and a new matrix: +(6 x 6)[0x5646fa7f7ae0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -9245,9 +9407,9 @@ 4 0 DIFF PRINT TEST END Beginning large binary search test. -Detected 84322365440 bytes of memory, comprehensive of 16603684864 of free memory. +Detected 84307623936 bytes of memory, comprehensive of 57301454848 of free memory. On this system, maximal array of coordinates can have 2147483137 elements and occupy 8589932548 bytes. -Will perform the test using less memory (17592186039907 MB) than on the maximal coordinate indices array (18446744068982446080) allows. +Will perform the test using less memory (17592186036249 MB) than on the maximal coordinate indices array (18446744065146035200) allows. Skipping test: too little memory. Skipping large binary search test. BASIC SPARSE BLAS TEST: BEGIN @@ -9256,7 +9418,7 @@ got RSB_IO_WANT_IS_INITIALIZED_MARKER: 1 INIT INTERFACE TEST: END (SUCCESS) DEVEL PRINT TEST: BEGIN -(4 x 4)[0x55b1bf850740]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(4 x 4)[0x55a6c82e9e70]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9265,8 +9427,8 @@ RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS | RSB_FLAG_OWN_PARTITIONING_ARRAYS | RSB_FLAG_SORT_INPUT -(2 x 2)[0x55b1bf850850]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x55b1bf850960]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x55a6c82e9f80]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x55a6c82ea090]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' #R 4 x 4, 4 nnz (16 bytes), 16 index space for bytes, 544 bytes for 2 structs (2 of which are on the diagonal) (1e+02% of nnz are on the diagonal) #N at 0 0, 4 x 4, 4 nnz ( 25%) #T at 0 0, 2 x 2, 2 nnz ( 50%) @@ -9274,9 +9436,9 @@ ( 0x2046186 = { rec:1 coo:1 css:1 hw:1 ic:1 fi:0 symflags: } ) DEVEL PRINT TEST: END PRINT TEST: BEGIN [QUIET] -(2 x 2)[0x55b1bf850850]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x55b1bf850960]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(4 x 4)[0x55b1bf850740]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(2 x 2)[0x55a6c82e9f80]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x55a6c82ea090]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(4 x 4)[0x55a6c82e9e70]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9294,8 +9456,8 @@ BASIC PRIMITIVES TEST: BEGIN BASIC PRIMITIVES TEST: END (SUCCESS) ADVANCED SPARSE BLAS TEST: BEGIN [limit 30.000000s] [QUIET] -Terminating testing earlier due to user timeout request: test took 30.035926 s, max allowed was 30.000000. - PASSED:761 +Terminating testing earlier due to user timeout request: test took 30.193036 s, max allowed was 30.000000. + PASSED:1831 FAILED:0 ADVANCED SPARSE BLAS TEST: END (SUCCESS) gmake qtests -C librsbpp @@ -9304,75 +9466,75 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' ./rsbtt -if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh +if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh ++ ./rsbpp Td,s G.mtx -++ wc -l ++ grep Z-sort +++ wc -l + test 54 = 54 ++ ./rsbpp Td G.mtx -++ wc -l ++ grep Z-sort +++ wc -l + test 27 = 27 ++ ./rsbpp Td,z G.mtx -++ wc -l ++ grep Z-sort +++ wc -l + test 54 = 54 ++ ./rsbpp vTd,z G.mtx -++ wc -l ++ grep Z-sort +++ wc -l + test 54 = 54 -++ grep Z-sort ++ ./rsbpp vTd,z G.mtx +++ grep Z-sort ++ wc -l + test 54 = 54 -++ wc -l -++ grep Zorted ++ ./rsbpp vvvTd,z G.mtx +++ grep Zorted +++ wc -l + test 8 = 8 ++ ./rsbpp vvTd,z G.mtx ++ grep Z-sort ++ wc -l + test 54 = 54 -++ wc -l ++ ./rsbpp vvTd,z G.mtx ++ grep Range +++ wc -l + test 0 = 0 -++ grep Range ++ ./rsbpp vvvTd,z G.mtx +++ grep Range ++ wc -l + test 258 -gt 0 -++ wc -l -++ grep Range ++ ./rsbpp vvvTd,z S.mtx +++ grep Range +++ wc -l + test 0 -eq 0 ++ ./rsbpp vvvTd,z G.mtx ++ grep Range ++ wc -l + test 258 = 258 -++ wc -l ++ OMP_NUM_THREADS=1 ++ ./rsbpp m10M10I1r1,4,8sFv ++ grep spmm- -+ test 9 = 9 ++ wc -l ++ test 9 = 9 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1,4,8sFv ++ grep spmm- +++ wc -l + test 9 = 9 -++ grep spmm- ++ OMP_NUM_THREADS=1 -++ wc -l ++ ./rsbpp C1000m100M100I1r1sFvtN,T -+ test 3 = 3 ++ grep spmm- +++ wc -l ++ test 3 = 3 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1vtN,TsF +++ grep spmm- ++ wc -l + test 2 = 2 -++ wc -l ++ OMP_NUM_THREADS=1 -++ grep spmm- ++ ./rsbpp C1000m100M100I1r0vtN,TsF +++ grep spmm- +++ wc -l + test 0 = 0 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 @@ -9383,20 +9545,20 @@ ++ OMP_NUM_THREADS=2 ++ RSB_NUM_THREADS=2 ++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF -++ wc -l ++ grep Recursing -+ test 4 = 4 ++ wc -l -++ grep Recursing ++ test 4 = 4 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 ++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF -+ test 208 = 208 ++ grep Recursing ++ wc -l ++ test 208 = 208 ++ OMP_NUM_THREADS=2 ++ RSB_NUM_THREADS=2 ++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF +++ grep Recursing +++ wc -l + test 410 = 410 echo "Skipping tests based on Google Test (not detected at configure time)" Skipping tests based on Google Test (not detected at configure time) @@ -9443,7 +9605,7 @@ 0 0 0 0 before tuning for SPMV: -(3 x 3)[0x55abad3276c0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' +(3 x 3)[0x55db53d73dc0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' ** x: 1.1 @@ -9532,18 +9694,18 @@ BEGIN Rsb_Matrix_test_multimatrix_ms_mnrhs BEGIN -(3 x 3)[0x55abad357740]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.52425: -(3 x 3)[0x55abad35c540]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x55abad357740]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.49077: -(3 x 3)[0x55abad35e210]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x55abad357740]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.72958: -(3 x 3)[0x55abad35cee0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x55abad357740]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.38906: -(3 x 3)[0x55abad35c540]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55db53d81090]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.06791: +(3 x 3)[0x55db53daa830]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55db53d81090]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.19895: +(3 x 3)[0x55db53dabb60]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55db53d81090]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1: +(3 x 3)[0x55db53d81090]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55db53d93490]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.00226: +(3 x 3)[0x55db53daa830]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' END OK: terminating with no allocations registered in librsb [*] tests terminated successfully ! @@ -9560,10 +9722,10 @@ ./rsbtest --no-tune --max_t 0.01 --serial | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q Building ./rsbtest --no-tune --max_t 0.01 --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --no-trans --alpha 1 --type d --rand --serial . | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q adding ! ./rsbtest --mkl A.mkl -running on ionos1-amd64 +running on i-capture-the-hostname Built without the MKL. ( ! ./rsbtest --unrecognized-option-triggers-abort ) -running on ionos1-amd64 +running on i-capture-the-hostname /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest/.libs/rsbtest: unrecognized option '--unrecognized-option-triggers-abort' unrecognized option, aborting. ( ./rsbtest --no-tune --max_t 0.01 --skip-loading-hermitian-matrices --skip-loading-unsymmetric-matrices --tune-maxt 10 --tune-maxr 10 --verbose-tuning --extra-verbose-interface --min_t 0.01 --max_t 0.01 --mintimes 1 --maxtimes 1 --verbose --skip-loading-symmetric-matrices A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q skip ) @@ -9572,7 +9734,7 @@ ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --types all --nthreads 1,2 --maxtimes 1 -+ A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q 2.threads ) ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --render-only A.mtx > /dev/null ) ! ./rsbtest --no-tune --max_t 0.01 --quiet --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --render --no-trans --alpha 1 --type all A.mtx -running on ionos1-amd64 +running on i-capture-the-hostname Will not invoke autotuning routine. Benchmark will sample for at most 0.01 s Built without render support! @@ -9624,7 +9786,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' - /bin/bash ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' + /bin/sh ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' libtool: install: /usr/bin/install -c .libs/librsb.so.0.0.0 /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu/librsb.so.0.0.0 libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so.0 || { rm -f librsb.so.0 && ln -s librsb.so.0.0.0 librsb.so.0; }; }) libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so || { rm -f librsb.so && ln -s librsb.so.0.0.0 librsb.so; }; }) @@ -9634,7 +9796,7 @@ libtool: install: ranlib /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu/librsb.a libtool: warning: remember to run 'libtool --finish /usr/lib/x86_64-linux-gnu' /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' - /bin/bash ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' + /bin/sh ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' libtool: warning: 'librsb.la' has not been installed in '/usr/lib/x86_64-linux-gnu' libtool: install: /usr/bin/install -c .libs/rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin/rsbench /usr/bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' @@ -9762,11 +9924,11 @@ dh_md5sums dh_builddeb dpkg-deb: building package 'librsb0t64-dbgsym' in '../librsb0t64-dbgsym_1.3.0.2+dfsg-7_amd64.deb'. -dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-7_amd64.deb'. -dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-7_all.deb'. dpkg-deb: building package 'librsb0t64' in '../librsb0t64_1.3.0.2+dfsg-7_amd64.deb'. -dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-7_amd64.deb'. dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-7_amd64.deb'. +dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-7_amd64.deb'. +dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-7_amd64.deb'. +dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-7_all.deb'. dpkg-genbuildinfo --build=binary -O../librsb_1.3.0.2+dfsg-7_amd64.buildinfo dpkg-genchanges --build=binary -O../librsb_1.3.0.2+dfsg-7_amd64.changes dpkg-genchanges: info: binary-only upload (no source code included) @@ -9775,12 +9937,14 @@ dpkg-buildpackage: info: binary-only upload (no source included) dpkg-genchanges: info: not including original source code in upload I: copying local configuration +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/B01_cleanup starting +I: user script /srv/workspace/pbuilder/3426253/tmp/hooks/B01_cleanup finished I: unmounting dev/ptmx filesystem I: unmounting dev/pts filesystem I: unmounting dev/shm filesystem I: unmounting proc filesystem I: unmounting sys filesystem I: cleaning the build env -I: removing directory /srv/workspace/pbuilder/287613 and its subdirectories -I: Current time: Fri Feb 21 08:08:39 -12 2025 -I: pbuilder-time-stamp: 1740168519 +I: removing directory /srv/workspace/pbuilder/3426253 and its subdirectories +I: Current time: Fri Mar 27 16:49:37 +14 2026 +I: pbuilder-time-stamp: 1774579777