% Bibliography: trusting-trust attacks, reproducible builds, and software
% supply-chain security. Entries are kept in chronological order (1984-2024).
%
% Conventions used in this file:
%   - one field per line; author names in "Last, First" form;
%   - page ranges use a double hyphen; months use the bare three-letter macros;
%   - non-ASCII letters are written as LaTeX escapes so the file also works
%     with classic 8-bit BibTeX;
%   - abstract, keywords, pdf, hal_id, hal_version, timestamp, biburl,
%     bibsource, collection, numpages, articleno, location and day are
%     exporter metadata that standard styles ignore; they are kept for reference.

% ---------------------------------------------------------------- 1984-2015

@article{Thompson84,
  author    = {Thompson, Ken},
  title     = {Reflections on Trusting Trust},
  journal   = {Communications of the {ACM}},
  volume    = {27},
  number    = {8},
  pages     = {761--763},
  year      = {1984},
  doi       = {10.1145/358198.358210},
  url       = {https://doi.org/10.1145/358198.358210},
  timestamp = {Wed, 14 Nov 2018 10:22:35 +0100},
  biburl    = {https://dblp.org/rec/journals/cacm/Thompson84.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@misc{wheeler2010fully,
  author        = {Wheeler, David A.},
  title         = {Fully Countering Trusting Trust through Diverse Double-Compiling},
  year          = {2010},
  eprint        = {1004.5534},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CR}
}

@misc{courts2013functional,
  author        = {Court{\`e}s, Ludovic},
  title         = {Functional Package Management with {Guix}},
  year          = {2013},
  eprint        = {1305.4584},
  archiveprefix = {arXiv},
  primaryclass  = {cs.PL}
}

@inproceedings{courtes:hal-01161771,
  author      = {Court{\`e}s, Ludovic and Wurmus, Ricardo},
  title       = {Reproducible and User-Controlled Software Environments in {HPC} with {Guix}},
  booktitle   = {2nd International Workshop on Reproducibility in Parallel Computing ({RepPar})},
  address     = {Vienna, Austria},
  year        = {2015},
  month       = aug,
  keywords    = {hpc ; reproducible research ; package management ; gestion de paquets ; r{\'e}p{\'e}tabilit{\'e} ; calcul intensif},
  url         = {https://inria.hal.science/hal-01161771},
  pdf         = {https://inria.hal.science/hal-01161771v2/file/reproducible-hpc.pdf},
  hal_id      = {hal-01161771},
  hal_version = {v2}
}

% ---------------------------------------------------------------- 2018-2020

@inproceedings{Ren_2018,
  author     = {Ren, Zhilei and Jiang, He and Xuan, Jifeng and Yang, Zijiang},
  title      = {Automated localization for unreproducible builds},
  booktitle  = {Proceedings of the 40th International Conference on Software Engineering},
  series     = {ICSE '18},
  collection = {ICSE '18},
  publisher  = {Association for Computing Machinery},
  year       = {2018},
  month      = may,
  doi        = {10.1145/3180155.3180224},
  url        = {https://doi.org/10.1145/3180155.3180224}
}

@inproceedings{8935014,
  author    = {Tapas, Nachiket and Longo, Francesco and Merlino, Giovanni and Puliafito, Antonio},
  title     = {Transparent, Provenance-assured, and Secure Software-as-a-Service},
  booktitle = {2019 IEEE 18th International Symposium on Network Computing and Applications (NCA)},
  year      = {2019},
  pages     = {1--8},
  keywords  = {Cloud computing;Software as a service;Blockchain;Proposals;Cryptography;Software-as-a-Service;OpenStack;Heat;service orchestration;blockchain;Ethereum;Tendermint;Trillian},
  doi       = {10.1109/NCA.2019.8935014}
}

@inproceedings{10.5555/3361338.3361435,
  author    = {Torres-Arias, Santiago and Afzali, Hammad and Kuppusamy, Trishank Karthik and Curtmola, Reza and Cappos, Justin},
  title     = {In-toto: providing farm-to-table guarantees for bits and bytes},
  booktitle = {Proceedings of the 28th USENIX Conference on Security Symposium},
  series    = {SEC'19},
  publisher = {USENIX Association},
  address   = {USA},
  location  = {Santa Clara, CA, USA},
  year      = {2019},
  month     = aug,
  pages     = {1393--1410},
  numpages  = {18},
  isbn      = {9781939133069},
  url       = {https://www.usenix.org/conference/usenixsecurity19/presentation/torres-arias},
  abstract  = {The software development process is quite complex and involves a number of independent actors. Developers check source code into a version control system, the code is compiled into software at a build farm, and CI/CD systems run multiple tests to ensure the software's quality among a myriad of other operations. Finally, the software is packaged for distribution into a delivered product, to be consumed by end users. An attacker that is able to compromise any single step in the process can maliciously modify the software and harm any of the software's users. To address these issues, we designed in-toto, a framework that cryptographically ensures the integrity of the software supply chain. in-toto grants the end user the ability to verify the software's supply chain from the project's inception to its deployment. We demonstrate in-toto's effectiveness on 30 software supply chain compromises that affected hundreds of million of users and showcase in-toto's usage over cloud-native, hybrid-cloud and cloud-agnostic applications. in-toto is integrated into products and open source projects that are used by millions of people daily. The project website is available at: https://in-toto.io.}
}

@incollection{Ohm_2020,
  author    = {Ohm, Marc and Plate, Henrik and Sykosch, Arnold and Meier, Michael},
  title     = {Backstabber's Knife Collection: A Review of Open Source Software Supply Chain Attacks},
  booktitle = {Lecture Notes in Computer Science},
  publisher = {Springer International Publishing},
  year      = {2020},
  pages     = {23--43},
  isbn      = {9783030526832},
  issn      = {1611-3349},
  doi       = {10.1007/978-3-030-52683-2_2},
  url       = {https://doi.org/10.1007/978-3-030-52683-2_2}
}

@inproceedings{10.1145/3373376.3378519,
  author    = {Navarro Leija, Omar S. and Shiptoski, Kelly and Scott, Ryan G. and Wang, Baojun and Renner, Nicholas and Newton, Ryan R. and Devietti, Joseph},
  title     = {Reproducible Containers},
  booktitle = {Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {ASPLOS '20},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Lausanne, Switzerland},
  year      = {2020},
  pages     = {167--182},
  numpages  = {16},
  isbn      = {9781450371025},
  doi       = {10.1145/3373376.3378519},
  url       = {https://doi.org/10.1145/3373376.3378519},
  keywords  = {software containers, reproducibility, linux, determinism},
  abstract  = {We describe the design and implementation of DetTrace, a reproducible container abstraction for Linux implemented in user space. All computation that occurs inside a DetTrace container is a pure function of the initial filesystem state of the container. Reproducible containers can be used for a variety of purposes, including replication for fault-tolerance, reproducible software builds and reproducible data analytics. We use DetTrace to achieve, in an automatic fashion, reproducibility for 12,130 Debian package builds, containing over 800 million lines of code, as well as bioinformatics and machine learning workflows. We show that, while software in each of these domains is initially irreproducible, DetTrace brings reproducibility without requiring any hardware, OS or application changes. DetTrace's performance is dictated by the frequency of system calls: IO-intensive software builds have an average overhead of 3.49x, while a compute-bound bioinformatics workflow is under 2\%.}
}

@inproceedings{10.1145/3407023.3409183,
  author    = {Ohm, Marc and Sykosch, Arnold and Meier, Michael},
  title     = {Towards detection of software supply chain attacks by forensic artifacts},
  booktitle = {Proceedings of the 15th International Conference on Availability, Reliability and Security},
  series    = {ARES '20},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Virtual Event, Ireland},
  year      = {2020},
  articleno = {65},
  numpages  = {6},
  isbn      = {9781450388337},
  doi       = {10.1145/3407023.3409183},
  url       = {https://doi.org/10.1145/3407023.3409183},
  keywords  = {DevSecOps, application security, malware, supply chain attack},
  abstract  = {Third-party dependencies may introduce security risks to the software supply chain and hence yield harm to their dependent software. There are many known cases of malicious open source packages posing risks to developers and end users. However, while efforts are made to detect vulnerable open source packages, malicious packages are not yet considered explicitly. In order to tackle this problem we perform an exploratory case study on previously occurred attacks on the software supply chain with respect to observable artifacts created. Based on gained insights, we propose Buildwatch, a framework for dynamic analysis of software and its third-party dependencies. We noticed that malicious packages introduce a significant amount of new artifacts during installation when compared to benign versions of the same package. The paper presents a first analysis of observable artifacts of malicious packages as well as a possible mitigation strategy that might lead to more insight in long term.}
}

% ---------------------------------------------------------------- 2022-2023

@article{9403390,
  author   = {Lamb, Chris and Zacchiroli, Stefano},
  title    = {Reproducible Builds: Increasing the Integrity of Software Supply Chains},
  journal  = {IEEE Software},
  year     = {2022},
  volume   = {39},
  number   = {2},
  pages    = {62--70},
  keywords = {Software;Linux;Supply chains;Reproducibility of results;Metadata;Buildings;Tools},
  doi      = {10.1109/MS.2021.3073045}
}

@article{9465650,
  author   = {Shi, Yong and Wen, Mingzhi and Cogo, Filipe R. and Chen, Boyuan and Jiang, Zhen Ming},
  title    = {An Experience Report on Producing Verifiable Builds for Large-Scale Commercial Systems},
  journal  = {IEEE Transactions on Software Engineering},
  year     = {2022},
  volume   = {48},
  number   = {9},
  pages    = {3361--3377},
  keywords = {Security;Software;Tools;Process control;System implementation;Software engineering;Safety;Verifiable build;large scale commercial system;build system;security;trustworthiness;software engineering},
  doi      = {10.1109/TSE.2021.3092692}
}

@inproceedings{10.1145/3510003.3510102,
  author    = {Ren, Zhilei and Sun, Shiwei and Xuan, Jifeng and Li, Xiaochen and Zhou, Zhide and Jiang, He},
  title     = {Automated patching for unreproducible builds},
  booktitle = {Proceedings of the 44th International Conference on Software Engineering},
  series    = {ICSE '22},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Pittsburgh, Pennsylvania},
  year      = {2022},
  pages     = {200--211},
  numpages  = {12},
  isbn      = {9781450392211},
  doi       = {10.1145/3510003.3510102},
  url       = {https://doi.org/10.1145/3510003.3510102},
  keywords  = {automated patch generation, dynamic tracing, reproducible builds},
  abstract  = {Software reproducibility plays an essential role in establishing trust between source code and the built artifacts, by comparing compilation outputs acquired from independent users. Although the testing for unreproducible builds could be automated, fixing unreproducible build issues poses a set of challenges within the reproducible builds practice, among which we consider the localization granularity and the historical knowledge utilization as the most significant ones. To tackle these challenges, we propose a novel approach RepFix that combines tracing-based fine-grained localization with history-based patch generation mechanisms. On the one hand, to tackle the localization granularity challenge, we adopt system-level dynamic tracing to capture both the system call traces and user-space function call information. By integrating the kernel probes and user-space probes, we could determine the location of each executed build command more accurately. On the other hand, to tackle the historical knowledge utilization challenge, we design a similarity based relevant patch retrieving mechanism, and generate patches by applying the edit operations of the existing patches. With the abundant patches accumulated by the reproducible builds practice, we could generate patches to fix the unreproducible builds automatically. To evaluate the usefulness of RepFix, extensive experiments are conducted over a dataset with 116 real-world packages. Based on RepFix, we successfully fix the unreproducible build issues for 64 packages. Moreover, we apply RepFix to the Arch Linux packages, and successfully fix four packages. Two patches have been accepted by the repository, and there is one package for which the patch is pushed and accepted by its upstream repository, so that the fixing could be helpful for other downstream repositories.}
}

@article{9740718,
  author   = {Enck, William and Williams, Laurie},
  title    = {Top Five Challenges in Software Supply Chain Security: Observations From 30 Industry and Government Organizations},
  journal  = {IEEE Security \& Privacy},
  year     = {2022},
  volume   = {20},
  number   = {2},
  pages    = {96--100},
  keywords = {Industries;Privacy;Codes;Supply chains;Government;Ecosystems;Software development management},
  doi      = {10.1109/MSEC.2022.3142338}
}

@article{Butler2023,
  author   = {Butler, Simon and Gamalielsson, Jonas and Lundell, Bj{\"o}rn and Brax, Christoffer and Mattsson, Anders and Gustavsson, Tomas and Feist, Jonas and Kvarnstr{\"o}m, Bengt and L{\"o}nroth, Erik},
  title    = {On business adoption and use of reproducible builds for open and closed source software},
  journal  = {Software Quality Journal},
  year     = {2023},
  month    = sep,
  day      = {01},
  volume   = {31},
  number   = {3},
  pages    = {687--719},
  issn     = {1573-1367},
  doi      = {10.1007/s11219-022-09607-z},
  url      = {https://doi.org/10.1007/s11219-022-09607-z},
  abstract = {Reproducible builds (R-Bs) are software engineering practices that reliably create bit-for-bit identical binary executable files from specified source code. R-Bs are applied in some open source software (OSS) projects and distributions to allow verification that the distributed binary has been built from the released source code. The use of R-Bs has been advocated in software maintenance and R-Bs are applied in the development of some OSS security applications. Nonetheless, industry application of R-Bs appears limited, and we seek to understand whether awareness is low or if significant technical and business reasons prevent wider adoption. Through interviews with software practitioners and business managers, this study explores the utility of applying R-Bs in businesses in the primary and secondary software sectors and the business and technical reasons supporting their adoption. We find businesses use R-Bs in the safety-critical and security domains, and R-Bs are valuable for traceability and support collaborative software development. We also found that R-Bs are valued as engineering processes and are seen as a badge of software quality, but without a tangible value proposition. There are good engineering reasons to use R-Bs in industrial software development, and the principle of establishing correspondence between source code and binary offers opportunities for the development of further applications.}
}

@inproceedings{10179320,
  author    = {Fourne, M. and Wermke, D. and Enck, W. and Fahl, S. and Acar, Y.},
  title     = {It's like flossing your teeth: On the Importance and Challenges of Reproducible Builds for Software Supply Chain Security},
  booktitle = {2023 IEEE Symposium on Security and Privacy (SP)},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  year      = {2023},
  month     = may,
  pages     = {1527--1544},
  doi       = {10.1109/SP46215.2023.10179320},
  url       = {https://doi.ieeecomputersociety.org/10.1109/SP46215.2023.10179320},
  keywords  = {industries;privacy;source coding;supply chains;collaboration;software;time measurement},
  abstract  = {The 2020 Solarwinds attack was a tipping point that caused a heightened awareness about the security of the software supply chain and in particular the large amount of trust placed in build systems. Reproducible Builds (R-Bs) provide a strong foundation to build defenses for arbitrary attacks against build systems by ensuring that given the same source code, build environment, and build instructions, bitwise-identical artifacts are created. Unfortunately, much of the software industry believes R-Bs are too far out of reach for most projects. The goal of this paper is to help identify a path for R-Bs to become a commonplace property. To this end, we conducted a series of 24 semi-structured expert interviews with participants from the Reproducible-Builds.org project, finding that self-effective work by highly motivated developers and collaborative communication with upstream projects are key contributors to R-Bs. We identified a range of motivations that can encourage open source developers to strive for R-Bs, including indicators of quality, security benefits, and more efficient caching of artifacts. We also identify experiences that help and hinder adoption, which often revolves around communication with upstream projects. We conclude with recommendations on how to better integrate R-Bs with the efforts of the open source and free software community.}
}

% --------------------------------------------------------------------- 2024

@misc{schorlemmer2024signing,
  author        = {Schorlemmer, Taylor R. and Kalu, Kelechi G. and Chigges, Luke and Ko, Kyung Myung and Abu Isghair, Eman Abdul-Muhd and Baghi, Saurabh and Torres-Arias, Santiago and Davis, James C.},
  title         = {Signing in Four Public Software Package Registries: Quantity, Quality, and Influencing Factors},
  year          = {2024},
  eprint        = {2401.14635},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CR}
}

@misc{malka2024reproducibility,
  author        = {Malka, Julien and Zacchiroli, Stefano and Zimmermann, Th{\'e}o},
  title         = {Reproducibility of Build Environments through Space and Time},
  year          = {2024},
  eprint        = {2402.00424},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SE}
}

@inproceedings{randrianaina:hal-04441579,
  author      = {Randrianaina, Georges Aaron and Khelladi, Djamel Eddine and Zendra, Olivier and Acher, Mathieu},
  title       = {Options Matter: Documenting and Fixing Non-Reproducible Builds in Highly-Configurable Systems},
  booktitle   = {{MSR} 2024 - 21st International Conference on Mining Software Repositories},
  address     = {Lisbon, Portugal},
  year        = {2024},
  month       = apr,
  pages       = {1--11},
  keywords    = {Reproducible Builds ; Build System ; Highly-configurable System},
  url         = {https://inria.hal.science/hal-04441579},
  pdf         = {https://inria.hal.science/hal-04441579v2/file/msr24.pdf},
  hal_id      = {hal-04441579},
  hal_version = {v2}
}

@mastersthesis{dellaiera_2024_12666899,
  author = {Dellaiera, Pol},
  title  = {Reproducibility in Software Engineering},
  school = {University of Mons},
  year   = {2024},
  month  = jul,
  doi    = {10.5281/zenodo.12666899},
  url    = {https://doi.org/10.5281/zenodo.12666899}
}