[tor-commits] [rbm/master] Bug 40025: Use hard link to prepare input_files

gk at torproject.org gk at torproject.org
Mon May 10 08:50:02 UTC 2021


commit c700693be807efa2a1f944798a49edf021087cdf
Author: Nicolas Vigier <boklm at torproject.org>
Date:   Fri Apr 23 15:45:17 2021 +0200

    Bug 40025: Use hard link to prepare input_files
    
    When using remote_exec, we collect input_files in a temporary directory,
    before copying them to the "remote" (for example a container). As we
    don't normally modify the files inside this temporary directory, it is
    safe to use hard links rather than copies of the files.
    
    When remote_exec is not used, we don't use hard links by default, but
    link_input_files can be set to 1 to use hard links.
---
 doc/rbm_config.asc       |  9 +++++++++
 doc/rbm_remote.asc       |  4 +++-
 lib/RBM.pm               | 15 ++++++++++-----
 lib/RBM/DefaultConfig.pm |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/doc/rbm_config.asc b/doc/rbm_config.asc
index 4c65a73..eaec60f 100644
--- a/doc/rbm_config.asc
+++ b/doc/rbm_config.asc
@@ -232,6 +232,15 @@ input_files_paths::
         current project and its dependencies. This is useful when
         cleaning old build files, to find which ones are still used.
 
+link_input_files::
+        When building a project, input files are collected in a
+        temporary directory. If this option is set to 1, we try to use
+        hard links instead of copies. You should only enable this if
+        you don't modify the input files during the build, or if you
+        are using +remote_exec+ (in which case the temporary directory
+        is only used to copy files to the remote). This option is
+        disabled by default, unless +remote_exec+ is used.
+
 timestamp::
         This is the UNIX timestamp, set as modification time on files
         created such as the sources tarball. The default is to use the
diff --git a/doc/rbm_remote.asc b/doc/rbm_remote.asc
index 81de98f..7f4c3d8 100644
--- a/doc/rbm_remote.asc
+++ b/doc/rbm_remote.asc
@@ -46,7 +46,9 @@ remote_start::
         we can access it. This can be useful for instance if you need
         to start a VM, or fetch a container. If access to an input file is
         needed, the +remote_srcdir+ option is pointing to a temporary
-        directory containing the input files.
+        directory containing the input files. The input files are hard
+        links to their original location, if it is on the same
+        filesystem, and +link_input_files+ has not been set to 0.
 
 remote_finish::
         the template of a command that will stop the remote host after
diff --git a/lib/RBM.pm b/lib/RBM.pm
index bc509cb..b93488f 100644
--- a/lib/RBM.pm
+++ b/lib/RBM.pm
@@ -766,7 +766,10 @@ sub input_file_id {
 }
 
 sub recursive_copy {
-    my ($fname, $name, $dest_dir) = @_;
+    my ($fname, $name, $dest_dir, $action) = @_;
+    if (-f $fname && $action eq 'link') {
+        return ($name) if link $fname, "$dest_dir/$name";
+    }
     if (-f $fname || -l $fname) {
         fcopy($fname, "$dest_dir/$name");
         return ($name);
@@ -774,7 +777,7 @@ sub recursive_copy {
     my @copied;
     mkdir "$dest_dir/$name";
     foreach my $f (map { $_->basename } path($fname)->children) {
-        push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir);
+        push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir, $action);
     }
     return @copied;
 }
@@ -978,12 +981,12 @@ sub input_files {
         my $file_type = -d $fname ? 'directory' : 'file';
         print "Using $file_type $fname\n";
         mkdir dirname("$dest_dir/$name");
-        push @res_copy, recursive_copy($fname, $name, $dest_dir);
+        push @res_copy, recursive_copy($fname, $name, $dest_dir, $action);
     }
     chdir $old_cwd;
     RETURN_RES:
     return sha256_hex($input_files_id) if $action eq 'input_files_id';
-    return @res_copy if $action eq 'copy';
+    return @res_copy if ($action eq 'copy' || $action eq 'link');
     return \%res_getfnames if $action eq 'getfnames';
     return \@res_getfpaths if $action eq 'getfpaths';
 }
@@ -1034,7 +1037,9 @@ sub build_run {
     my $srcdir = $tmpdir->dirname;
     my @cfiles;
     push @cfiles, copy_files($project, $srcdir);
-    push @cfiles, input_files('copy', $project, $options, $srcdir);
+    my $if_action = project_config($project, 'link_input_files', $options) ?
+                                'link' : 'copy';
+    push @cfiles, input_files($if_action, $project, $options, $srcdir);
     my $tarfile = maketar($project, $options, $srcdir);
     push @cfiles, $tarfile if $tarfile;
     my ($remote_tmp_src, $remote_tmp_dst, %build_script);
diff --git a/lib/RBM/DefaultConfig.pm b/lib/RBM/DefaultConfig.pm
index c19d160..408044e 100644
--- a/lib/RBM/DefaultConfig.pm
+++ b/lib/RBM/DefaultConfig.pm
@@ -444,6 +444,7 @@ ZIP_END
     input_files_by_name => sub { RBM::input_files('getfnames', @_); },
     input_files_id => sub { RBM::input_files('input_files_id', @_); },
     input_files_paths => sub { RBM::input_files('getfpaths', @_); },
+    link_input_files => '[% IF c("remote_exec") %]1[% END %]',
     steps => {
     },
     suexec => 'sudo -- [% c("suexec_cmd") %]',



More information about the tor-commits mailing list