commit c700693be807efa2a1f944798a49edf021087cdf
Author: Nicolas Vigier <boklm(a)torproject.org>
Date: Fri Apr 23 15:45:17 2021 +0200
Bug 40025: Use hard links to prepare input_files
When using remote_exec, we collect input_files in a temporary directory,
before copying them to the "remote" (for example a container). As we
don't normally modify the files inside this temporary directory, it is
safe to use hard links rather than copies of the files.
When remote_exec is not used, we don't use hard links by default, but
link_input_files can be set to 1 to use hard links.
---
doc/rbm_config.asc | 9 +++++++++
doc/rbm_remote.asc | 4 +++-
lib/RBM.pm | 15 ++++++++++-----
lib/RBM/DefaultConfig.pm | 1 +
4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/doc/rbm_config.asc b/doc/rbm_config.asc
index 4c65a73..eaec60f 100644
--- a/doc/rbm_config.asc
+++ b/doc/rbm_config.asc
@@ -232,6 +232,15 @@ input_files_paths::
current project and its dependencies. This is useful when
cleaning old build files, to find which ones are still used.
+link_input_files::
+ When building a project, input files are collected in a
+ temporary directory. If this option is set to 1, we try to use
+ hard links instead of copies. You should only enable this if
+ you don't modify the input files during the build, or if you
+ are using +remote_exec+ (in which case the temporary directory
+ is only used to copy files to the remote). This option is
+ disabled by default, unless +remote_exec+ is used.
+
timestamp::
This is the UNIX timestamp, set as modification time on files
created such as the sources tarball. The default is to use the
diff --git a/doc/rbm_remote.asc b/doc/rbm_remote.asc
index 81de98f..7f4c3d8 100644
--- a/doc/rbm_remote.asc
+++ b/doc/rbm_remote.asc
@@ -46,7 +46,9 @@ remote_start::
we can access it. This can be useful for instance if you need
to start a VM, or fetch a container. If access to an input file is
needed, the +remote_srcdir+ option is pointing to a temporary
- directory containing the input files.
+ directory containing the input files. The input files are hard
+ links to their original location, if it is on the same
+ filesystem, and +link_input_files+ has not been set to 0.
remote_finish::
the template of a command that will stop the remote host after
diff --git a/lib/RBM.pm b/lib/RBM.pm
index bc509cb..b93488f 100644
--- a/lib/RBM.pm
+++ b/lib/RBM.pm
@@ -766,7 +766,10 @@ sub input_file_id {
}
sub recursive_copy {
- my ($fname, $name, $dest_dir) = @_;
+ my ($fname, $name, $dest_dir, $action) = @_;
+ if (-f $fname && $action eq 'link') {
+ return ($name) if link $fname, "$dest_dir/$name";
+ }
if (-f $fname || -l $fname) {
fcopy($fname, "$dest_dir/$name");
return ($name);
@@ -774,7 +777,7 @@ sub recursive_copy {
my @copied;
mkdir "$dest_dir/$name";
foreach my $f (map { $_->basename } path($fname)->children) {
- push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir);
+ push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir, $action);
}
return @copied;
}
@@ -978,12 +981,12 @@ sub input_files {
my $file_type = -d $fname ? 'directory' : 'file';
print "Using $file_type $fname\n";
mkdir dirname("$dest_dir/$name");
- push @res_copy, recursive_copy($fname, $name, $dest_dir);
+ push @res_copy, recursive_copy($fname, $name, $dest_dir, $action);
}
chdir $old_cwd;
RETURN_RES:
return sha256_hex($input_files_id) if $action eq 'input_files_id';
- return @res_copy if $action eq 'copy';
+ return @res_copy if ($action eq 'copy' || $action eq 'link');
return \%res_getfnames if $action eq 'getfnames';
return \@res_getfpaths if $action eq 'getfpaths';
}
@@ -1034,7 +1037,9 @@ sub build_run {
my $srcdir = $tmpdir->dirname;
my @cfiles;
push @cfiles, copy_files($project, $srcdir);
- push @cfiles, input_files('copy', $project, $options, $srcdir);
+ my $if_action = project_config($project, 'link_input_files', $options) ?
+ 'link' : 'copy';
+ push @cfiles, input_files($if_action, $project, $options, $srcdir);
my $tarfile = maketar($project, $options, $srcdir);
push @cfiles, $tarfile if $tarfile;
my ($remote_tmp_src, $remote_tmp_dst, %build_script);
diff --git a/lib/RBM/DefaultConfig.pm b/lib/RBM/DefaultConfig.pm
index c19d160..408044e 100644
--- a/lib/RBM/DefaultConfig.pm
+++ b/lib/RBM/DefaultConfig.pm
@@ -444,6 +444,7 @@ ZIP_END
input_files_by_name => sub { RBM::input_files('getfnames', @_); },
input_files_id => sub { RBM::input_files('input_files_id', @_); },
input_files_paths => sub { RBM::input_files('getfpaths', @_); },
+ link_input_files => '[% IF c("remote_exec") %]1[% END %]',
steps => {
},
suexec => 'sudo -- [% c("suexec_cmd") %]',