commit c700693be807efa2a1f944798a49edf021087cdf Author: Nicolas Vigier boklm@torproject.org Date: Fri Apr 23 15:45:17 2021 +0200
Bug 40025: Use hard links to prepare input_files
When using remote_exec, we collect input_files in a temporary directory before copying them to the "remote" (for example a container). As we don't normally modify the files inside this temporary directory, it is safe to use hard links rather than copies of the files.
When remote_exec is not used, we don't use hard links by default, but link_input_files can be set to 1 to use hard links. --- doc/rbm_config.asc | 9 +++++++++ doc/rbm_remote.asc | 4 +++- lib/RBM.pm | 15 ++++++++++----- lib/RBM/DefaultConfig.pm | 1 + 4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/doc/rbm_config.asc b/doc/rbm_config.asc index 4c65a73..eaec60f 100644 --- a/doc/rbm_config.asc +++ b/doc/rbm_config.asc @@ -232,6 +232,15 @@ input_files_paths:: current project and its dependencies. This is useful when cleaning old build files, to find which ones are still used.
+link_input_files:: + When building a project, input files are collected in a + temporary directory. If this option is set to 1, we try to use + hard links instead of copies. You should only enable this if + you don't modify the input files during the build, or if you + are using +remote_exec+ (in which case the temporary directory + is only used to copy files to the remote). This option is + disabled by default, unless +remote_exec+ is used. + timestamp:: This is the UNIX timestamp, set as modification time on files created such as the sources tarball. The default is to use the diff --git a/doc/rbm_remote.asc b/doc/rbm_remote.asc index 81de98f..7f4c3d8 100644 --- a/doc/rbm_remote.asc +++ b/doc/rbm_remote.asc @@ -46,7 +46,9 @@ remote_start:: we can access it. This can be useful for instance if you need to start a VM, or fetch a container. If access to an input file is needed, the +remote_srcdir+ option is pointing to a temporary - directory containing the input files. + directory containing the input files. The input files are hard + links to their original location, if it is on the same + filesystem, and +link_input_files+ has not been set to 0.
remote_finish:: the template of a command that will stop the remote host after diff --git a/lib/RBM.pm b/lib/RBM.pm index bc509cb..b93488f 100644 --- a/lib/RBM.pm +++ b/lib/RBM.pm @@ -766,7 +766,10 @@ sub input_file_id { }
sub recursive_copy { - my ($fname, $name, $dest_dir) = @_; + my ($fname, $name, $dest_dir, $action) = @_; + if (-f $fname && $action eq 'link') { + return ($name) if link $fname, "$dest_dir/$name"; + } if (-f $fname || -l $fname) { fcopy($fname, "$dest_dir/$name"); return ($name); @@ -774,7 +777,7 @@ sub recursive_copy { my @copied; mkdir "$dest_dir/$name"; foreach my $f (map { $_->basename } path($fname)->children) { - push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir); + push @copied, recursive_copy("$fname/$f", "$name/$f", $dest_dir, $action); } return @copied; } @@ -978,12 +981,12 @@ sub input_files { my $file_type = -d $fname ? 'directory' : 'file'; print "Using $file_type $fname\n"; mkdir dirname("$dest_dir/$name"); - push @res_copy, recursive_copy($fname, $name, $dest_dir); + push @res_copy, recursive_copy($fname, $name, $dest_dir, $action); } chdir $old_cwd; RETURN_RES: return sha256_hex($input_files_id) if $action eq 'input_files_id'; - return @res_copy if $action eq 'copy'; + return @res_copy if ($action eq 'copy' || $action eq 'link'); return %res_getfnames if $action eq 'getfnames'; return @res_getfpaths if $action eq 'getfpaths'; } @@ -1034,7 +1037,9 @@ sub build_run { my $srcdir = $tmpdir->dirname; my @cfiles; push @cfiles, copy_files($project, $srcdir); - push @cfiles, input_files('copy', $project, $options, $srcdir); + my $if_action = project_config($project, 'link_input_files', $options) ? 
+ 'link' : 'copy'; + push @cfiles, input_files($if_action, $project, $options, $srcdir); my $tarfile = maketar($project, $options, $srcdir); push @cfiles, $tarfile if $tarfile; my ($remote_tmp_src, $remote_tmp_dst, %build_script); diff --git a/lib/RBM/DefaultConfig.pm b/lib/RBM/DefaultConfig.pm index c19d160..408044e 100644 --- a/lib/RBM/DefaultConfig.pm +++ b/lib/RBM/DefaultConfig.pm @@ -444,6 +444,7 @@ ZIP_END input_files_by_name => sub { RBM::input_files('getfnames', @_); }, input_files_id => sub { RBM::input_files('input_files_id', @_); }, input_files_paths => sub { RBM::input_files('getfpaths', @_); }, + link_input_files => '[% IF c("remote_exec") %]1[% END %]', steps => { }, suexec => 'sudo -- [% c("suexec_cmd") %]',