commit 0ce10154e9686f22deed58b6e627283bde9677a3
Author: Damian Johnson <atagar@torproject.org>
Date: Fri Jan 15 10:00:34 2016 -0800
Skip empty files in descriptor tarballs
While reading a descriptor tarball from CollecTor we choked due to an empty file...
Traceback (most recent call last): File "metrics.py", line 81, in <module> measure_average_advertised_bandwidth('/home/atagar/Desktop/server-descriptors-2015-11.tar') File "metrics.py", line 9, in measure_average_advertised_bandwidth for desc in parse_file(path): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 171, in parse_file for desc in handler(descriptor_file, descriptor_type, validate, document_handler, **kwargs): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 246, in _parse_file_for_tar_path for desc in parse_file(tar_file, *args, **kwargs): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 171, in parse_file for desc in handler(descriptor_file, descriptor_type, validate, document_handler, **kwargs): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 261, in _parse_file_for_tarfile for desc in parse_file(entry, *args, **kwargs): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 228, in parse_file for desc in parse(descriptor_file): File "/home/atagar/Desktop/stem/stem/descriptor/__init__.py", line 226, in parse raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line)) TypeError: Unable to determine the descriptor's type. filename: 'f4b03227e12be3353af32761cbd25a62afb9ea42', first line: ''
In particular this was from...
server-descriptors-2015-11/f/4/f4b03227e12be3353af32761cbd25a62afb9ea42
It is unclear whether this indicates a problem on CollecTor's end, but either way this change makes us a little more resilient by skipping empty files in tarballs.
---
stem/descriptor/__init__.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index a2877c8..6b18d65 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -256,6 +256,9 @@ def _parse_file_for_tarfile(descriptor_file, *args, **kwargs): if tar_entry.isfile(): entry = descriptor_file.extractfile(tar_entry)
+ if entry.size == 0: + continue + try: for desc in parse_file(entry, *args, **kwargs): desc._set_archive_path(entry.name)
tor-commits@lists.torproject.org