[or-cvs] r19225: {torflow} Make libsoat use the new antlr code. (torflow/trunk/NetworkScanners)

mikeperry at seul.org mikeperry at seul.org
Mon Apr 6 10:37:49 UTC 2009


Author: mikeperry
Date: 2009-04-06 06:37:49 -0400 (Mon, 06 Apr 2009)
New Revision: 19225

Modified:
   torflow/trunk/NetworkScanners/libsoat.py
Log:

Make libsoat use the new antlr code.



Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py	2009-04-06 10:36:54 UTC (rev 19224)
+++ torflow/trunk/NetworkScanners/libsoat.py	2009-04-06 10:37:49 UTC (rev 19225)
@@ -22,14 +22,30 @@
 sys.path.append("../")
 from TorCtl.TorUtil import *
 
-try:
-  sys.path.append("./libs/pypy-svn/")
-  import pypy.rlib.parsing.parsing
-  import pypy.lang.js.jsparser
-  HAVE_PYPY = True
-except ImportError:
-  HAVE_PYPY = False
+# Antlr stuff
+sys.path.append("./libs/jsparser/")
+import antlr3
+from JavaScriptParser import tokenNames as JSTokenNames
+from JavaScriptLexer import JavaScriptLexer
+from JavaScriptParser import JavaScriptParser
 
+class ParseError(Exception): 
+  def __init__(self, tokens, e):
+    Exception.__init__(self, str(e))
+    self.tokens = tokens
+    self.e = e
+
+class LexerError(Exception): 
+  def __init__(self, tokens, e):
+    Exception.__init__(self, str(e))
+    self.tokens = tokens
+    self.e = e
+
+class ExceptionalJSParser(JavaScriptParser):
+  def displayRecognitionError(self, tokens, e): raise ParseError(tokens, e) 
+class ExceptionalJSLexer(JavaScriptLexer):
+  def displayRecognitionError(self, tokens, e): raise LexerError(tokens, e) 
+
 # constants
 
 TEST_SUCCESS = 0
@@ -818,32 +834,51 @@
 class JSDiffer:
   def __init__(self, js_string):
     self._pickle_revision = 0    
-    if HAVE_PYPY: self.ast_cnts = self._count_ast_elements(js_string)
+    self.ast_cnts = self._count_ast_elements(js_string)
 
   def depickle_upgrade(self):
     pass
 
   def _ast_recursive_worker(ast, ast_cnts):
-    if not ast.symbol in ast_cnts:
-      ast_cnts[ast.symbol] = 1
-    else: ast_cnts[ast.symbol] += 1
-    if isinstance(ast, pypy.rlib.parsing.tree.Nonterminal):
-      for child in ast.children:
-        JSDiffer._ast_recursive_worker(child, ast_cnts)
+    node = JSTokenNames[ast.getType()]
+    if not node in ast_cnts:
+      ast_cnts[node] = 1
+    else: ast_cnts[node] += 1
+
+    for child in ast.getChildren():
+      JSDiffer._ast_recursive_worker(child, ast_cnts)
   _ast_recursive_worker = Callable(_ast_recursive_worker)
- 
+
+  def _antlr_parse(self, js_string):
+    char_stream = antlr3.ANTLRStringStream(js_string)
+    lexer = ExceptionalJSLexer(char_stream)
+    tokens = antlr3.CommonTokenStream(lexer)
+    parser = ExceptionalJSParser(tokens)
+    program = parser.program()
+    return program.tree
+                                              
   def _count_ast_elements(self, js_string, name="global"):
     ast_cnts = {}
     try:
       js_string = js_string.replace("\n\r","\n").replace("\r\n","\n").replace("\r","\n")
-      ast = pypy.lang.js.jsparser.parse(js_string)
+      
+      ast = self._antlr_parse(js_string)
       JSDiffer._ast_recursive_worker(ast, ast_cnts)
-    except (pypy.rlib.parsing.deterministic.LexerError, UnicodeDecodeError, pypy.rlib.parsing.parsing.ParseError), e:
+    except UnicodeDecodeError, e:
+      name+=":"+e.__class__.__name__
+      plog("INFO", "Unicode error "+name+" on "+js_string)
+      if not "ParseError:"+name in ast_cnts:
+        ast_cnts["ParseError:"+name] = 1
+      else: ast_cnts["ParseError:"+name] +=1
+    except (LexerError, ParseError), e:
       # Store info about the name and type of parse error
       # so we can match that up too.
       name+=":"+e.__class__.__name__
-      if "source_pos" in e.__dict__:
-        name+=":"+str(e.source_pos)
+      if "line" in e.e.__dict__: 
+        name+=":"+str(e.e.line)
+      if "token" in e.e.__dict__: 
+        name+=":"+JSTokenNames[e.e.token.type]
+      # XXX: Any other things we want to add?
       plog("INFO", "Parse error "+name+" on "+js_string)
       if not "ParseError:"+name in ast_cnts:
         ast_cnts["ParseError:"+name] = 1
@@ -898,20 +933,14 @@
     return ret
 
   def prune_differences(self, other_string):
-    if not HAVE_PYPY: return
     other_cnts = self._count_ast_elements(other_string)
     self._difference_pruner(other_cnts)
 
   def contains_differences(self, other_string):
-    if not HAVE_PYPY:
-      plog("NOTICE", "PyPy import not present. Not diffing javascript")
-      return False
     other_cnts = self._count_ast_elements(other_string)
     return self._difference_checker(other_cnts) 
 
   def show_differences(self, other_string):
-    if not HAVE_PYPY:
-      return "PyPy import not present. Not diffing javascript"
     other_cnts = self._count_ast_elements(other_string)
     return self._difference_printer(other_cnts) 
 



More information about the tor-commits mailing list