[or-cvs] r16022: Update to wml2po.py and po2wml.py to include some additional tags (translation/trunk/tools)

ampleyfly at seul.org ampleyfly at seul.org
Thu Jul 17 15:50:07 UTC 2008


Author: ampleyfly
Date: 2008-07-17 11:50:07 -0400 (Thu, 17 Jul 2008)
New Revision: 16022

Modified:
   translation/trunk/tools/po2wml.py
   translation/trunk/tools/wml2po.py
Log:
Update to wml2po.py and po2wml.py to include some additional tags.
Pairs of tags within pairs of tags will not generate strings, as this would cause duplication.



Modified: translation/trunk/tools/po2wml.py
===================================================================
--- translation/trunk/tools/po2wml.py	2008-07-17 15:14:20 UTC (rev 16021)
+++ translation/trunk/tools/po2wml.py	2008-07-17 15:50:07 UTC (rev 16022)
@@ -6,29 +6,44 @@
 	print 'Usage: po2wml [-h | --help] [-i | --input] [-o | --output] [-t | --template]'
 
 def unescape(string):
-	return string.replace('\\n', '\n')
+	return string.replace('\\n', '\n').replace('\\"', '"')
 
 def replace_string(indata, matchiter, string):
+	# currently assumes order of strings in po file and in template wml file is the same
 	stack = []
 
 	while True:
 		try:
 			match = matchiter.next()
 			if match.group(1):
-				print 'paragraph closed'
-				start = stack.pop()
+				# a tag was closed
+
+				# do not match if not same type as last opened tag,
+				#  doubt it if this check is needed
+				if match.group(2) != stack[-1][0]:
+					continue
+
+				tagname, start = stack.pop()
+
+				# make sure we don't pick strings contained in others
+				if len(stack) > 0:
+					continue
+
 				end = match.start()
+
 				return indata[:start] + string + indata[end:], matchiter
 			else:
-				print 'paragraph opened'
-				stack.append(match.end())
+				# a tag was opened
+				tagname = match.group(2)
+				stack.append((tagname, match.end()))
 		except StopIteration:
-			print 'couldn not find string to replace with %s' % string
+			print 'could not find string to replace with %s' % string
 			break
 
 def parse(infile, outfile, template):
 	templatedata = template.read()
-	regex = re.compile('<(/?)p\s*>')
+	# this regex matches opening and closing tags of types p, h*, pre, a, ul, dl and blockquote
+	regex = re.compile('<(/?)(p|h\d|pre|a|ul|dl|blockquote)\s*((\w+|(\w+\s?=\s?(\w|\"(.|\s)*?\"|\'(.|\s)*?\')))\s*)*>')
 	matchiter = regex.finditer(templatedata)
 
 	for line in infile:
@@ -38,7 +53,7 @@
 			string = line[7:].lstrip(' "').rstrip(' "\n')
 			string = unescape(string)
 			if string != '':
-				print 'trying to substitute for %s from %s' % (string, line)
+				#print 'trying to substitute for %s from %s' % (string, line)
 				templatedata, matchiter = replace_string(templatedata, matchiter, string)
 			else:
 				# ugly way of stepping past current paragraph

Modified: translation/trunk/tools/wml2po.py
===================================================================
--- translation/trunk/tools/wml2po.py	2008-07-17 15:14:20 UTC (rev 16021)
+++ translation/trunk/tools/wml2po.py	2008-07-17 15:50:07 UTC (rev 16022)
@@ -2,17 +2,19 @@
 import getopt
 import re
 
+# is there a nicer way to do this?
 string_no = 1
 
 def usage():
 	print 'Usage: wml2po [-h | --help] [-i | --input] [-o | --output]'
 
 def escape(string):
-	return string.replace('\n', '\\n')
+	return string.replace('\n', '\\n').replace('"', '\\"')
 
-def add_po_string(filename, outfile, string):
+def add_po_string(filename, outfile, string, tagname):
+	# something different that string number should probably be given, like the line number
 	global string_no
-	outdata =  '#: %s:%d\n' % (filename, string_no)
+	outdata =  '#: %s:%d:%s\n' % (filename, string_no, tagname)
 	outdata += 'msgid "%s"\n' % escape(string)
 	outdata += 'msgstr ""\n'
 	outdata += '\n'
@@ -21,23 +23,35 @@
 
 def parse(infile, outfile):
 	indata = infile.read()
-	regex = re.compile('<(/?)p\s*>')
+	# this regex matches opening and closing tags of types p, h*, pre, a, ul, dl and blockquote
+	regex = re.compile('<(/?)(p|h\d|pre|a|ul|dl|blockquote)\s*((\w+|(\w+\s?=\s?(\w|\"(.|\s)*?\"|\'(.|\s)*?\')))\s*)*>')
 	matchiter = regex.finditer(indata)
 
 	stack = []
 
 	for match in matchiter:
 		if match.group(1):
-			#print 'paragraph closed'
-			start = stack.pop()
+			# closing tag found
+
+			# do not match if not same type as last opened tag,
+			#  doubt it if this check is needed
+			if match.group(2) != stack[-1][0]:
+				continue
+
+			tagname, start = stack.pop()
+
+			# make sure we don't pick strings contained in others, to avoid duplication.
+			if len(stack) > 0:
+				continue
+
 			end = match.start()
 			contents = indata[start:end]
-			#print 'start %d, end %d, contents: %s' % (start, end, contents)
 
-			add_po_string(infile.name, outfile, contents)
+			add_po_string(infile.name, outfile, contents, tagname)
 		else:
-			#print 'paragraph opened'
-			stack.append(match.end())
+			# opening tag found
+			tagname = match.group(2)
+			stack.append((tagname, match.end()))
 
 def main(argv):
 	try:
@@ -69,6 +83,8 @@
 	if outfile == None: outfile = sys.stdout 
 
 	parse(infile, outfile)
+	infile.close()
+	outfile.close()
 
 if __name__ == '__main__':
 	main(sys.argv[1:])



More information about the tor-commits mailing list