[tor-commits] [Git][tpo/applications/tor-browser][tor-browser-128.3.0esr-14.0-1] 2 commits: fixup! Bug 42305: Add script to combine translation files across versions.

Pier Angelo Vendrame (@pierov) git at gitlab.torproject.org
Tue Oct 1 15:37:06 UTC 2024



Pier Angelo Vendrame pushed to branch tor-browser-128.3.0esr-14.0-1 at The Tor Project / Applications / Tor Browser


Commits:
459f1275 by Henry Wilkes at 2024-10-01T16:05:28+01:00
fixup! Bug 42305: Add script to combine translation files across versions.

Bug 43156: Add an option to also include strings from a legacy branch.

Also, instead of using the tagger date to find the highest version
branch, we use version ordering.

- - - - -
689b068f by Henry Wilkes at 2024-10-01T16:05:29+01:00
fixup! Add CI for Tor Browser

Bug 43156: Also include strings from the legacy branch.

- - - - -


2 changed files:

- .gitlab/ci/update-translations.yml
- tools/torbrowser/l10n/combine-translation-versions.py


Changes:

=====================================
.gitlab/ci/update-translations.yml
=====================================
@@ -28,6 +28,7 @@ combine-en-US-translations:
       base-browser:base-browser.ftl
       fenix-torbrowserstringsxml:torbrowser_strings.xml
     '
+    TRANSLATION_INCLUDE_LEGACY: "true"
   cache:
     paths:
       - .cache/pip


=====================================
tools/torbrowser/l10n/combine-translation-versions.py
=====================================
@@ -67,136 +67,227 @@ def git_lines(git_args: list[str]) -> list[str]:
     return [line for line in git_text(git_args).split("\n") if line]
 
 
-def git_file_paths(git_ref: str) -> list[str]:
-    """Get the full list of file paths found under the given tree.
-
-    :param git_ref: The git reference for the tree to search.
-    :returns: The found file paths.
-    """
-    return git_lines(["ls-tree", "-r", "--format=%(path)", git_ref])
-
-
-def matching_path(search_paths: list[str], filename: str) -> str | None:
-    """Get the matching file path with the given filename, if it exists.
-
-    :param search_paths: The file paths to search through.
-    :param filename: The file name to match.
-    :returns: The unique file path with the matching name, or None if no such
-      match was found.
-    :throws Exception: If multiple paths shared the same file name.
-    """
-    matching = [path for path in search_paths if os.path.basename(path) == filename]
-    if not matching:
-        return None
-    if len(matching) > 1:
-        raise Exception("Multiple occurrences of {filename}")
-    return matching[0]
-
-
-def git_file_content(git_ref: str, path: str | None) -> str | None:
-    """Get the file content of the specified git blob object.
-
-    :param git_ref: The reference for the tree to find the file under.
-    :param path: The file path for the object, or None if there is no path.
-    :returns: The file content, or None if no path was given.
-    """
-    if path is None:
-        return None
-    return git_text(["cat-file", "blob", f"{git_ref}:{path}"])
-
-
-def get_stable_branch(branch_prefix: str) -> str:
+class BrowserBranch:
+    """Represents a browser git branch."""
+
+    def __init__(self, branch_name: str, is_head: bool = False) -> None:
+        """Create a new instance.
+
+        :param branch_name: The branch's git name.
+        :param is_head: Whether the branch matches "HEAD".
+        """
+        version_match = re.match(
+            r"(?P<prefix>[a-z]+\-browser)\-"
+            r"(?P<firefox>[0-9]+(?:\.[0-9]+){1,2})esr\-"
+            r"(?P<browser>[0-9]+\.[05])\-"
+            r"(?P<number>[0-9]+)$",
+            branch_name,
+        )
+
+        if not version_match:
+            raise ValueError(f"Unable to parse the version from the ref {branch_name}")
+
+        self.name = branch_name
+        self.prefix = version_match.group("prefix")
+        self.browser_version = version_match.group("browser")
+        self._is_head = is_head
+        self._ref = "HEAD" if is_head else f"origin/{branch_name}"
+
+        firefox_nums = [int(n) for n in version_match.group("firefox").split(".")]
+        if len(firefox_nums) == 2:
+            firefox_nums.append(0)
+        browser_nums = [int(n) for n in self.browser_version.split(".")]
+        branch_number = int(version_match.group("number"))
+        # Prioritise the Firefox ESR version, then the browser version, then
+        # the branch number.
+        self._ordered = (
+            firefox_nums[0],
+            firefox_nums[1],
+            firefox_nums[2],
+            browser_nums[0],
+            browser_nums[1],
+            branch_number,
+        )
+
+        # Minor version for browser is only ever "0" or "5", so we can convert
+        # the version to an integer.
+        self._browser_int_version = int(2 * float(self.browser_version))
+
+        self._file_paths: list[str] | None = None
+
+    def release_below(self, other: "BrowserBranch", num: int) -> bool:
+        """Determine whether another branch is exactly `num` browser releases
+        behind this one.
+
+        The browser versions are expected to increment by "0.5", so the other
+        branch's version must be exactly `num * 0.5` behind this one.
+
+        :param other: The branch to compare.
+        :param num: The number of "0.5" releases behind to test with.
+        :returns: Whether `other` is exactly `num` releases behind this branch.
+        """
+        return other._browser_int_version == self._browser_int_version - num
+
+    def __lt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered < other._ordered
+
+    def __gt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered > other._ordered
+
+    def get_file_content(self, filename: str) -> str | None:
+        """Fetch the file content for the named file in this branch.
+
+        :param filename: The name of the file to fetch the content for.
+        :returns: The file content, or `None` if no file could be found.
+        """
+        if self._file_paths is None:
+            if not self._is_head:
+                # Minimal fetch of non-HEAD branch to get the file paths.
+                # Individual file blobs will be downloaded as needed.
+                git_run(
+                    ["fetch", "--depth=1", "--filter=blob:none", "origin", self._ref]
+                )
+            self._file_paths = git_lines(
+                ["ls-tree", "-r", "--format=%(path)", self._ref]
+            )
+
+        matching = [
+            path for path in self._file_paths if os.path.basename(path) == filename
+        ]
+        if not matching:
+            return None
+        if len(matching) > 1:
+            raise Exception(f"Multiple occurrences of {filename}")
+
+        path = matching[0]
+
+        return git_text(["cat-file", "blob", f"{self._ref}:{path}"])
+
+
+def get_stable_branch(
+    compare_version: BrowserBranch,
+) -> tuple[BrowserBranch, BrowserBranch | None]:
     """Find the most recent stable branch in the origin repository.
 
-    :param branch_prefix: The prefix that the stable branch should have.
-    :returns: The branch name.
+    :param compare_version: The development branch to compare against.
+    :returns: The stable and legacy branches. If no legacy branch is found,
+      `None` will be returned instead.
     """
-    tag_glob = f"{branch_prefix}-*-build1"
+    # We search for build1 tags. These are added *after* the rebase of browser
+    # commits, so the corresponding branch should contain our strings.
+    # Moreover, we *assume* that the branch with the most recent ESR version
+    # with such a tag will be used in the *next* stable build in
+    # tor-browser-build.
+    tag_glob = f"{compare_version.prefix}-*esr-*-*-build1"
+
     # To speed up, only fetch the tags without blobs.
     git_run(
         ["fetch", "--depth=1", "--filter=object:type=tag", "origin", "tag", tag_glob]
     )
-    # Get most recent stable tag.
+    stable_branches = []
+    legacy_branches = []
+    stable_annotation_regex = re.compile(r"\bstable\b")
+    legacy_annotation_regex = re.compile(r"\blegacy\b")
+
     for build_tag, annotation in (
-        line.split(" ", 1)
-        for line in git_lines(["tag", "-n1", "--list", tag_glob, "--sort=-taggerdate"])
+        line.split(" ", 1) for line in git_lines(["tag", "-n1", "--list", tag_glob])
     ):
-        if "stable" in annotation:
+        is_stable = bool(stable_annotation_regex.search(annotation))
+        is_legacy = bool(legacy_annotation_regex.search(annotation))
+        if not is_stable and not is_legacy:
+            continue
+        try:
             # Branch name is the same as the tag, minus "-build1".
-            return re.sub(r"-build1$", "", build_tag)
-    raise Exception("No stable build1 tag found")
-
-
-def get_version_from_branch_name(branch_name: str) -> tuple[str, float]:
-    """Get the branch prefix and version from its name.
-
-    :param branch_name: The branch to extract from.
-    :returns: The branch prefix and its version number.
-    """
-    version_match = re.match(
-        r"([a-z-]+)-[^-]*-([0-9]+\.[05])-",
-        branch_name,
+            branch = BrowserBranch(re.sub(r"-build1$", "", build_tag))
+        except ValueError:
+            logger.warning(f"Could not read the version for {build_tag}")
+            continue
+        if branch.prefix != compare_version.prefix:
+            continue
+        if is_stable:
+            # Stable can be one release version behind.
+            # NOTE: In principle, when switching between versions there may be a
+            # window of time where the development branch has not yet progressed
+            # to the next "0.5" release, so has the same browser version as the
+            # stable branch. So we also allow for matching browser versions.
+            # NOTE: While the browser versions match:
+            # 1. The "Will be unused in" message will not make sense, but we do
+            #    not expect string differences in this scenario.
+            # 2. We do not expect this scenario to last for long.
+            if not (
+                compare_version.release_below(branch, 1)
+                or compare_version.release_below(branch, 0)
+            ):
+                continue
+            stable_branches.append(branch)
+        elif is_legacy:
+            # Legacy can be two release versions behind.
+            # We also allow for being just one version behind.
+            if not (
+                compare_version.release_below(branch, 2)
+                or compare_version.release_below(branch, 1)
+            ):
+                continue
+            legacy_branches.append(branch)
+
+    if not stable_branches:
+        raise Exception("No stable build1 branch found")
+
+    return (
+        # Return the stable branch with the highest version.
+        max(stable_branches),
+        max(legacy_branches) if legacy_branches else None,
     )
 
-    if not version_match:
-        raise ValueError(f"Unable to parse the version from the branch {branch_name}")
 
-    return (version_match.group(1), float(version_match.group(2)))
+current_branch = BrowserBranch(args.current_branch, is_head=True)
 
+stable_branch, legacy_branch = get_stable_branch(current_branch)
 
-branch_prefix, current_version = get_version_from_branch_name(args.current_branch)
+if os.environ.get("TRANSLATION_INCLUDE_LEGACY", "") != "true":
+    legacy_branch = None
 
-stable_branch = get_stable_branch(branch_prefix)
-_, stable_version = get_version_from_branch_name(stable_branch)
-
-if stable_version > current_version or stable_version < current_version - 0.5:
-    raise Exception(
-        f"Version of stable branch {stable_branch} is not within 0.5 of the "
-        f"current branch {args.current_branch}"
-    )
-
-# Minimal fetch of stable_branch.
-# Individual file blobs will be downloaded as needed.
-git_run(["fetch", "--depth=1", "--filter=blob:none", "origin", stable_branch])
-
-current_file_paths = git_file_paths("HEAD")
-old_file_paths = git_file_paths(f"origin/{stable_branch}")
-
-ci_commit = os.environ.get("CI_COMMIT_SHA", "")
-ci_url_base = os.environ.get("CI_PROJECT_URL", "")
-
-json_data = {
-    "commit": ci_commit,
-    "commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
-    if (ci_commit and ci_url_base)
-    else "",
-    "project-path": os.environ.get("CI_PROJECT_PATH", ""),
-    "current-branch": args.current_branch,
-    "stable-branch": stable_branch,
-    "files": [],
-}
+files_list = []
 
 for translation_branch, name in (
     part.strip().split(":", 1) for part in args.filenames.split(" ") if part.strip()
 ):
-    current_path = matching_path(current_file_paths, name)
-    old_path = matching_path(old_file_paths, name)
+    current_content = current_branch.get_file_content(name)
+    stable_content = stable_branch.get_file_content(name)
 
-    if current_path is None and old_path is None:
+    if current_content is None and stable_content is None:
         # No file in either branch.
         logger.warning(f"{name} does not exist in either the current or stable branch")
-    elif current_path is None:
+    elif current_content is None:
         logger.warning(f"{name} deleted in the current branch")
-    elif old_path is None:
+    elif stable_content is None:
         logger.warning(f"{name} does not exist in the stable branch")
 
     content = combine_files(
         name,
-        git_file_content("HEAD", current_path),
-        git_file_content(f"origin/{stable_branch}", old_path),
-        f"Will be unused in Tor Browser {current_version}!",
+        current_content,
+        stable_content,
+        f"Will be unused in Tor Browser {current_branch.browser_version}!",
     )
-    json_data["files"].append(
+
+    if legacy_branch:
+        legacy_content = legacy_branch.get_file_content(name)
+        if (
+            legacy_content is not None
+            and current_content is None
+            and stable_content is None
+        ):
+            logger.warning(f"{name} still exists in the legacy branch")
+        elif legacy_content is None:
+            logger.warning(f"{name} does not exist in the legacy branch")
+        content = combine_files(
+            name,
+            content,
+            legacy_content,
+            f"Unused in Tor Browser {stable_branch.browser_version}!",
+        )
+
+    files_list.append(
         {
             "name": name,
             "branch": translation_branch,
@@ -204,5 +295,23 @@ for translation_branch, name in (
         }
     )
 
+
+ci_commit = os.environ.get("CI_COMMIT_SHA", "")
+ci_url_base = os.environ.get("CI_PROJECT_URL", "")
+
+json_data = {
+    "commit": ci_commit,
+    "commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
+    if (ci_commit and ci_url_base)
+    else "",
+    "project-path": os.environ.get("CI_PROJECT_PATH", ""),
+    "current-branch": current_branch.name,
+    "stable-branch": stable_branch.name,
+    "files": files_list,
+}
+
+if legacy_branch:
+    json_data["legacy-branch"] = legacy_branch.name
+
 with open(args.outname, "w") as file:
     json.dump(json_data, file)



View it on GitLab: https://gitlab.torproject.org/tpo/applications/tor-browser/-/compare/217346c097a5ace0b3c40b5289067a6ca8db3083...689b068febb15642e667c7f224564533b3156d96

-- 
View it on GitLab: https://gitlab.torproject.org/tpo/applications/tor-browser/-/compare/217346c097a5ace0b3c40b5289067a6ca8db3083...689b068febb15642e667c7f224564533b3156d96
You're receiving this email because of your account on gitlab.torproject.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.torproject.org/pipermail/tor-commits/attachments/20241001/12372e58/attachment-0001.htm>


More information about the tor-commits mailing list