#!/usr/bin/env python3

import getopt
import sys

import numpy as np
import pandas as pd

# Name of the derived column holding n(R) - n(R∧H).  The backslash must be
# escaped: a bare "\H" is an invalid escape sequence, which Python currently
# tolerates only with a warning.
_R_MINUS_H = "n(R\\H)"

# Columns printed by the script, in output order.
_OUTPUT_COLUMNS = [
    "date",
    "n(N)",
    "n(H)",
    "h(H)",
    "n(R)",
    "n(R∧H)",
    "h(R∧H)",
    _R_MINUS_H,
    "frac",
]


def _compute_frac(relay_uptime: pd.DataFrame, relay_dir: pd.DataFrame) -> pd.DataFrame:
    """Join the two per-date tables and add the derived columns.

    The inputs are inner-joined on their "date" column, so only dates present
    in both tables survive. The CSV column names are then renamed to the
    short notation used by the formulas:

        relay_uptime_hours    -> n(N)
        relay_dir_write_hours -> n(H)
        relay_dir_write_bytes -> h(H)
        relay_dir_stats_hours -> n(R)
        both_hours            -> n(R∧H)
        both_bytes            -> h(R∧H)

    Two columns are added: n(R) - n(R∧H) (stored under the set-difference
    name held in ``_R_MINUS_H``) and "frac".

    Args:
        relay_uptime: table with "date" and "relay_uptime_hours" columns.
        relay_dir: table with "date" and the five relay_dir_*/both_* columns
            listed above.

    Returns:
        A new DataFrame; neither input is modified.

    Raises:
        KeyError: if an expected column is missing after the rename.
    """
    j = pd.merge(relay_uptime, relay_dir, on="date", how="inner").rename(
        columns={
            "relay_uptime_hours": "n(N)",
            "relay_dir_write_hours": "n(H)",
            "relay_dir_write_bytes": "h(H)",
            "relay_dir_stats_hours": "n(R)",
            "both_hours": "n(R∧H)",
            "both_bytes": "h(R∧H)",
        }
    )
    j[_R_MINUS_H] = j["n(R)"] - j["n(R∧H)"]
    # Uncomment these to use the formulas for n(R\H) and h(R∧H) from
    # https://metrics.torproject.org/reproducible-metrics.html#relay-users
    # j[_R_MINUS_H] = np.maximum(0, j["n(R)"] - j["n(H)"])
    # j["h(R∧H)"] = np.minimum(j["n(R)"], j["n(H)"]) / np.maximum(j["n(R)"], j["n(H)"]) * j["h(H)"]
    j["frac"] = (j["h(R∧H)"] * j["n(H)"] + j["h(H)"] * j[_R_MINUS_H]) / (j["h(H)"] * j["n(N)"])
    return j


if __name__ == "__main__":
    usage = "usage: {} RELAY_UPTIME_CSV RELAY_DIR_CSV".format(sys.argv[0])
    # No options are defined; gnu_getopt is used only to split off the
    # positional arguments (and to reject anything that looks like an option).
    try:
        _, args = getopt.gnu_getopt(sys.argv[1:], "")
    except getopt.GetoptError as err:
        sys.exit("{}: {}\n{}".format(sys.argv[0], err, usage))
    # Exactly two filenames are required; report that plainly instead of
    # failing with a tuple-unpacking traceback.
    if len(args) != 2:
        sys.exit(usage)
    relay_uptime_csv_filename, relay_dir_csv_filename = args

    relay_uptime = pd.read_csv(relay_uptime_csv_filename)
    relay_dir = pd.read_csv(relay_dir_csv_filename)
    j = _compute_frac(relay_uptime, relay_dir)
    print(j[_OUTPUT_COLUMNS])
