Untitled
unknown
plain_text
a year ago
1.6 kB
6
Indexable
def disparity_filter(table, undirected=True, return_self_loops=True):
    """Attach a disparity-filter (DF) score and a variance to every edge.

    Args:
        table: pandas DataFrame with at least the columns ``"src"``,
            ``"trg"`` and ``"nij"`` (presumably source node, target node and
            edge weight -- confirm against the caller).  Any other columns
            are ignored.  The caller's frame is never modified.
        undirected: when true, rows that join the same unordered pair of
            nodes are collapsed into one row: the first such row is kept and
            receives the highest ``score`` and the lowest ``variance`` found
            for that pair.
        return_self_loops: when false, rows whose ``src`` equals ``trg`` are
            removed.  The removal happens *after* scoring, so self-loops
            still contribute to the per-source totals and counts.

    Returns:
        DataFrame with the columns ``src``, ``trg``, ``nij``, ``score`` and
        ``variance``, in the row order of ``table``.

    Raises:
        KeyError: if ``table`` lacks one of ``src``, ``trg`` or ``nij``.
    """
    sys.stderr.write("Calculating DF score...\n")

    # Private, positionally indexed copy of just the columns that are used.
    # Restricting the columns keeps the per-source sum away from node labels
    # and from unrelated columns that may not be summable at all.
    edges = table[["src", "trg", "nij"]].reset_index(drop=True)

    by_source = edges.groupby("src")
    strength = by_source["nij"].transform("sum")    # total nij leaving src
    degree = by_source["trg"].transform("count")    # number of rows of src

    edges["score"] = 1.0 - (1.0 - edges["nij"] / strength) ** (degree - 1)
    edges["variance"] = (degree ** 2) * (
        (20 + 4.0 * degree) / ((degree + 1.0) * (degree + 2) * (degree + 3))
        - 4.0 / ((degree + 1.0) ** 2)
    )

    if not return_self_loops:
        edges = edges[edges["src"] != edges["trg"]]

    if undirected:
        # Fresh frame with a 0..m-1 index, so the column assignments below
        # never write into a filtered view.
        edges = edges.reset_index(drop=True)
        src, trg = edges["src"], edges["trg"]
        ordered = src <= trg
        # Identify an unordered pair by its (smaller, larger) endpoints kept
        # in two separate columns.  A joined "a-b" string would confuse
        # distinct pairs whenever a label itself contains the separator.
        edges["_lo"] = src.where(ordered, trg)
        edges["_hi"] = trg.where(ordered, src)

        by_pair = edges.groupby(["_lo", "_hi"])
        best_score = by_pair["score"].transform("max")
        least_variance = by_pair["variance"].transform("min")
        edges["score"] = best_score
        edges["variance"] = least_variance

        # Keep the first row seen for every pair.
        edges = edges.drop_duplicates(subset=["_lo", "_hi"])

    return edges[["src", "trg", "nij", "score", "variance"]]
Editor is loading...
Leave a Comment