Untitled
unknown
plain_text
2 years ago
1.6 kB
9
Indexable
def disparity_filter(table, undirected=True, return_self_loops=True):
    """Score every edge of a weighted edge list against its source node.

    For each row, with ``k`` the number of rows sharing its ``src`` and
    ``s`` the sum of ``nij`` over those rows, the function computes

    * ``score    = 1 - (1 - nij / s) ** (k - 1)``
    * ``variance = k**2 * ((20 + 4k) / ((k+1)(k+2)(k+3)) - 4 / (k+1)**2)``

    Args:
        table: pandas DataFrame with at least the columns ``src``, ``trg``
            and ``nij`` (edge weight). It is not modified.
        undirected: if true, the rows ``(a, b)`` and ``(b, a)`` are treated
            as the same edge. Only the first such row (in input order) is
            kept, carrying the highest score and the lowest variance found
            among the rows of that edge.
        return_self_loops: if false, rows where ``src == trg`` are removed
            from the result. They still contribute to ``k`` and ``s``
            because the filter is applied after scoring.

    Returns:
        DataFrame with the columns ``src``, ``trg``, ``nij``, ``score`` and
        ``variance``. A source whose weights sum to zero yields NaN scores.
    """
    sys.stderr.write("Calculating DF score...\n")
    columns = ["src", "trg", "nij", "score", "variance"]

    # Nothing to score; return an empty frame that still has the full schema.
    if len(table) == 0:
        return table.reindex(columns=columns)

    # New frame with a positional index: the caller's table stays untouched
    # and the output index matches what the merge-based version produced.
    table = table.reset_index(drop=True)

    # Per-source totals, aligned row by row. Only "nij" is summed, so node
    # labels or any extra columns are never aggregated.
    by_src = table.groupby("src")
    strength = by_src["nij"].transform("sum")
    degree = by_src["trg"].transform("count")

    table["score"] = 1.0 - (1.0 - table["nij"] / strength) ** (degree - 1)
    table["variance"] = (degree ** 2) * (
        (20 + 4.0 * degree) / ((degree + 1.0) * (degree + 2) * (degree + 3))
        - 4.0 / ((degree + 1.0) ** 2)
    )

    if not return_self_loops:
        table = table[table["src"] != table["trg"]]

    if undirected and len(table) > 0:
        table = table.reset_index(drop=True)
        # Orientation-independent key kept as two columns, so labels that
        # contain "-" cannot collide the way a formatted string key would.
        forward = table["src"] <= table["trg"]
        table["_lo"] = table["src"].where(forward, table["trg"])
        table["_hi"] = table["trg"].where(forward, table["src"])
        by_edge = table.groupby(["_lo", "_hi"], sort=False)
        table = table.assign(
            score=by_edge["score"].transform("max"),
            variance=by_edge["variance"].transform("min"),
        )
        # Keep the first row seen for each undirected edge.
        table = table.drop_duplicates(subset=["_lo", "_hi"])

    return table[columns]
Leave a Comment