Untitled

 avatar
unknown
plain_text
a year ago
4.2 kB
3
Indexable
def disparity_filter(table, undirected = True, return_self_loops = True):
    """Score every edge of a weighted edge list with the disparity filter.

    For each row, with ``strength`` the sum of ``nij`` over all rows sharing
    the row's ``src`` and ``degree`` the number of such rows, this computes

        score = 1 - (1 - nij / strength) ** (degree - 1)

    plus a ``variance`` column that depends only on ``degree``.

    Args:
    table (pandas.DataFrame): Edge list with at least the columns "src",
        "trg" and "nij" (the edge weight). The frame is not modified.

    KWArgs:
    undirected (bool): If True, the two directions of an edge are collapsed
        into a single row (the first one encountered), carrying the maximum
        score and the minimum variance of the pair. default: True
    return_self_loops (bool): If False, rows with src == trg are removed
        from the result. They still contribute to strength and degree,
        because the filter is applied after scoring. default: True

    Returns:
    A DataFrame with the columns "src", "trg", "nij", "score", "variance".
    """
    sys.stderr.write("Calculating DF score...\n")
    # Fresh RangeIndex on a new frame: keeps the caller's data untouched and
    # lets the per-row transforms below line up positionally.
    table = table.reset_index(drop = True)
    by_source = table.groupby("src")
    # transform() broadcasts the per-source aggregate back onto each row, so
    # only the columns actually needed are aggregated (no merge required).
    strength = by_source["nij"].transform("sum")
    degree = by_source["trg"].transform("count")
    table["score"] = 1.0 - ((1.0 - (table["nij"] / strength)) ** (degree - 1))
    table["variance"] = (degree ** 2) * (
        ((20 + (4.0 * degree)) / ((degree + 1.0) * (degree + 2) * (degree + 3)))
        - ((4.0) / ((degree + 1.0) ** 2))
    )
    if not return_self_loops:
        table = table[table["src"] != table["trg"]]
    if undirected:
        # Re-index the (possibly filtered) rows so new columns are assigned
        # on an independent frame rather than on a slice.
        table = table.reset_index(drop = True)
        endpoints = list(zip(table["src"], table["trg"]))
        # Identify an undirected edge by its ordered endpoint pair kept in
        # two separate columns. A joined string such as "a-b" would make
        # ("a-b", "c") and ("a", "b-c") indistinguishable.
        table["_edge_lo"] = [min(pair) for pair in endpoints]
        table["_edge_hi"] = [max(pair) for pair in endpoints]
        by_edge = table.groupby(["_edge_lo", "_edge_hi"])
        best_score = by_edge["score"].transform("max")
        least_variance = by_edge["variance"].transform("min")
        table["score"] = best_score
        table["variance"] = least_variance
        # Keep the first row seen for each undirected edge.
        table = table.drop_duplicates(subset = ["_edge_lo", "_edge_hi"])
    return table[["src", "trg", "nij", "score", "variance"]]


def read(filename, column_of_interest, triangular_input = True, consider_self_loops = False, undirected = True, drop_zeroes = False, sep = ",", nrows = 50000):
    """Reads a field separated input file into the internal backboning format (a Pandas Dataframe).
    The input file should have three or more columns.
    The input file must have a one line header with the column names.
    There must be two columns called 'src' and 'trg', indicating the origin and destination of the interaction.
    All other columns must contain integers or floats, indicating the edge weight.
    In case of undirected network, the edges have to be present in both directions with the same weights, or set triangular_input to True.

    Args:
    filename (str): The path to the file containing the edges (anything accepted by pandas.read_csv).
    column_of_interest (str): The column name identifying the weight that will be used for the backboning. It is renamed to "nij" in the result.

    KWArgs:
    triangular_input (bool): Are the edges present only in one direction? If True, every edge is mirrored (src and trg swapped) and duplicated (src, trg) pairs are dropped. default: True
    consider_self_loops (bool): Do you want to keep self loops (src == trg)? If False they are removed. default: False
    undirected (bool): Is the network undirected? If True the reported number of edges is the row count divided by two. default: True
    drop_zeroes (bool): Do you want to drop connections whose weight is not strictly positive? Important: it affects methods based on degree, like disparity_filter. default: False
    sep (str): The field separator of the input file. default: ","
    nrows (int or None): Maximum number of data rows to read from the file; None reads the whole file. default: 50000

    Returns:
    A tuple (table, nodes, edges): the parsed network data with columns "src", "trg" and "nij", the number of distinct nodes, and the number of edges (a float when undirected is True, because of the division by two).
    """
    table = pd.read_csv(filename, sep = sep, nrows = nrows)
    table = table[["src", "trg", column_of_interest]].rename(columns = {column_of_interest: "nij"})
    if drop_zeroes:
        table = table[table["nij"] > 0]
    if not consider_self_loops:
        table = table[table["src"] != table["trg"]]
    if triangular_input:
        # Append the reversed copy of every edge; concat aligns on column
        # names, so swapping the two labels is enough to swap the endpoints.
        mirrored = table.rename(columns = {"src": "trg", "trg": "src"})
        table = pd.concat([table, mirrored], axis = 0)
        # Edges that were already present in both directions would now be
        # duplicated; keep the first occurrence of each (src, trg) pair.
        table = table.drop_duplicates(subset = ["src", "trg"])
    original_nodes = len(set(table["src"]) | set(table["trg"]))
    original_edges = table.shape[0]
    if undirected:
        # Each undirected edge is expected to be stored once per direction.
        return table, original_nodes, original_edges / 2
    return table, original_nodes, original_edges
Editor is loading...
Leave a Comment