Untitled
unknown
python
18 days ago
7.4 kB
10
Indexable
""" Assignment 1 Part B for COMS 4223 Neworks, Crowds, and the Web """ import networkx as nx from typing import List import numpy as np ### Exercise 2 def exercise2_1() -> int: """ return 42 """ return 42 # raise ValueError("Not implemented!") # remove this def exercise2_2() -> List[int]: """ Return a list of all odd numbers between 50 and 150. """ result = [] for i in range(51, 150, 2): result.append(i) return result # raise ValueError("Not implemented!") # remove this ### Exercise 3 def exercise3_1(p: str) -> nx.Graph: """ construct graph from file load p, the path to the txt file (for this assignment, "ca-GrQc.txt") as an undirected networkx graph. return the graph """ # init a graph G = nx.Graph() all_nodes = set() # process the file with open(p, 'r') as file: for line in file: if line.startswith('#'): continue # source, sink nodes = line.strip().split() if len(nodes) >= 2: all_nodes.add(nodes[0]) all_nodes.add(nodes[1]) G.add_edge(nodes[0], nodes[1]) else: G.add_node(nodes[0]) print("single node") print("number of edges: ", G.number_of_edges()) print("number of nodes: ", G.number_of_nodes()) print("number of nodes by set: ", len(all_nodes)) return G # raise ValueError("Not implemented!") # remove this def exercise3_2(g: nx.Graph) -> tuple[int, int]: """ given a graph g, return a tuple of integers in the following order: 1. the maximum degree in the network 2. the minimum degree in the network networkx has G.degree() to get degrees for each node. (node_id, degree) """ degrees = dict(g.degree()) maxd, mind = max(degrees.values()), min(degrees.values()) return (maxd, mind) # raise ValueError("Not implemented!") # remove this def exercise3_3(g: nx.Graph) -> tuple[ float, float, float, float, float, ]: """ given a graph g, return a tuple of the following in order: - Percentage of authors with only 1 coauthor. (degree == 1) - Percentage of authors with 10 or fewer coauthors. - Percentage of authors with 20 or fewer coauthors. - Percentage of authors with 40 or fewer coauthors. - Percentage of authors with 80 or fewer coauthors. represent each percentage as a decimal """ # ans 1: without np # bounds = [10, 20, 40, 80] # degrees = dict(g.degree()) # total = len(degrees) # total amount of ppl # result = [] # # calculate each group # # group 1 # count = sum(1 for d in degrees.values() if d == 1) # result.append(count / total) # # remaining groups # for b in bounds: # count = sum(1 for d in degrees.values() if d <= b) # result.append(count / total) # return tuple(result) # sol 2: using np bounds = [10, 20, 40, 80] degrees = np.array(list(dict(g.degree()).values())) total = len(degrees) result = [] result.append( float(np.sum(degrees == 1) / total) ) for b in bounds: result.append( float(np.sum(degrees <= b) / total ) ) # result is ready return tuple(result) # raise ValueError("Not implemented!") # remove this ### Exercise 4 def exercise4_1(g: nx.Graph, n1: int, n2: int) -> bool: """ given a graph g and two nodes n1 and n2 in g, return True if n1 and n2 have a strong tie else False strong tie definition: edges (n1, n2), (n1, n3), (n2, n3) all exist in G. otherwise its a weak tie. in graph: if n1_neighbor intersects with n2_neighbor """ n1, n2 = str(n1), str(n2) if not g.has_edge(n1, n2): return False n1_neighbor = set(g.neighbors(n1)) n2_neighbor = set(g.neighbors(n2)) comm = n1_neighbor.intersection(n2_neighbor) return len(comm) > 0 # if not g.has_edge(n1, n2): # return False # return next(nx.common_neighbors(g, n1, n2), None) is not None # raise ValueError("Not implemented!") # remove this def exercise4_2(g: nx.Graph) -> tuple[int, int, int]: """ Given a graph g, return a tuple of the following 3 values: - Number of edges - Number of strong ties: if 2 nodes have a common neighbor - Number of weak ties: total edge - num(strong tie) """ total = g.number_of_edges() strong_ties = 0 weak_ties = 0 # check each pair of nodes for e in g.edges(): n1, n2 = e n1_neighbor = set(g.neighbors(n1)) n2_neighbor = set(g.neighbors(n2)) comm = n1_neighbor.intersection(n2_neighbor) if len(comm) > 0: strong_ties += 1 weak_ties = total - strong_ties return (total, strong_ties, weak_ties) # -- # raise ValueError("Not implemented!") # remove this def exercise4_3(g: nx.Graph) -> tuple[int, int, int, int]: """ Takes the graph g as an input and returns a tuple of the following 4 values: [work on original graph] - The number of connected components in the graph. use nx.connected_component(g) - The number of nodes in the largest connected component in the graph [work on modified graph] - The number of connected components in the graph, if you removed all weak ties. - The number of nodes in the largest connected component in the graph, if you removed all weak ties. """ # work on original graph components = list(nx.connected_components(g)) num_cc = len(components) max_cc = len(max(components, key=len)) # work on new graph g_strong = nx.Graph() # add only strong tie to new graph for e in g.edges(): n1, n2 = e n1_neighbor = set(g.neighbors(n1)) n2_neighbor = set(g.neighbors(n2)) comm = n1_neighbor.intersection(n2_neighbor) if len(comm) > 0: # is a strong tie g_strong.add_edge(n1, n2) g_strong.add_nodes_from(g.nodes()) # add all nodes # count number for new graph components_strong = list(nx.connected_components(g_strong)) num_cc_strong = len(components_strong) max_cc_strong = len( max(components_strong, key=len) ) if g_strong else 0 return (num_cc, max_cc, num_cc_strong, max_cc_strong) # raise ValueError("Not implemented!") # remove this if __name__ == "__main__": print(exercise2_1()) # expected output: 42 print(exercise2_2()) # expected output: 51 53 55 ... 149 g = exercise3_1('ca-GrQc.txt') output_3_2 = exercise3_2(g) print(output_3_2) if len(output_3_2) != 2: raise ValueError("Output of exercise3_2 should be a tuple of 2 integers") output_3_3 = exercise3_3(g) print(output_3_3) if len(output_3_3) != 5: raise ValueError("Output of exercise3_3 should be a list of 5 floats") print(exercise4_1(g, 3466, 937)) # expected output: True print(exercise4_1(g, 3466, 14924)) # expected output: False output_4_2 = exercise4_2(g) print("output_4_2 = ", output_4_2) if len(output_4_2) != 3: raise ValueError("Output of exercise4_2 should be a tuple of 3 integers") output_4_3 = exercise4_3(g) print("output_4_3 = ", output_4_3) if len(output_4_3) != 4: raise ValueError("Output of exercise4_3 should be a tuple of 4 integers")
Editor is loading...
Leave a Comment