Untitled
unknown
python
10 months ago
7.4 kB
14
Indexable
"""
Assignment 1 Part B for COMS 4223 Neworks, Crowds, and the Web
"""
import networkx as nx
from typing import List
import numpy as np
### Exercise 2
def exercise2_1() -> int:
"""
return 42
"""
return 42
# raise ValueError("Not implemented!") # remove this
def exercise2_2() -> List[int]:
"""
Return a list of all odd numbers between 50 and 150.
"""
result = []
for i in range(51, 150, 2):
result.append(i)
return result
# raise ValueError("Not implemented!") # remove this
### Exercise 3
def exercise3_1(p: str) -> nx.Graph:
"""
construct graph from file
load p, the path to the txt file (for this assignment, "ca-GrQc.txt")
as an undirected networkx graph.
return the graph
"""
# init a graph
G = nx.Graph()
all_nodes = set()
# process the file
with open(p, 'r') as file:
for line in file:
if line.startswith('#'):
continue
# source, sink
nodes = line.strip().split()
if len(nodes) >= 2:
all_nodes.add(nodes[0])
all_nodes.add(nodes[1])
G.add_edge(nodes[0], nodes[1])
else:
G.add_node(nodes[0])
print("single node")
print("number of edges: ", G.number_of_edges())
print("number of nodes: ", G.number_of_nodes())
print("number of nodes by set: ", len(all_nodes))
return G
# raise ValueError("Not implemented!") # remove this
def exercise3_2(g: nx.Graph) -> tuple[int, int]:
"""
given a graph g, return a tuple of integers in the following order:
1. the maximum degree in the network
2. the minimum degree in the network
networkx has G.degree() to get degrees for each node. (node_id, degree)
"""
degrees = dict(g.degree())
maxd, mind = max(degrees.values()), min(degrees.values())
return (maxd, mind)
# raise ValueError("Not implemented!") # remove this
def exercise3_3(g: nx.Graph) -> tuple[
float,
float,
float,
float,
float,
]:
"""
given a graph g, return a tuple of the following in order:
- Percentage of authors with only 1 coauthor. (degree == 1)
- Percentage of authors with 10 or fewer coauthors.
- Percentage of authors with 20 or fewer coauthors.
- Percentage of authors with 40 or fewer coauthors.
- Percentage of authors with 80 or fewer coauthors.
represent each percentage as a decimal
"""
# ans 1: without np
# bounds = [10, 20, 40, 80]
# degrees = dict(g.degree())
# total = len(degrees) # total amount of ppl
# result = []
# # calculate each group
# # group 1
# count = sum(1 for d in degrees.values() if d == 1)
# result.append(count / total)
# # remaining groups
# for b in bounds:
# count = sum(1 for d in degrees.values() if d <= b)
# result.append(count / total)
# return tuple(result)
# sol 2: using np
bounds = [10, 20, 40, 80]
degrees = np.array(list(dict(g.degree()).values()))
total = len(degrees)
result = []
result.append( float(np.sum(degrees == 1) / total) )
for b in bounds:
result.append( float(np.sum(degrees <= b) / total ) )
# result is ready
return tuple(result)
# raise ValueError("Not implemented!") # remove this
### Exercise 4
def exercise4_1(g: nx.Graph, n1: int, n2: int) -> bool:
"""
given a graph g and two nodes n1 and n2 in g,
return True if n1 and n2 have a strong tie else False
strong tie definition: edges (n1, n2), (n1, n3), (n2, n3) all exist in G. otherwise its a weak tie.
in graph:
if n1_neighbor intersects with n2_neighbor
"""
n1, n2 = str(n1), str(n2)
if not g.has_edge(n1, n2):
return False
n1_neighbor = set(g.neighbors(n1))
n2_neighbor = set(g.neighbors(n2))
comm = n1_neighbor.intersection(n2_neighbor)
return len(comm) > 0
# if not g.has_edge(n1, n2):
# return False
# return next(nx.common_neighbors(g, n1, n2), None) is not None
# raise ValueError("Not implemented!") # remove this
def exercise4_2(g: nx.Graph) -> tuple[int, int, int]:
"""
Given a graph g, return a tuple of the following 3 values:
- Number of edges
- Number of strong ties: if 2 nodes have a common neighbor
- Number of weak ties: total edge - num(strong tie)
"""
total = g.number_of_edges()
strong_ties = 0
weak_ties = 0
# check each pair of nodes
for e in g.edges():
n1, n2 = e
n1_neighbor = set(g.neighbors(n1))
n2_neighbor = set(g.neighbors(n2))
comm = n1_neighbor.intersection(n2_neighbor)
if len(comm) > 0:
strong_ties += 1
weak_ties = total - strong_ties
return (total, strong_ties, weak_ties)
# --
# raise ValueError("Not implemented!") # remove this
def exercise4_3(g: nx.Graph) -> tuple[int, int, int, int]:
"""
Takes the graph g as an input and returns a tuple of the following 4 values:
[work on original graph]
- The number of connected components in the graph. use nx.connected_component(g)
- The number of nodes in the largest connected component in the graph
[work on modified graph]
- The number of connected components in the graph, if you removed all weak ties.
- The number of nodes in the largest connected component in the graph, if you removed all weak ties.
"""
# work on original graph
components = list(nx.connected_components(g))
num_cc = len(components)
max_cc = len(max(components, key=len))
# work on new graph
g_strong = nx.Graph()
# add only strong tie to new graph
for e in g.edges():
n1, n2 = e
n1_neighbor = set(g.neighbors(n1))
n2_neighbor = set(g.neighbors(n2))
comm = n1_neighbor.intersection(n2_neighbor)
if len(comm) > 0:
# is a strong tie
g_strong.add_edge(n1, n2)
g_strong.add_nodes_from(g.nodes()) # add all nodes
# count number for new graph
components_strong = list(nx.connected_components(g_strong))
num_cc_strong = len(components_strong)
max_cc_strong = len( max(components_strong, key=len) ) if g_strong else 0
return (num_cc, max_cc, num_cc_strong, max_cc_strong)
# raise ValueError("Not implemented!") # remove this
if __name__ == "__main__":
print(exercise2_1()) # expected output: 42
print(exercise2_2()) # expected output: 51 53 55 ... 149
g = exercise3_1('ca-GrQc.txt')
output_3_2 = exercise3_2(g)
print(output_3_2)
if len(output_3_2) != 2:
raise ValueError("Output of exercise3_2 should be a tuple of 2 integers")
output_3_3 = exercise3_3(g)
print(output_3_3)
if len(output_3_3) != 5:
raise ValueError("Output of exercise3_3 should be a list of 5 floats")
print(exercise4_1(g, 3466, 937)) # expected output: True
print(exercise4_1(g, 3466, 14924)) # expected output: False
output_4_2 = exercise4_2(g)
print("output_4_2 = ", output_4_2)
if len(output_4_2) != 3:
raise ValueError("Output of exercise4_2 should be a tuple of 3 integers")
output_4_3 = exercise4_3(g)
print("output_4_3 = ", output_4_3)
if len(output_4_3) != 4:
raise ValueError("Output of exercise4_3 should be a tuple of 4 integers")
Editor is loading...
Leave a Comment