\n", + " | num_nodes | \n", + "num_edges | \n", + "avg_degree | \n", + "directed | \n", + "name | \n", + "
---|---|---|---|---|---|
0 | \n", + "23133 | \n", + "93439 | \n", + "8.078416 | \n", + "False | \n", + "Collaboration | \n", + "
1 | \n", + "192244 | \n", + "609066 | \n", + "6.336385 | \n", + "False | \n", + "Internet | \n", + "
2 | \n", + "4941 | \n", + "6594 | \n", + "2.669095 | \n", + "False | \n", + "PowerGrid | \n", + "
3 | \n", + "2018 | \n", + "2930 | \n", + "2.903865 | \n", + "False | \n", + "Protein | \n", + "
4 | \n", + "36595 | \n", + "91826 | \n", + "5.018500 | \n", + "True | \n", + "PhoneCalls | \n", + "
5 | \n", + "449673 | \n", + "4689479 | \n", + "20.857285 | \n", + "True | \n", + "Citation | \n", + "
6 | \n", + "1039 | \n", + "5802 | \n", + "11.168431 | \n", + "True | \n", + "Metabolic | \n", + "
7 | \n", + "57194 | \n", + "103731 | \n", + "3.627339 | \n", + "True | \n", + "|
8 | \n", + "325729 | \n", + "1497134 | \n", + "9.192513 | \n", + "True | \n", + "WWW | \n", + "
+import itertools
+import numpy as np
+import networkx as nx
+
+
+
+[docs]
def find_sap(G, start, target, path=None):
    """
    Find all self-avoiding paths (SAPs) from a start node to a target node.

    A self-avoiding path is a path that does not revisit any node.

    Parameters
    ----------
    G : NetworkX graph
        The input graph where SAPs will be found.
    start : str or int
        The node where the search for SAPs starts.
    target : str or int
        The node where the search for SAPs ends.
    path : list, optional
        Internal parameter used to keep track of the current path during the
        search. Nodes already in this list are never revisited. The list is
        extended while the search runs and restored before the generator
        finishes, including when the generator is closed early.

    Yields
    ------
    list
        A self-avoiding path from the start node to the target node
        (a fresh copy for every path).

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.Graph()
    >>> edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E'), ('C', 'F'), ('E', 'F')]
    >>> G.add_edges_from(edges)
    >>> start_node = 'A'
    >>> target_node = 'F'
    >>> all_saps = list(find_sap(G, start_node, target_node))
    >>> for path in all_saps:
    ...     print("->".join(path))
    A->B->E->F
    A->C->F
    """
    if path is None:
        path = []

    # An empty graph has no paths; a bare return simply ends the generator.
    if len(G.nodes()) == 0:
        return

    path.append(start)
    try:
        if start == target:
            # Yield a copy: ``path`` keeps being mutated by the search.
            yield path[:]
        else:
            for neighbor in G.neighbors(start):
                if neighbor not in path:
                    yield from find_sap(G, neighbor, target, path)
    finally:
        # Backtrack even if the consumer stops iterating early, so a
        # caller-supplied ``path`` list is never left modified.
        path.pop()
+
+
+
+
+[docs]
def is_hamiltonian_path(G, path):
    """
    Check if a given path is a Hamiltonian path in a graph.

    A Hamiltonian path visits every node of the graph exactly once, and every
    pair of consecutive nodes in it is joined by an edge.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph.
    path : sequence of str or int
        Nodes of the candidate path, in visiting order.

    Returns
    -------
    bool
        True if the path is a Hamiltonian path, otherwise False.
    """
    sequence = list(path)
    graph_nodes = set(G.nodes())

    # Same length and same node set together imply "every node exactly once".
    if len(sequence) != len(graph_nodes) or set(sequence) != graph_nodes:
        return False

    return all(G.has_edge(u, v) for u, v in zip(sequence, sequence[1:]))
+
+
+
+
+[docs]
def find_hamiltonian_path(G):
    """
    Search for a Hamiltonian path in the given graph by brute force.

    Every ordering of the graph's nodes is tried in the order produced by
    ``itertools.permutations`` and the first ordering accepted by
    ``is_hamiltonian_path`` is returned.

    Parameters
    ----------
    G : nx.Graph or nx.DiGraph
        Input graph.

    Returns
    -------
    tuple or None
        Nodes of the first Hamiltonian path found, or None when no ordering
        qualifies.
    """
    orderings = itertools.permutations(list(G.nodes()))
    hits = (ordering for ordering in orderings if is_hamiltonian_path(G, ordering))
    return next(hits, None)
+
+
+
+
+[docs]
def check_connectivity(G):
    """
    Check if the graph is connected.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph.

    Returns
    -------
    connectivity : str
        For directed graphs, one of

        - "strongly connected"
        - "weakly connected"
        - "disconnected"

        For undirected graphs, one of

        - "connected"
        - "disconnected"
    """
    if G.is_directed():
        # Strong connectivity implies weak connectivity, so the stronger
        # property must be tested first or it can never be reported.
        if nx.is_strongly_connected(G):
            return "strongly connected"
        if nx.is_weakly_connected(G):
            return "weakly connected"
        return "disconnected"

    if nx.is_connected(G):
        return "connected"
    return "disconnected"
+
+
+
+[docs]
def graph_info(G, quick=True):
    """
    Print a short summary of a graph.

    The summary lists whether the graph is a ``nx.DiGraph``, its number of
    nodes and edges, its average degree and the connectivity label returned
    by ``check_connectivity``.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph for which the information is to be generated.
    quick : bool, optional
        When False, the diameter and the average clustering coefficient are
        printed as well. The diameter is reported as -1 unless the
        connectivity label is "strongly connected" or "connected".
    """

    def _row(label, formatted_value):
        # Every row shares the same 40-character left-aligned label column.
        print(f"{label:40s}: {formatted_value}")

    directed_flag = isinstance(G, nx.DiGraph)

    # number_of_triangles = #TODO

    connectivity = check_connectivity(G)

    if not quick:
        fully_connected = connectivity in ("strongly connected", "connected")
        diameter = nx.diameter(G) if fully_connected else -1

    print("Graph information")
    _row("Directed", f"{str(directed_flag):>20s}")
    _row("Number of nodes", f"{len(G.nodes()):20d}")
    _row("Number of edges", f"{len(G.edges()):20d}")
    _row("Average degree", f"{sum(dict(G.degree).values()) / len(G.nodes):20.4f}")
    _row("Connectivity", f"{connectivity:>20s}")
    if not quick:
        _row("Diameter", f"{diameter:20d}")
        _row("Average clustering coefficient", f"{nx.average_clustering(G):20.6f}")
+
+
+ # return {
+ # "Directed": is_directed,
+ # "Number of nodes": len(G.nodes()),
+ # "Number of edges": len(G.edges()),
+ # "average_degree": sum(dict(G.degree).values()) / len(G.nodes),
+ # "diameter": diameter,
+ # "average clustering coefficient": nx.average_clustering(G),
+
+ # }
+
+
+[docs]
def longest_shortest_path(G):
    """
    Calculate the longest shortest path (diameter) in a given graph.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph, which can be directed or undirected.

    Returns
    -------
    value : int or float
        The longest shortest path (diameter) in the graph.
        If the graph is empty, returns 0.
        If some node cannot be reached from some other node (the graph is
        not connected), returns float('inf').
    """
    n_nodes = G.number_of_nodes()
    if n_nodes == 0:
        return 0

    diameter = 0
    # Consume the generator lazily instead of materialising every distance.
    for _, lengths in nx.all_pairs_shortest_path_length(G):
        # ``lengths`` only holds nodes reachable from the source, so a short
        # dictionary means at least one pair of nodes has no connecting path.
        if len(lengths) < n_nodes:
            return float("inf")
        diameter = max(diameter, max(lengths.values()))

    return diameter
+
+
+
+
+[docs]
def average_degree(G):
    """
    Compute the mean node degree of a graph.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph, which can be directed or undirected.

    Returns
    -------
    value : float
        Sum of the degrees reported by ``G.degree()`` divided by the number
        of reported nodes.
    """
    degree_total = 0
    node_count = 0
    for _, degree in G.degree():
        degree_total += degree
        node_count += 1
    return degree_total / node_count
+
+
+import itertools
+import numpy as np
+import networkx as nx
+import matplotlib.pyplot as plt
+
+
+
+
+[docs]
def plot_graph(G, **kwargs):
    """
    Plots a NetworkX graph with customizable options.

    Parameters
    ----------
    G : NetworkX graph
        A NetworkX graph object (e.g., nx.Graph, nx.DiGraph).
    **kwargs : keyword arguments
        Additional keyword arguments to customize the plot. These can include:

        node_color : str or list, optional
            Color of the nodes (can be a single color or a list of colors).
        node_size : int or list, optional
            Size of the nodes (single value or list of sizes).
        edge_color : str or list, optional
            Color of the edges (can be a single color or a list of colors).
        width : float, optional
            Width of the edges.
        with_labels : bool, optional
            Whether to draw node labels or not.
        font_size : int, optional
            Size of the font for node labels.
        font_color : str, optional
            Color of the font for node labels.
        title : str, optional
            Title of the plot.
        seed : int, optional
            Seed for the random layout algorithm. When given, NumPy's global
            random state is seeded with it as well.
        edge_labels : dict, optional
            Edge labels forwarded to ``nx.draw_networkx_edge_labels``.
        figsize : tuple, optional
            Size of the figure (used only when a new figure is created).
        ax : axes object, optional
            Axes object to draw the plot on. Defaults to None, which will
            create a new figure.
        pos : object, optional
            Node positions, e.g. the result of nx.spring_layout(G),
            nx.circular_layout(G) or nx.kamada_kawai_layout(G).
            Defaults to nx.spring_layout(G, seed=seed).

    Returns
    -------
    ax : axes object
        The axes the graph was drawn on.
    """

    # Extracting optional arguments
    node_color = kwargs.get("node_color", "lightblue")
    node_size = kwargs.get("node_size", 300)
    edge_color = kwargs.get("edge_color", "black")
    width = kwargs.get("width", 1.0)
    with_labels = kwargs.get("with_labels", True)
    font_size = kwargs.get("font_size", 12)
    font_color = kwargs.get("font_color", "black")
    title = kwargs.get("title", None)
    seed = kwargs.get("seed", None)
    edge_labels = kwargs.get("edge_labels", None)
    figsize = kwargs.get("figsize", (4, 4))
    ax = kwargs.get("ax", None)
    pos = kwargs.get("pos", None)

    if ax is None:
        _, ax = plt.subplots(1, figsize=figsize)
        ax.axis("off")

    # Kept for backward compatibility: callers may rely on the global NumPy
    # random state being seeded as a side effect of plotting.
    if seed is not None:
        np.random.seed(seed)

    if pos is None:
        pos = nx.spring_layout(G, seed=seed)

    # Draw the network
    nx.draw(
        G,
        pos,
        node_color=node_color,
        node_size=node_size,
        edge_color=edge_color,
        width=width,
        with_labels=with_labels,
        font_size=font_size,
        font_color=font_color,
        ax=ax,
    )

    # Target ``ax`` explicitly: with a caller-supplied axes the "current"
    # pyplot axes may be a different subplot.
    if edge_labels is not None:
        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, ax=ax)

    # Set the plot title
    if title is not None:
        ax.set_title(title)

    return ax
+
+
+ # Show the plot
+ # plt.show()
+
+import os
+import gzip
+import json
+import numpy as np
+import networkx as nx
+from numpy import power
+from os.path import join
+from cycler import cycler
+from scipy.special import zeta
+from scipy.optimize import bisect
+
+
+try:
+ import powerlaw
+except:
+ pass
+
+
+
+[docs]
def get_adjacency_list(G):
    """
    Build an adjacency-list view of a graph.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        The input graph for which the adjacency list is to be generated.

    Returns
    -------
    value : dict
        Maps every key of ``G.adj`` to a list of the entries stored under it.
    """
    adjacency = {}
    for node, neighbours in G.adj.items():
        adjacency[node] = list(neighbours)
    return adjacency
+
+
+
+# def _load_graph(file_path, kind, url):
+
+# path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# path = os.path.join(path, 'netsci/datasets')
+
+# if not os.path.isfile(file_path):
+# os.system(f"wget -P {path} {url}")
+
+# if os.path.isfile(file_path):
+# os.system(f"gunzip -k {file_path}")
+
+# with gzip.open(file_path, 'rt') as f:
+# G = nx.read_adjlist(file_path, create_using=kind)
+
+# os.remove(file_path[:-3])
+# return G
+
+
+
+[docs]
def download_sample_dataset():
    """
    Download and unpack the sample network archive into the dataset folder.

    The archive ``networks.zip`` is fetched with ``wget`` unless it is already
    present in the folder returned by ``get_sample_dataset_path``. It is then
    unpacked with ``unzip`` unless ``collaboration.edgelist.txt`` already
    exists there. Both steps shell out through ``os.system`` and are
    best-effort: a failing command does not raise.
    """
    import shlex  # local import: quoting keeps paths with spaces intact

    url = "https://networksciencebook.com/translations/en/resources/networks.zip"
    path = get_sample_dataset_path()
    path_zip = join(path, "networks.zip")
    file_path = join(path, "collaboration.edgelist.txt")

    if not os.path.isfile(path_zip):
        os.system(f"wget -P {shlex.quote(path)} {shlex.quote(url)}")
    else:
        print(f"File {path_zip} already exists.")

    if not os.path.isfile(file_path) and os.path.isfile(path_zip):
        status = os.system(f"unzip {shlex.quote(path_zip)} -d {shlex.quote(path)}")
        # Only claim success when unzip actually reported success.
        if status == 0:
            print(f"Extracted {path_zip} to {path}")
        else:
            print(f"Failed to extract {path_zip} (exit status {status})")
+
+
+
+
def _load_graph(file_path, url, directed, verbose=False):
    """
    Read a two-column integer edge list and return it as a NetworkX graph.

    If ``file_path`` does not exist, the archive ``networks.zip`` is fetched
    from ``url`` with ``wget`` (unless already present in the package dataset
    folder) and unpacked there with ``unzip`` before reading.

    Parameters
    ----------
    file_path : str
        Path of the edge-list file. Blank lines and lines starting with
        ``#`` are skipped; every other line must hold exactly two integers.
    url : str
        Location of the archive to download when the file is missing.
    directed : bool
        If False, the graph is converted to an undirected graph.
    verbose : bool, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    networkx.DiGraph or networkx.Graph
        The loaded graph.
    """
    import shlex  # local import: quoting keeps paths with spaces intact

    path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    path = join(path, "netsci/datasets")
    path_zip = join(path, "networks.zip")

    if not os.path.isfile(file_path):
        if not os.path.isfile(path_zip):
            os.system(f"wget -P {shlex.quote(path)} {shlex.quote(url)}")
        if os.path.isfile(path_zip):
            os.system(f"unzip {shlex.quote(path_zip)} -d {shlex.quote(path)}")

    # Step 1: Read the edge list from the file
    edges = []
    with open(file_path, "r") as file:
        for line in file:
            fields = line.split()
            # Skip blank lines (e.g. a trailing newline) and comments.
            if not fields or fields[0].startswith("#"):
                continue
            A, B = map(int, fields)
            edges.append((A, B))

    # Step 2: Create the graph
    G = nx.DiGraph()
    G.add_edges_from(edges)

    # Step 3: Drop edge directions when the dataset is undirected
    if not directed:
        G = G.to_undirected()
    return G
+
+
+
+[docs]
def load_sample_graph(name, verbose=False):
    """
    Load a sample graph and return it as a NetworkX graph.

    Parameters
    ----------
    name : str
        The name of the graph. Get names from
        `netsci.utils.list_sample_graphs()`.
    verbose : bool, optional
        If True, print information about the loaded graph. Default is False.

    Returns
    -------
    value : networkx.Graph or networkx.DiGraph
        Loaded graph.

    Raises
    ------
    ValueError
        If ``name`` is not a key of ``sample_graphs.json``.
    """

    path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(path, "netsci/datasets/")

    with open(os.path.join(path, "sample_graphs.json"), "r") as f:
        data = json.load(f)

    # Fail loudly on a typo instead of falling through with no graph loaded.
    if name not in data:
        available = ", ".join(sorted(data))
        raise ValueError(
            f"Unknown sample graph {name!r}. Available graphs: {available}"
        )

    entry = data[name]
    file_path = os.path.join(path, f"{entry['filename']}")
    G = _load_graph(
        file_path, url=entry["url"], directed=entry["directed"], verbose=verbose
    )
    if verbose:
        print(f"Successfully loaded {name}")
        print("================================")
        print(entry["description"])
    return G
+
+
+
+
+[docs]
def list_sample_graphs():
    """
    Return the catalogue of sample graphs shipped with the package.

    Returns
    -------
    object
        The parsed content of ``sample_graphs.json`` located in the package
        dataset folder.
    """
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    dataset_dir = os.path.join(package_root, "netsci/datasets")
    catalogue_file = os.path.join(dataset_dir, "sample_graphs.json")

    with open(catalogue_file, "r") as handle:
        catalogue = json.load(handle)
    return catalogue
+
+
+
+
+[docs]
def generate_power_law_dist(N: int, a: float, xmin: float):
    """
    Draw random numbers from a power law p(k) ~ x^(-a) for a>1.

    Sampling is delegated to the ``powerlaw`` package.

    Parameters
    ----------
    N :
        Number of random numbers to draw.
    a :
        Exponent of the distribution.
    xmin :
        Minimum value of the distribution.

    Returns
    -------
    value : np.array
        Power-law distributed random variates.
    """
    distribution = powerlaw.Power_Law(xmin=xmin, parameters=[a])
    return distribution.generate_random(N)
+
+
+
+
+[docs]
def generate_power_law_dist_bounded(
    N: int, a: float, xmin: float, xmax: float, seed: int = -1
):
    """
    Generate a power law distribution of floats p(k) ~ x^(-a) for a>1
    which is bounded by xmin and xmax.

    Parameters
    ----------
    N : int
        Number of samples in the power law distribution.
    a : float
        Exponent of the distribution.
    xmin : float
        Minimum value in the distribution.
    xmax : float
        Maximum value in the distribution.
    seed : int, optional
        Seed for reproducibility. With the default -1 (or None) NumPy's
        global random state is left untouched.

    Returns
    -------
    np.array
        Array of N samples obtained by inverse-transform sampling.
    """
    if seed is not None and seed != -1:
        np.random.seed(seed)

    x0p = np.power(xmin, (-a + 1.0))
    x1p = np.power(xmax, (-a + 1.0))
    alpha = 1.0 / (-a + 1.0)

    # One vectorised draw consumes the same random stream as N scalar draws.
    r = np.random.rand(N)
    return np.power((x1p - x0p) * r + x0p, alpha)
+
+
+
+
+[docs]
def generate_power_law_discrete(
    N: int, a: float, xmin: float, xmax: float, seed: int = -1
):
    """
    Generate a power law distribution of p(k) ~ x^(-a) for a>1,
    with discrete values.

    Parameters
    ----------
    N : int
        Number of samples in the distribution.
    a : float
        Exponent of the power law distribution.
    xmin : float
        Minimum value in the power law distribution.
    xmax : float
        Maximum value in the power law distribution.
    seed : int, optional
        Seed for reproducibility. With the default -1 (or None) NumPy's
        global random state is left untouched.

    Returns
    -------
    np.array
        Integer array of N samples whose sum is even, so that it can be used
        as a degree sequence.
    """
    # -1 and None both mean "do not seed"; np.random.seed rejects negatives.
    if seed is not None and seed != -1:
        np.random.seed(seed)

    x1p = np.power(xmax, (-a + 1.0))
    x0p = np.power(xmin, (-a + 1.0))
    alpha = 1.0 / (-a + 1.0)

    # One vectorised draw consumes the same random stream as N scalar draws.
    r = np.random.rand(N)
    X = np.round(np.power((x1p - x0p) * r + x0p, alpha)).astype(int)

    # The sum of degrees must be even: bump one randomly chosen entry if not.
    # The index comes from NumPy's generator so a seeded call is reproducible.
    if (np.sum(X) % 2) != 0:
        i = np.random.randint(0, N)
        X[i] = X[i] + 1

    return X
+
+
+
+
+[docs]
def tune_min_degree(N: int, a: float, xmin: int, xmax: int, max_iteration: int = 100):
    """
    Generate a connected simple graph from a discrete power-law degree sequence.

    Up to ``max_iteration`` degree sequences are drawn with
    ``generate_power_law_discrete`` (using the iteration index as seed). Each
    is turned into a configuration-model graph with parallel edges and
    self-loops removed; the first connected graph is returned.

    Parameters
    ----------
    N : int
        Number of nodes.
    a : float
        Exponent of the power law distribution.
    xmin : int
        Minimum value of the degree sequence.
    xmax : int
        Maximum value of the degree sequence.
    max_iteration : int, optional
        Maximum number of attempts. Default is 100.

    Returns
    -------
    avg_degree : float
        Mean degree of the returned graph.
    G : networkx.Graph
        The connected graph.

    Raises
    ------
    ValueError
        If a generated degree sequence has an odd sum, or if no connected
        graph is found within ``max_iteration`` attempts.
    """
    for i in range(max_iteration):
        seq = generate_power_law_discrete(N, a, xmin, xmax, seed=i)
        if np.sum(seq) % 2 != 0:
            raise ValueError("The sum of degrees should be even")

        # nx.Graph(...) collapses parallel edges; self-loops are removed after.
        G = nx.Graph(nx.configuration_model(seq))
        G.remove_edges_from(list(nx.selfloop_edges(G)))

        if nx.is_connected(G):
            avg_degree = np.mean([deg for _, deg in G.degree()])
            return avg_degree, G

    raise ValueError("Unable to find a connected graph with the given parameters")
+
+
+
+
+[docs]
def make_powerlaw_graph(
    N: int,
    a: float,
    avg_degree: int,
    xmin: int = 1,
    xmax: int = 10000,
    seed: int = -1,
    xtol=0.01,
    degree_interval=5.0,
    plot=False,
    **kwargs,
):
    """
    Make a powerlaw graph with the given parameters.

    The minimum degree is tuned by bisection on the interval
    ``[xmin, xmin + degree_interval]`` so that the mean degree of the graph
    returned by ``tune_min_degree`` matches ``avg_degree``.

    Parameters
    ----------
    N : int
        Number of nodes.
    a : float
        Exponent of the power law distribution.
    avg_degree : int
        Expected average degree.
    xmin : int, optional
        Minimum value in the power law distribution. Default is 1.
    xmax : int, optional
        Maximum value in the power law distribution. Default is 10000.
    seed : int, optional
        Accepted for interface compatibility; not used by this function.
    xtol : float, optional
        Tolerance for the bisection method. Default is 0.01.
    degree_interval : float, optional
        Width of the bisection interval. Default is 5.0.
    plot : bool, optional
        If True, plot the fitted power law distribution. Default is False.
    **kwargs : optional
        ``color``, ``linestyle`` and ``lw`` for the fitted power-law curve.

    Returns
    -------
    dict
        Keys ``"G"`` (graph), ``"avg_degree"`` (mean degree of ``G``),
        ``"xmin_tuned"`` (bisection result), ``"fit"`` (``powerlaw.Fit``
        object) and ``"ax"`` (plot axes, or None when ``plot`` is False).
    """

    color = kwargs.get("color", "k")
    linestyle = kwargs.get("linestyle", "-")
    lw = kwargs.get("lw", 2)

    def _degree_gap(x):
        # bisect needs a scalar: keep only the mean degree of the sample.
        mean_degree, _ = tune_min_degree(N, a, x, xmax)
        return mean_degree - avg_degree

    xmin_tuned = bisect(_degree_gap, xmin, xmin + degree_interval, xtol=xtol)

    # Build the final graph at the tuned minimum degree.
    _, G = tune_min_degree(N, a, xmin_tuned, xmax)
    sample_seq = np.asarray([deg for _, deg in G.degree()])
    avg_degree = np.mean(sample_seq)

    fit = powerlaw.Fit(sample_seq, discrete=True)

    ax = None  # stays None when no plot is requested
    if plot:
        ax = fit.plot_pdf(linewidth=2, label=str("pdf, %.2f" % a))
        fit.power_law.plot_pdf(c=color, linestyle=linestyle, lw=lw, ax=ax)

    return {
        "G": G,
        "avg_degree": avg_degree,
        "xmin_tuned": xmin_tuned,
        "fit": fit,
        "ax": ax,
    }
+
+
+
+
+[docs]
def generate_power_law_discrete_its(
    alpha: float, k_min: int, k_max: int, size: int = 1
):
    """
    Generate discrete power-law samples using inverse transform sampling.

    Each sample k is drawn from ``{k_min, ..., k_max}`` with probability
    proportional to ``k ** -alpha``.

    References
    ----------
    Devroye, L. (1986). "Non-Uniform Random Variate Generation."
    Springer-Verlag, New York.

    Parameters
    ----------
    alpha : float
        Power law exponent.
    k_min : int
        Minimum degree.
    k_max : int
        Maximum degree.
    size : int, optional
        Number of samples to generate. Defaults to 1.

    Returns
    -------
    np.array
        Integer array of generated samples.

    Raises
    ------
    ValueError
        If ``k_max`` is smaller than ``k_min``.

    Examples
    --------
    >>> gamma = 2.5   # Power-law exponent
    >>> k_min = 1     # Minimum value of k
    >>> k_max = 1000  # Maximum value of k
    >>> size = 10000  # Number of samples
    >>> samples = generate_power_law_discrete_its(gamma, k_min, k_max, size)
    """
    if k_max < k_min:
        raise ValueError("k_max must be greater than or equal to k_min")

    support = np.arange(k_min, k_max + 1)

    # Cumulative distribution over the support. Normalising by the last entry
    # makes cdf[-1] exactly 1, so every uniform draw in [0, 1) maps to a
    # valid index.
    cdf = np.cumsum(support.astype(float) ** (-alpha))
    cdf /= cdf[-1]

    # Generate uniform random numbers
    u = np.random.random(size=size)

    # Inverse transform: the first k whose cumulative probability reaches u.
    indices = np.searchsorted(cdf, u, side="left")
    return support[indices].astype(int)
+
+
+
+
+[docs]
def get_sample_dataset_path():
    """
    Return the path of the package dataset folder.

    The folder is ``netsci/datasets/`` below the directory two levels above
    this module's file.
    """
    module_file = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(module_file))
    return os.path.join(project_root, "netsci/datasets/")
+
+
\n", + " | num_nodes | \n", + "num_edges | \n", + "avg_degree | \n", + "directed | \n", + "name | \n", + "
---|---|---|---|---|---|
0 | \n", + "23133 | \n", + "93439 | \n", + "8.078416 | \n", + "False | \n", + "Collaboration | \n", + "
1 | \n", + "192244 | \n", + "609066 | \n", + "6.336385 | \n", + "False | \n", + "Internet | \n", + "
2 | \n", + "4941 | \n", + "6594 | \n", + "2.669095 | \n", + "False | \n", + "PowerGrid | \n", + "
3 | \n", + "2018 | \n", + "2930 | \n", + "2.903865 | \n", + "False | \n", + "Protein | \n", + "
4 | \n", + "36595 | \n", + "91826 | \n", + "5.018500 | \n", + "True | \n", + "PhoneCalls | \n", + "
5 | \n", + "449673 | \n", + "4689479 | \n", + "20.857285 | \n", + "True | \n", + "Citation | \n", + "
6 | \n", + "1039 | \n", + "5802 | \n", + "11.168431 | \n", + "True | \n", + "Metabolic | \n", + "
7 | \n", + "57194 | \n", + "103731 | \n", + "3.627339 | \n", + "True | \n", + "|
8 | \n", + "325729 | \n", + "1497134 | \n", + "9.192513 | \n", + "True | \n", + "WWW | \n", + "
' + + '' + + _("Hide Search Matches") + + "
" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/examples/chap_02.html b/examples/chap_02.html new file mode 100644 index 0000000..ed9a2ef --- /dev/null +++ b/examples/chap_02.html @@ -0,0 +1,862 @@ + + + + + + + +Code by : Abolfazl Ziaeemehr - https://github.com/Ziaeemehr
+ +[1]:
+
# uncomment and run this line to install the package on colab
+# !pip install "git+https://github.com/Ziaeemehr/netsci.git" -q
+
[2]:
+
import netsci
+import numpy as np
+import networkx as nx
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+from netsci.plot import plot_graph
+from netsci.analysis import find_sap, find_hamiltonian_path
+
[3]:
+
np.random.seed(0)
+
[4]:
+
# make a random graph with n nodes and p% probability of edge connection
+num_nodes = 8
+probability = .4
+seed = 2
+graph = nx.gnp_random_graph(num_nodes, probability, seed=2, directed=False)
+
+# degree distribution
+degrees = dict(graph.degree())
+print("Degrees:", degrees)
+
+# calculate the average degree
+average_degree = np.mean(list(degrees.values()))
+print("Average degree:", average_degree)
+
+# adjacency matrix
+adjacency_matrix = nx.to_numpy_array(graph).astype(int)
+print("Adjacency matrix:\n", adjacency_matrix)
+
+# edges
+edges = list(graph.edges())
+print("Edges:", edges)
+
+# plot the graph
+plot_graph(graph, node_size=1000,
+ node_color='darkred',
+ edge_color='gray',
+ figsize=(5, 5),
+ title="Random Graph with {} nodes and {}% edge connection".format(num_nodes, probability*100))
+plt.show()
+
+Degrees: {0: 2, 1: 2, 2: 2, 3: 5, 4: 3, 5: 1, 6: 3, 7: 2}
+Average degree: 2.5
+Adjacency matrix:
+ [[0 0 0 1 1 0 0 0]
+ [0 0 1 0 0 0 1 0]
+ [0 1 0 1 0 0 0 0]
+ [1 0 1 0 0 1 1 1]
+ [1 0 0 0 0 0 1 1]
+ [0 0 0 1 0 0 0 0]
+ [0 1 0 1 1 0 0 0]
+ [0 0 0 1 1 0 0 0]]
+Edges: [(0, 3), (0, 4), (1, 2), (1, 6), (2, 3), (3, 5), (3, 6), (3, 7), (4, 6), (4, 7)]
+
[5]:
+
# shortest path, find distance between two nodes
+source = np.random.randint(0, len(graph)) # random source node
+target = np.random.randint(0, len(graph)) # random target node
+shortest_path = nx.shortest_path(graph, source, target)
+print("Shortest path from", source, "to", target, ":", shortest_path)
+
+# diameter : maximal shortest path length
+if nx.is_connected(graph):
+ diameter = nx.diameter(graph)
+ print("Diameter:", diameter)
+
+# average shortest path length
+avg_shortest_path_length = nx.average_shortest_path_length(graph)
+print(f"Average shortest path length: {avg_shortest_path_length:.2f}")
+
+Shortest path from 3 to 0 : [3, 0]
+Diameter: 3
+Average shortest path length: 1.82
+
[6]:
+
# directed graph
+graph_dir = nx.to_directed(graph)
+plot_graph(graph_dir,
+ node_size=1000,
+ node_color='darkred',
+ edge_color='gray',
+ figsize=(5, 5),
+ seed=1,
+ title="Random DGraph with {} nodes and {}% edge connection".format(num_nodes, probability*100));
+
[7]:
+
# weighted graph
+seed = 3
+np.random.seed(seed) # to fix the plot
+
+num_nodes = 5
+probability = 0.8
+graph_w = nx.erdos_renyi_graph(num_nodes, probability, seed=seed)
+
+for (u,v) in graph_w.edges():
+ graph_w[u][v]['weight'] = np.random.randint(1, 10)
+
+# plot the weighted graph
+edge_labels = nx.get_edge_attributes(graph_w, 'weight')
+
+plot_graph(graph_w,
+ with_labels=True,
+ node_color='lightblue',
+ node_size=700,
+ font_size=12,
+ edge_labels=edge_labels,
+ figsize=(5, 5),
+ title="Weighted Random Network")
+
+weighted_adjacency_matrix = nx.to_numpy_array(graph_w, weight='weight').astype(int)
+print("Weighted adjacency matrix:\n", weighted_adjacency_matrix)
+
+Weighted adjacency matrix:
+ [[0 9 4 9 9]
+ [9 0 1 6 4]
+ [4 1 0 0 6]
+ [9 6 0 0 8]
+ [9 4 6 8 0]]
+
self avoiding path
+[8]:
+
# Create a graph
+G = nx.Graph()
+edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E'), ('C', 'F'), ('E', 'F')]
+G.add_edges_from(edges)
+
+# Find all self-avoiding paths from 'A' to 'F'
+start_node = 'A'
+target_node = 'F'
+all_saps = list(find_sap(G, start_node, target_node))
+
+for path in all_saps:
+ print("->".join(path))
+
+plot_graph(G, seed=2, figsize=(3, 3))
+
+A->B->E->F
+A->C->F
+
[8]:
+
+<AxesSubplot:>
+
A Hamiltonian path is a path in a graph that visits each vertex exactly once.
+[9]:
+
# Example usage
+G = nx.Graph()
+G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 1)])
+
+plot_graph(G, seed=2, figsize=(3, 3))
+
+path = find_hamiltonian_path(G)
+if path:
+ print("Hamiltonian Path found:", path)
+else:
+ print("No Hamiltonian Path found")
+
+Hamiltonian Path found: (1, 2, 3, 4, 5, 6)
+
[10]:
+
# hamiltonian path of weighted graph:
+path = find_hamiltonian_path(graph_w)
+if path:
+ print("Hamiltonian Path found:", path)
+else:
+ print("No Hamiltonian Path found")
+
+Hamiltonian Path found: (0, 1, 2, 4, 3)
+
Adjacency List
[11]:
+
G = nx.Graph()
+edges = [(1, 2), (1, 3), (2, 4), (2, 5), (3, 6), (5, 6)]
+G.add_edges_from(edges)
+
+plot_graph(G, seed=2, figsize=(3, 3))
+
+adjacency_matrix = nx.to_numpy_array(G).astype(int)
+print(f"adjacency matrix\n {adjacency_matrix}")
+
+
+adjacency_list = {n: list(neighbors) for n, neighbors in G.adj.items()}
+print(f"adjacency list\n {adjacency_list}")
+
+adjacency matrix
+ [[0 1 1 0 0 0]
+ [1 0 0 1 1 0]
+ [1 0 0 0 0 1]
+ [0 1 0 0 0 0]
+ [0 1 0 0 0 1]
+ [0 0 1 0 1 0]]
+adjacency list
+ {1: [2, 3], 2: [1, 4, 5], 3: [1, 6], 4: [2], 5: [2, 6], 6: [3, 5]}
+
Adjacency list of directed graph:
[12]:
+
G = nx.DiGraph()
+edges = [(1, 2), (1, 3), (2, 4), (2, 5), (3, 6), (5, 6)]
+G.add_edges_from(edges)
+plot_graph(G, seed=2, figsize=(3, 3))
+
+adjacency_matrix = nx.to_numpy_array(G).astype(int)
+print(f"adjacency matrix\n {adjacency_matrix}")
+
+adjacency_list = {n: list(neighbors) for n, neighbors in G.adj.items()}
+print(f"adjacency list\n {adjacency_list}")
+
+adjacency matrix
+ [[0 1 1 0 0 0]
+ [0 0 0 1 1 0]
+ [0 0 0 0 0 1]
+ [0 0 0 0 0 0]
+ [0 0 0 0 0 1]
+ [0 0 0 0 0 0]]
+adjacency list
+ {1: [2, 3], 2: [4, 5], 3: [6], 4: [], 5: [6], 6: []}
+
Implementation of BFS for Graph using Adjacency List:
+[13]:
+
from collections import deque
+
+# Function to perform Breadth First Search on a graph
+# represented using adjacency list
+def bfs(adjList, startNode, visited):
+ # Create a queue for BFS
+ q = deque()
+
+ # Mark the current node as visited and enqueue it
+ visited[startNode] = True
+ q.append(startNode)
+
+ # Iterate over the queue
+ while q:
+ # Dequeue a vertex from queue and print it
+ currentNode = q.popleft()
+ print(currentNode, end=" ")
+
+ # Get all adjacent vertices of the dequeued vertex
+ # If an adjacent has not been visited, then mark it visited and enqueue it
+ for neighbor in adjList[currentNode]:
+ if not visited[neighbor]:
+ visited[neighbor] = True
+ q.append(neighbor)
+
+# Function to add an edge to the graph
+def addEdge(adjList, u, v):
+ adjList[u].append(v)
+
+def main():
+ # Number of vertices in the graph
+ vertices = 5
+
+ # Adjacency list representation of the graph
+ adjList = [[] for _ in range(vertices)]
+
+ # Add edges to the graph
+ addEdge(adjList, 0, 1)
+ addEdge(adjList, 0, 2)
+ addEdge(adjList, 1, 3)
+ addEdge(adjList, 1, 4)
+ addEdge(adjList, 2, 4)
+
+ # Mark all the vertices as not visited
+ visited = [False] * vertices
+
+ # Perform BFS traversal starting from vertex 0
+ print("Breadth First Traversal starting from vertex 0:", end=" ")
+ bfs(adjList, 0, visited)
+
+ #plot the graph
+ G = nx.Graph()
+ G.add_edges_from([(0, 1), (0, 2), (1, 3), (1, 4), (2, 4)])
+ plot_graph(G, seed=2, figsize=(3, 3))
+
+if __name__ == "__main__":
+ main()
+
+Breadth First Traversal starting from vertex 0: 0 1 2 3 4
+
[14]:
+
from netsci.analysis import graph_info
+graph_info(graph_w)
+
+Graph information
+Directed : False
+Number of nodes : 5
+Number of edges : 9
+Average degree : 3.6000
+Connectivity : connected
+
[15]:
+
import networkx as nx
+import pandas as pd
+from netsci.analysis import average_degree
+from netsci.utils import list_sample_graphs, load_sample_graph
+
[21]:
+
nets = list(list_sample_graphs().keys())
+
[17]:
+
G = load_sample_graph("Internet")
+
[18]:
+
graph_info(G)
+
+Graph information
+Directed : False
+Number of nodes : 192244
+Number of edges : 609066
+Average degree : 6.3364
+Connectivity : disconnected
+
[19]:
+
for net in tqdm(nets, desc="Processing sample graphs"):
+ print(net)
+
+Processing sample graphs: 100%|██████████| 10/10 [00:00<00:00, 19463.13it/s]
+
+Collaboration
+Internet
+PowerGrid
+Protein
+PhoneCalls
+Citation
+Metabolic
+Email
+WWW
+Actor
+
+
+
[20]:
+
data_list = []
+
+for net in tqdm(nets[:-1], desc="Processing sample graphs"):
+ G = load_sample_graph(net)
+ num_nodes = G.number_of_nodes()
+ num_edges = G.number_of_edges()
+ avg_degree = average_degree(G)
+ directed = nx.is_directed(G)
+
+ # Append a dictionary of data for this network to the list
+ data_list.append({
+ 'num_nodes': num_nodes,
+ 'num_edges': num_edges,
+ 'avg_degree': avg_degree,
+ "directed": directed,
+ "name": net
+ })
+
+# Create the DataFrame from the list of dictionaries
+df = pd.DataFrame(data_list)
+
+# Display the DataFrame
+df
+
+Processing sample graphs: 100%|██████████| 9/9 [00:33<00:00, 3.72s/it]
+
[20]:
+
+ | num_nodes | +num_edges | +avg_degree | +directed | +name | +
---|---|---|---|---|---|
0 | +23133 | +93439 | +8.078416 | +False | +Collaboration | +
1 | +192244 | +609066 | +6.336385 | +False | +Internet | +
2 | +4941 | +6594 | +2.669095 | +False | +PowerGrid | +
3 | +2018 | +2930 | +2.903865 | +False | +Protein | +
4 | +36595 | +91826 | +5.018500 | +True | +PhoneCalls | +
5 | +449673 | +4689479 | +20.857285 | +True | +Citation | +
6 | +1039 | +5802 | +11.168431 | +True | +Metabolic | +
7 | +57194 | +103731 | +3.627339 | +True | +|
8 | +325729 | +1497134 | +9.192513 | +True | +WWW | +
\n", + " | num_nodes | \n", + "num_edges | \n", + "avg_degree | \n", + "directed | \n", + "name | \n", + "
---|---|---|---|---|---|
0 | \n", + "23133 | \n", + "93439 | \n", + "8.078416 | \n", + "False | \n", + "Collaboration | \n", + "
1 | \n", + "192244 | \n", + "609066 | \n", + "6.336385 | \n", + "False | \n", + "Internet | \n", + "
2 | \n", + "4941 | \n", + "6594 | \n", + "2.669095 | \n", + "False | \n", + "PowerGrid | \n", + "
3 | \n", + "2018 | \n", + "2930 | \n", + "2.903865 | \n", + "False | \n", + "Protein | \n", + "
4 | \n", + "36595 | \n", + "91826 | \n", + "5.018500 | \n", + "True | \n", + "PhoneCalls | \n", + "
5 | \n", + "449673 | \n", + "4689479 | \n", + "20.857285 | \n", + "True | \n", + "Citation | \n", + "
6 | \n", + "1039 | \n", + "5802 | \n", + "11.168431 | \n", + "True | \n", + "Metabolic | \n", + "
7 | \n", + "57194 | \n", + "103731 | \n", + "3.627339 | \n", + "True | \n", + "|
8 | \n", + "325729 | \n", + "1497134 | \n", + "9.192513 | \n", + "True | \n", + "WWW | \n", + "
Code by : Abolfazl Ziaeemehr - https://github.com/Ziaeemehr
+ +[1]:
+
# uncomment and run this line to install the package on colab
+# !pip install "git+https://github.com/Ziaeemehr/netsci.git" -q
+
A random network consists of N nodes where each node pair is connected with probability p. To construct a random network we follow these steps: 1) Start with N isolated nodes. 2) Select a node pair and generate a random number between 0 and 1. If the number is less than or equal to p, connect the selected node pair with a link, otherwise leave them disconnected. 3) Repeat step (2) for each of the N(N-1)/2 node pairs.
+[1]:
+
import random
+import numpy as np
+import networkx as nx
+import seaborn as sns
+import matplotlib.pyplot as plt
+from netsci.plot import plot_graph
+
[2]:
+
LABELSIZE = 13
+plt.rc('axes', labelsize=LABELSIZE)
+plt.rc('axes', titlesize=LABELSIZE)
+plt.rc('figure', titlesize=LABELSIZE)
+plt.rc('legend', fontsize=LABELSIZE)
+plt.rc('xtick', labelsize=LABELSIZE)
+plt.rc('ytick', labelsize=LABELSIZE)
+
[3]:
+
def create_random_network(N, p):
+ G = nx.Graph() # Initialize an empty graph
+ G.add_nodes_from(range(N)) # Add N isolated nodes
+
+ # Iterate through each possible node pair
+ for i in range(N):
+ for j in range(i + 1, N):
+ if random.random() <= p: # Generate a random number and compare it with p
+ G.add_edge(i, j) # Connect the nodes if the condition is met
+
+ return G
+
+# Example usage:
+N = 10 # Number of nodes
+p = 0.3 # Probability of edge creation
+
+seed=2
+random.seed(seed)
+np.random.seed(seed)
+
+random_network = create_random_network(N, p)
+plot_graph(random_network, seed=2, figsize=(5, 3), title="Random Network")
+
[3]:
+
+<AxesSubplot:title={'center':'Random Network'}>
+
Another option is to use the nx.gnp_random_graph
function from NetworkX, which generates random graphs with a given number of nodes and a given probability of edge creation.
G = nx.gnp_random_graph(N, p)
+
Degree distribution in a random network follows a binomial distribution.
+[4]:
+
# make a random graph with N nodes and average degree of k
+np.random.seed(2)
+
+num_nodes = [100, 1000, 10000]
+average_degree = 50
+lambd = 50
+colors1 = plt.cm.Reds(np.linspace(0.2, 0.6, len(num_nodes)))
+
+for i in range(len(num_nodes)):
+ probability = average_degree / num_nodes[i]
+ graph_b = nx.gnp_random_graph(num_nodes[i], probability)
+ degrees = [d for n, d in graph_b.degree()]
+ sns.kdeplot(degrees, fill=False, label=f"N.bino={num_nodes[i]}", color=colors1[i])
+
+s = np.random.poisson(lambd, num_nodes[-1])
+sns.kdeplot(s, fill=False, label=f"N.pois={num_nodes[i]}", color='b')
+
+plt.xlabel("k")
+plt.ylabel("P(k)")
+plt.legend();
+
[5]:
+
import networkx as nx
+import matplotlib.pyplot as plt
+
+# Step 1: Generate a random graph (Erdős-Rényi model)
+n = 20 # number of nodes
+p = 0.12 # probability of edge creation
+G = nx.erdos_renyi_graph(n, p)
+
+# Step 2: Find all connected components
+connected_components = list(nx.connected_components(G))
+
+# Step 3: Calculate the size of each connected component
+component_sizes = [len(component) for component in connected_components]
+
+# Display the graph and component sizes
+print("Connected Components:")
+for i, component in enumerate(connected_components):
+ print(f"Component {i + 1}: Size {len(component)}")
+
+# Optionally, visualize the graph
+plot_graph(G, seed=2, figsize=(5, 3));
+
+Connected Components:
+Component 1: Size 19
+Component 2: Size 1
+
Plotting the size of giant connected component vs average degree
+[6]:
+
N = int(1e4)
+print(f"N={N}, Ln(N)= {np.log(N)}")
+k_avg = [.1, 0.5, 0.9, 1.0] + np.linspace(1.1, np.log(N), 10).tolist()
+giant_component_sizes = []
+for i in range(len(k_avg)):
+ p = k_avg[i] / N
+ G = nx.erdos_renyi_graph(N, p)
+ connected_components = list(nx.connected_components(G))
+ component_sizes = [len(component) for component in connected_components]
+ giant_component_size = max(component_sizes)
+ giant_component_sizes.append(giant_component_size)
+
+ print(f"average k = {k_avg[i]:10.3f}, giant_component_size={giant_component_size:10d}")
+
+giant_component_sizes = np.array(giant_component_sizes)/N
+plt.plot(k_avg, giant_component_sizes, marker='o', label='Giant Component Size')
+plt.xlabel(r'Average Degree')
+plt.ylabel(r'$N_G / N$');
+
+N=10000, Ln(N)= 9.210340371976182
+average k = 0.100, giant_component_size= 4
+average k = 0.500, giant_component_size= 11
+average k = 0.900, giant_component_size= 164
+average k = 1.000, giant_component_size= 480
+average k = 1.100, giant_component_size= 1682
+average k = 2.001, giant_component_size= 8028
+average k = 2.902, giant_component_size= 9363
+average k = 3.803, giant_component_size= 9755
+average k = 4.705, giant_component_size= 9909
+average k = 5.606, giant_component_size= 9967
+average k = 6.507, giant_component_size= 9989
+average k = 7.408, giant_component_size= 9999
+average k = 8.309, giant_component_size= 9997
+average k = 9.210, giant_component_size= 9998
+
[4]:
+
from netsci.utils import load_sample_graph, list_sample_graphs
+from netsci.analysis import graph_info
+
+graphs = list_sample_graphs()
+graphs.keys()
+
[4]:
+
+dict_keys(['Collaboration', 'Internet', 'PowerGrid', 'Protein', 'PhoneCalls', 'Citation', 'Metabolic', 'Email', 'WWW', 'Actor'])
+
[5]:
+
G_collab = load_sample_graph('Collaboration', verbose=True)
+graph_info(G_collab)
+
+Successfully loaded Collaboration
+================================
+Scientific collaboration network based on the arXiv preprint archive's
+ Condense Matter Physics category covering the period from January 1993 to April 2003.
+ Each node represents an author, and two nodes are connected if they co-authored at
+ least one paper in the dataset. Ref: Leskovec, J., Kleinberg, J., & Faloutsos, C. (2007).
+ Graph evolution: Densification and shrinking diameters.
+ ACM Transactions on Knowledge Discovery from Data (TKDD), 1(1), 2.
+Graph information
+Directed : False
+Number of nodes : 23133
+Number of edges : 93439
+Average degree : 8.0784
+Connectivity : disconnected
+
Figure 3.6
+[10]:
+
from scipy.stats import poisson
+from collections import Counter
+
+fig, ax = plt.subplots(1,3, figsize=(15,4))
+
+c = 0
+for net in ["Internet", "Collaboration", "Protein"]:
+ G = load_sample_graph(net)
+ degrees = [G.degree(n) for n in G.nodes()]
+ degree_count = Counter(degrees)
+ k, pk = zip(*degree_count.items())
+ k = np.array(k)
+ pk = np.array(pk)/sum(pk)
+
+
+ ax[c].loglog(k, pk, 'k.', label='real')
+ ax[c].set_xlabel("k")
+ ax[c].set_ylabel("pk");
+ ymin, ymax = np.min(pk)*0.9, np.max(pk)*1.1
+
+ # add poisson distribution to graph
+
+ k = np.arange(0, max(degrees)+1)
+ pk_poisson = poisson.pmf(k, np.mean(degrees))
+ ax[c].loglog(k, pk_poisson, 'r', label='poisson')
+ # plt.ylim([1e-5, 1])
+ ax[c].legend(frameon=False);
+ ax[c].set_ylim([ymin, ymax])
+ ax[c].set_title(net)
+ c += 1
+
+plt.tight_layout()
+
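The local clustering coefficient of a random network is \(C_i = p = \frac{\langle k \rangle}{N}\), i.e. it is independent of the node's degree and decreases with the network size \(N\).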
+To analyze the dependence of the average path length \(d(p)\) and the clustering coefficient \(\langle C(p) \rangle\) on the rewiring parameter \(p\) for a small-world network, you can use the Watts-Strogatz model. This model begins with a regular lattice and introduces randomness by rewiring each edge with probability \(p\). Here’s a step-by-step guide on how to perform this analysis:
+Generate a regular lattice: Create a regular ring lattice with \(N\) nodes where each node is connected to its \(k\) nearest neighbors.
Rewire edges: For each edge in the lattice, rewire it with probability \(p\). This involves replacing the existing edge with a new edge that connects the node to a randomly chosen node in the network.
Compute \(d(p)\) and \(\langle C(p) \rangle\):
+\(d(p)\) is the average shortest path length between all pairs of nodes in the network.
\(\langle C(p) \rangle\) is the average clustering coefficient of all nodes in the network.
Normalize by \(d(0)\) and \(\langle C(0) \rangle\):
+\(d(0)\) is the average path length of the regular lattice (when \(p=0\)).
\(\langle C(0) \rangle\) is the average clustering coefficient of the regular lattice (when \(p=0\)).
Plot the results: Plot \(d(p)/d(0)\) and \(\langle C(p) \rangle / \langle C(0) \rangle\) as functions of \(p\) on a log scale to observe the small-world phenomenon.
[10]:
+
import numpy as np
+import networkx as nx
+import matplotlib.pyplot as plt
+
+# Parameters
+N = 1000 # Number of nodes
+k = 10 # Each node is connected to k nearest neighbors in ring topology
+p_values = np.logspace(-4, 0, num=100) # Rewiring probabilities
+
+# Initialize lists to store results
+average_path_lengths = []
+clustering_coefficients = []
+
+# Generate the initial regular lattice
+G0 = nx.watts_strogatz_graph(N, k, 0)
+d0 = nx.average_shortest_path_length(G0)
+C0 = nx.average_clustering(G0)
+
+for p in p_values:
+ G = nx.watts_strogatz_graph(N, k, p)
+ d = nx.average_shortest_path_length(G)
+ C = nx.average_clustering(G)
+ average_path_lengths.append(d / d0)
+ clustering_coefficients.append(C / C0)
+
+# Plotting
+plt.figure(figsize=(5, 4))
+
+# Average path length plot
+plt.plot(p_values, average_path_lengths, marker='o', linestyle='-', color='blue', label=r"$d(p)/d(0)$")
+
+# Clustering coefficient plot
+plt.plot(p_values, clustering_coefficients, marker='o', linestyle='-', color='red', label=r"$\langle C(p) \rangle / \langle C(0) \rangle$")
+plt.xscale('log')
+plt.xlabel('Rewiring probability p')
+plt.legend(frameon=False)
+plt.tight_layout()
+plt.show()
+
[ ]:
+
+
Code by : Abolfazl Ziaeemehr - https://github.com/Ziaeemehr
+ +[1]:
+
# uncomment and run this line to install the package on colab
+# !pip install "git+https://github.com/Ziaeemehr/netsci.git" -q
+
[5]:
+
import numpy as np
+import matplotlib.pyplot as plt
+from netsci.utils import generate_power_law_dist, generate_power_law_dist_bounded
+
[6]:
+
LABELSIZE = 13
+plt.rc('axes', labelsize=LABELSIZE)
+plt.rc('axes', titlesize=LABELSIZE)
+plt.rc('figure', titlesize=LABELSIZE)
+plt.rc('legend', fontsize=LABELSIZE)
+plt.rc('xtick', labelsize=LABELSIZE)
+plt.rc('ytick', labelsize=LABELSIZE)
+# set legend font size
+plt.rc('legend', fontsize=10)
+
Comparing Poisson and Power-law Distributions
+[7]:
+
import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import poisson
+
+# Parameters
+mean_poisson = 11
+alpha_power_law = 2.1
+x_values = np.arange(1, 1000)
+
+# Poisson Distribution
+poisson_pmf = poisson.pmf(x_values, mean_poisson)
+
+# Power Law Distribution
+power_law_pdf = x_values ** (-alpha_power_law)
+# Normalize power-law PDF to make it a valid probability distribution
+power_law_pdf /= np.sum(power_law_pdf)
+
+# Plotting
+
+fig, ax = plt.subplots(1,2, figsize=(12,4))
+
+ax[0].plot(x_values, poisson_pmf, label='Poisson Distribution (mean=11)')
+ax[0].plot(x_values, power_law_pdf, label='Power Law Distribution (α=-2.1)')
+ax[0].set_xlim([0,50])
+ax[0].set_ylim([0,0.15])
+ax[0].set_xlabel('x')
+ax[0].set_ylabel(r'$p_k$')
+fig.suptitle('Comparison of Poisson and Power Law Distributions')
+ax[0].legend(frameon=False)
+ax[1].loglog(x_values, poisson_pmf, label="poisson")
+ax[1].loglog(x_values, power_law_pdf, label="powerlaw")
+ax[1].set_ylim([1e-6, 1])
+ax[1].set_xlabel('x')
+ax[1].set_ylabel(r'$p_k$')
+ax[1].legend(frameon=False);
+
[10]:
+
from netsci.utils import list_sample_graphs, load_sample_graph
+from netsci.analysis import graph_info
+
+nets = list(list_sample_graphs().keys())
+print(nets)
+
+['Actor', 'Collaboration', 'Internet', 'PowerGrid', 'Protein', 'PhoneCalls', 'Citation', 'Metabolic', 'Email', 'WWW']
+
[11]:
+
from collections import Counter
+from scipy.stats import poisson
+G_collab = load_sample_graph("Collaboration")
+graph_info(G_collab, quick=True)
+in_degrees = list(dict(G_collab.in_degree()).values())
+out_degrees = list(dict(G_collab.out_degree()).values())
+in_degree_count = Counter(in_degrees)
+out_degree_count = Counter(out_degrees)
+
+k_in, pk_in = zip(*in_degree_count.items())
+k_out, pk_out = zip(*out_degree_count.items())
+
+plt.figure(figsize=(6,4))
+plt.loglog(k_in, pk_in, 'r.', label=r"$k_{in}$")
+plt.loglog(k_out, pk_out, 'b.', label=r"$k_{out}$")
+plt.legend(frameon=1)
+plt.xlabel(r"$k_{in}, k_{out}$")
+plt.ylabel("pk");
+
+Graph information
+Directed : True
+Number of nodes : 23133
+Number of edges : 93439
+Average degree : 8.0784
+Connectivity : disconnected
+
[12]:
+
# find the exponent by fitting a power law by powerlaw package
+import powerlaw
+
+for x in [k_in, k_out]:
+ fit = powerlaw.Fit(x) # xmax=50 we can constrain the max value for fitting
+ print(f" α = {fit.power_law.alpha:6.3f}, σ = ± {fit.power_law.sigma:6.3f}") # the exponent
+
+Calculating best minimal value for power law fit
+ α = 3.042, σ = ± 0.327
+Calculating best minimal value for power law fit
+ α = 5.496, σ = ± 0.937
+
+Values less than or equal to 0 in data. Throwing out 0 or negative values
+Values less than or equal to 0 in data. Throwing out 0 or negative values
+
[7]:
+
generate_power_law_dist?
+
+Signature: generate_power_law_dist(N: int, a: float, xmin: float)
+Docstring:
+generate power law random numbers p(k) ~ x^(-a) for a>1
+
+Parameters
+-----------
+N:
+ is the number of random numbers
+a:
+ is the exponent
+xmin:
+ is the minimum value of distribution
+
+Returns
+-----------
+value: np.array
+ powerlaw distribution
+File: ~/git/workshops/netsci/netsci/utils.py
+Type: function
+
[8]:
+
generate_power_law_dist_bounded?
+
+Signature:
+generate_power_law_dist_bounded(
+ N: int,
+ a: float,
+ xmin: float,
+ xmax: float,
+ seed: int = -1,
+)
+Docstring:
+Generate a power law distribution of floats p(k) ~ x^(-a) for a>1
+which is bounded by xmin and xmax
+
+parameters :
+ N: int
+ number of samples in powerlaw distribution (pwd).
+ a:
+ exponent of the pwd.
+ xmin:
+ min value in pwd.
+ xmax:
+ max value in pwd.
+File: ~/git/workshops/netsci/netsci/utils.py
+Type: function
+
plotting the powerlaw distributions
+[9]:
+
def plot_distribution(vrs, N, a, xmin, ax, labelsize=10):
+
+ # plotting the PDF estimated from variates
+ bin_min, bin_max = np.min(vrs), np.max(vrs)
+ bins = 10**(np.linspace(np.log10(bin_min), np.log10(bin_max), 100))
+ counts, edges = np.histogram(vrs, bins, density=True)
+ centers = (edges[1:] + edges[:-1])/2.
+
+ # plotting the expected PDF
+ xs = np.linspace(bin_min, bin_max, N)
+ expected_pdf = [(a-1) * xmin**(a-1) * x**(-a) for x in xs] # according to eq. 4.12 network science barabasi 2016
+ ax.loglog(xs, expected_pdf, color='red', ls='--', label=r"$x^{-\gamma}$,"+ r"${\gamma}$="+f"{-a:.2f}")
+ ax.loglog(centers, counts, 'k.', label='data')
+ ax.legend(fontsize=labelsize)
+ ax.set_xlabel("values")
+ ax.set_ylabel("PDF")
+
[10]:
+
np.random.seed(2)
+
+N = 10000
+a = 3.0
+xmin = 1
+xmax = 100
+
+fig, ax = plt.subplots(1, figsize=(5,3))
+x = generate_power_law_dist_bounded(N, a, xmin, xmax)
+print (np.min(x), np.max(x))
+plot_distribution(x, N, a, xmin, ax)
+
+1.000035809608483 74.39513593875918
+
[11]:
+
# find the exponent by fitting a power law by powerlaw package
+
+import powerlaw
+fit = powerlaw.Fit(x) # xmax=50 we can constrain the max value for fitting
+print(f"{fit.power_law.alpha=}") # the exponent
+print(f"{fit.power_law.sigma=}") # standard error
+
+Calculating best minimal value for power law fit
+fit.power_law.alpha=2.995340848455978
+fit.power_law.sigma=0.02600579145725683
+
Generate a discrete power-law distribution
+[12]:
+
from netsci.utils import generate_power_law_discrete
+# Example usage
+gamma = 2.5 # Power-law exponent
+k_min = 1 # Minimum value of k
+k_max = 1000 # Maximum value of k
+size = 100000 # Number of samples
+
+samples = generate_power_law_discrete(size, gamma, k_min, k_max, seed=1)
+fig, ax = plt.subplots(1, figsize=(6,4))
+plot_distribution(samples, size, gamma, k_min, ax)
+
[ ]:
+
+
[ ]:
+
+
[ ]:
+
+
Alstott, J., Bullmore, E. and Plenz, D., 2014. powerlaw: a Python package for analysis of heavy-tailed distributions. PloS one, 9(1), p.e85777.
+probability density function (PDF),
cumulative distribution function (CDF)
complementary cumulative distribution (CCDF)
[13]:
+
import powerlaw
+fig, ax = plt.subplots(1, figsize=(6,4))
+fit = powerlaw.Fit(x) # xmax=50
+print(f"{fit.power_law.alpha=}")
+print(f"{fit.power_law.sigma=}")
+print("-"*70)
+print(fit.distribution_compare("power_law", "exponential"))
+
+powerlaw.plot_pdf(x, linear_bins=0, color='k', marker='o', lw=1, ax=ax);
+
+Calculating best minimal value for power law fit
+fit.power_law.alpha=2.995340848455978
+fit.power_law.sigma=0.02600579145725683
+----------------------------------------------------------------------
+(894.9727455051284, 5.263968413468816e-22)
+
[14]:
+
fig, ax = plt.subplots(1, figsize=(6,4))
+fit.plot_pdf(c='b', lw=2, marker="*", label='pdf', ax=ax)
+fit.power_law.plot_pdf(c='b', ax=ax, ls='--', label='fit pdf')
+fit.plot_ccdf(c='r', ax=ax, ls="-", label='ccdf')
+fit.power_law.plot_ccdf(c='r', ax=ax, ls='--', label='fit ccdf')
+ax.legend(frameon=False);
+
[ ]:
+
+
Code by : Abolfazl Ziaeemehr - https://github.com/Ziaeemehr
+ +First, ensure that python-igraph is installed. You can install it using pip:
+[2]:
+
try:
+ import igraph
+ print(igraph.__version__)
+except ImportError:
+ print("igraph is not installed.")
+
+# If `igraph` is not installed, you can install it using the following command (uncomment the following line):
+# !pip install python-igraph
+
+0.11.6
+
Empty Graph
To create an empty graph:
+[2]:
+
import igraph as ig
+g = ig.Graph()
+
To create a graph with 10 nodes and specific edges, and then get a summary of the graph with print(g)
:
[3]:
+
g = ig.Graph(n=10, edges=[[0, 1], [0, 5]])
+print(g)
+
+IGRAPH U--- 10 2 --
++ edges:
+0--1 0--5
+
This will display the number of vertices and edges, and list the edges if the graph is small.
+You can set and retrieve attributes for graphs, vertices, and edges.
+[4]:
+
import igraph as ig
+
+# Create a graph with 3 nodes
+g = ig.Graph(n=3)
+
+# Assign a 'color' attribute to all nodes
+g.vs["color"] = ["red", "green", "blue"]
+
+# Assign a 'label' attribute to the first node
+g.vs[0]["label"] = "Node 1"
+
+# Assign a 'label' attribute to the second node
+g.vs[1]["label"] = "Node 2"
+
[5]:
+
# Create a graph with edges
+g.add_edges([(0, 1), (1, 2)])
+
+# Assign a 'weight' attribute to all edges
+g.es["weight"] = [1.5, 2.5]
+
[6]:
+
# Get all attributes for the first node
+node_attributes = g.vs[0].attributes()
+print(node_attributes)
+
+{'color': 'red', 'label': 'Node 1'}
+
[7]:
+
# Get the 'color' attribute for all nodes
+colors = g.vs["color"]
+print(colors)
+
+['red', 'green', 'blue']
+
[8]:
+
# Get all attributes for the first edge
+edge_attributes = g.es[0].attributes()
+print(edge_attributes)
+
+{'weight': 1.5}
+
[9]:
+
# Get the 'weight' attribute for all edges
+weights = g.es["weight"]
+print(weights)
+
+[1.5, 2.5]
+
[10]:
+
import os
+from netsci.utils import list_sample_graphs
+from netsci.utils import get_sample_dataset_path
+from netsci.utils import download_sample_dataset
+
+def load_edges(filepath):
+ edges = []
+ with open(filepath, 'r') as file:
+ for line in file:
+ if line.startswith('#'):
+ continue # Skip comments
+ A, B = map(int, line.split())
+ edges.append((A, B))
+ return edges
+
+def load_graphi(filepath:str, directed:bool=False):
+ edges = load_edges(filepath)
+ G = ig.Graph(edges=edges, directed=directed)
+
+ return G
+
+path = get_sample_dataset_path()
+
+# make sure you have downloaded the sample dataset
+download_sample_dataset()
+
+file_name = os.path.join(path, "collaboration.edgelist.txt")
+print(f"{path=}")
+
+G = load_graphi(file_name, directed=False)
+
+print(f"{'Number of vertices:':<30s} {G.vcount():20d}")
+print(f"{'Number of edges:':<30s} {G.ecount():20d}")
+print(f"{'Is directed:':<30s} {str(G.is_directed()):>20s}")
+print(f"{'Density:':<30s} {G.density():20.6f}")
+print(f"{'Average clustering coefficient:':30s}{G.transitivity_undirected():20.6f}")
+
+File /Users/tng/git/workshops/netsci/netsci/datasets/networks.zip already exists.
+path='/Users/tng/git/workshops/netsci/netsci/datasets/'
+Number of vertices: 23133
+Number of edges: 93439
+Is directed: False
+Density: 0.000349
+Average clustering coefficient: 0.264317
+
[11]:
+
# need to install matplotlib and pycairo
+# !pip install pycairo -q
+
[12]:
+
import matplotlib.pyplot as plt
+
+fig, ax = plt.subplots()
+
+# Compute a layout
+layout = g.layout("kk") # Kamada-Kawai layout
+
+# Define visual style
+visual_style = {}
+visual_style["vertex_size"] = 20
+visual_style["vertex_label"] = range(g.vcount())
+visual_style["layout"] = layout
+visual_style["bbox"] = (300, 300) # Bounding box size
+visual_style["margin"] = 20
+
+# Plot the graph
+ig.plot(g, **visual_style)
+
+# Plot the graph in the axes
+ig.plot(g, target=ax, **visual_style)
+plt.show()
+
[ ]:
+
+
Code by : Abolfazl Ziaeemehr - https://github.com/Ziaeemehr
+ +NetworkX provides several types of graphs: - Graph: An undirected graph. - DiGraph: A directed graph. - MultiGraph: An undirected graph that can have multiple edges between nodes. - MultiDiGraph: A directed graph with multiple edges.
+You can create an empty graph as follows:
+[8]:
+
import numpy as np
+import networkx as nx
+
+G = nx.Graph() # or nx.DiGraph(), nx.MultiGraph(), nx.MultiDiGraph()
+
You can add nodes and edges to a graph using the following methods:
+[13]:
+
# Add a single node
+G.add_node(1)
+
+# Add multiple nodes
+G.add_nodes_from([2, 3])
+
+# Add an edge between two nodes
+G.add_edge(1, 2)
+
+# Add multiple edges
+G.add_edges_from([(2, 3), (3, 4)])
+
+# get degree distribution
+degrees = dict(G.degree())
+degrees
+
[13]:
+
+{1: 1, 2: 2, 3: 2, 4: 1}
+
Nodes can be any hashable Python object except None.
+You can also add attributes to nodes and edges:
+[4]:
+
# Add node with attributes
+G.add_node(4, color='red')
+
+# Add edge with attributes
+G.add_edge(1, 3, weight=4.2)
+
NetworkX provides a wide range of graph algorithms, such as shortest path, clustering, and many others. For example, to find the shortest path using Dijkstra’s algorithm:
+[5]:
+
# Create a weighted graph
+G = nx.Graph()
+edges = [('a', 'b', 0.3), ('b', 'c', 0.9), ('a', 'c', 0.5), ('c', 'd', 1.2)]
+G.add_weighted_edges_from(edges)
+
+# Find shortest path
+path = nx.dijkstra_path(G, 'a', 'd')
+print(path) # Output: ['a', 'c', 'd']
+
+['a', 'c', 'd']
+
NetworkX includes basic functionality for visualizing graphs, although it is primarily designed for graph analysis. You can use Matplotlib to draw graphs:
+[6]:
+
import matplotlib.pyplot as plt
+
+G = nx.complete_graph(5)
+nx.draw(G, with_labels=True)
+plt.show()
+
[ ]:
+
+
+ |
+ |
+ |
+ | + |
+ | + |
+ |
+ | + |
|
+
+ | + |
+ |
+ |
Barabási, A.L., 2013. Network science. Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences, 371(1987), p.20120375.
using on Colab (Recommended)
+Go to examples
Open a notebook and click on “open on colab”
Uncomment the cell with pip install command to install the netsci package.
using on local machines
pip3 install -e .
+# or
+pip install "git+https://github.com/Ziaeemehr/netsci.git"
+
View Notebook |
+Open in Colab |
+
---|---|
+ | + |
+ | + |
+ | + |
+ | + |
+ | + |
Finds all self-avoiding paths (SAPs) in a given graph from a start node to a target node. +A self-avoiding path is a path that does not revisit any node.
+The input graph where SAPs will be found.
+The node where the search for SAPs starts.
+The node where the search for SAPs ends.
+Internal parameter used to keep track of the current path during the search.
+A self-avoiding path from the start node to the target node.
+Examples
+>>> import networkx as nx
+>>> G = nx.Graph()
+>>> edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E'), ('C', 'F'), ('E', 'F')]
+>>> G.add_edges_from(edges)
+>>> start_node = 'A'
+>>> target_node = 'F'
+>>> all_saps = list(find_sap(G, start_node, target_node))
+>>> for path in all_saps:
+...     print("->".join(path))
+
Find the Hamiltonian path in the given graph.
+input graph.
+Check if the graph is connected.
+The input graph.
+for directed graphs, it returns +- “weakly connected” +- “strongly connected” +- “disconnected”. +for undirected graphs, +- “connected” +- “disconnected”.
+Generate various graph information.
+The input graph for which the information is to be generated.
+Calculate the longest shortest path (diameter) in a given graph.
+The input graph, which can be directed or undirected. +The graph should be connected, otherwise the diameter will not be defined.
+The longest shortest path (diameter) in the graph. +If the graph is empty, returns 0. +If the graph is not connected, returns float(‘inf’).
+Generate an adjacency list representation of a given graph.
+The input graph for which the adjacency list is to be generated.
+A dictionary where each key is a node in the graph and the corresponding value is a list of neighboring nodes.
+Load a graph and return it as a NetworkX graph.
+The name of the graph. Get names from netsci.utils.show_sample_graphs()
.
If True, print information about the loaded graph. Default is True.
+Loaded graph.
+generate power law random numbers p(k) ~ x^(-a) for a>1
+is the number of random numbers
+is the exponent
+is the minimum value of distribution
+powerlaw distribution
+Generate a power law distribution of floats p(k) ~ x^(-a) for a>1 +which is bounded by xmin and xmax
+number of samples in powerlaw distribution (pwd).
+exponent of the pwd.
+min value in pwd.
+max value in pwd.
+Generate a power law distribution of p(k) ~ x^(-a) for a>1, +with discrete values.
+Find the minimum degree value of a power law graph that results in a connected graph
+make a powerlaw graph with the given parameters
+number of nodes
+exponent of the power law distribution
+expected average degree
+minimum value in the power law distribution. Default is 1.
+maximum value in the power law distribution. Default is 10000.
+Seed for reproducibility. Default is -1.
+tolerance for bisection method. Default is 0.01.
+interval for bisection method. Default is 5.0.
+If True, plot the power law distribution. Default is False.
+additional keyword arguments for plot_pdf function.
+Generates the power law discrete distributions using the inverse transform sampling method.
+Power law exponent.
+Minimum degree.
+Maximum degree.
+Number of samples to generate. Defaults to 1.
+Array of generated power law discrete distributions.
+References
+Devroye, L. (1986). “Non-Uniform Random Variate Generation.” Springer-Verlag, New York.
+Examples
+>>> gamma = 2.5 # Power-law exponent
+>>> k_min = 1 # Minimum value of k
+>>> k_max = 1000 # Maximum value of k
+>>> size = 10000 # Number of samples
+>>> samples = generate_power_law_discrete(size, gamma, k_min, k_max)
+
Plots a NetworkX graph with customizable options.
+A NetworkX graph object (e.g., nx.Graph, nx.DiGraph).
+Additional keyword arguments to customize the plot. These can include:
+Color of the nodes (can be a single color or a list of colors).
+Size of the nodes (single value or list of sizes).
+Color of the edges (can be a single color or a list of colors).
+Width of the edges.
+Whether to draw node labels or not.
+Size of the font for node labels.
+Color of the font for node labels.
+Title of the plot.
+Seed for the random layout algorithm.
+Size of the figure.
+Axes object to draw the plot on. Defaults to None, which will create a new figure.
+Graph layout (e.g., nx.spring_layout(G), nx.circular_layout(G), nx.kamada_kawai_layout(G)). +Defaults to nx.spring_layout(G).
++ n | ||
+ |
+ netsci | + |
+ |
+ netsci.analysis | + |
+ |
+ netsci.plot | + |
+ |
+ netsci.utils | + |
+ Searching for multiple words only shows matches that contain + all words. +
+ + + + + + + + + +