diff --git a/DESCRIPTION b/DESCRIPTION index 8475b8d5d..afd0d4c4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,24 +1,24 @@ Package: migraph -Title: Many Network Measures, Motifs, Members, and Models -Version: 1.3.4 -Date: 2024-03-07 -Description: A set of tools for analysing multimodal networks. - It includes functions for measuring - centrality, centralization, cohesion, closure, constraint and diversity, - as well as for network block-modelling, regression, and diffusion models. - The package is released as a complement to +Title: Univariate and Multivariate Tests for Multimodal and Other Networks +Version: 1.4.0 +Date: 2024-07-18 +Description: A set of tools for testing networks. + It includes functions for univariate and multivariate + conditional uniform graph and quadratic assignment procedure testing, + and network regression. + The package is a complement to 'Multimodal Political Networks' (2021, ISBN:9781108985000), and includes various datasets used in the book. Built on the 'manynet' package, all functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, - and on one-mode, two-mode (bipartite), and sometimes three-mode networks. + and on one-mode and two-mode (bipartite) networks. 
URL: https://stocnet.github.io/migraph/ BugReports: https://github.com/stocnet/migraph/issues License: MIT + file LICENSE Language: en-GB Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: R (>= 3.6.0), manynet @@ -30,10 +30,7 @@ Imports: network, future, furrr, - pillar, purrr, - rlang, - sna, tidygraph, tidyr Suggests: @@ -68,3 +65,6 @@ Authors@R: comment = c(ORCID = "0000-0001-5943-9059")) ) Roxygen: list(markdown = TRUE, roclets = c("namespace", "rd")) +Config/testthat/parallel: true +Config/testthat/edition: 3 +Config/testthat/start-first: helper-functions diff --git a/NAMESPACE b/NAMESPACE index 35ee04aaf..9177352fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,28 +2,10 @@ S3method(glance,netlm) S3method(glance,netlogit) -S3method(plot,graph_test) -S3method(plot,matrix) S3method(plot,netlm) S3method(plot,netlogit) -S3method(plot,network_measures) S3method(plot,network_test) -S3method(plot,node_measure) -S3method(plot,node_member) -S3method(plot,tie_measure) -S3method(print,graph_measure) -S3method(print,graph_motif) -S3method(print,graph_test) -S3method(print,network_measure) -S3method(print,network_motif) S3method(print,network_test) -S3method(print,node_measure) -S3method(print,node_member) -S3method(print,node_motif) -S3method(print,tie_measure) -S3method(summary,node_measure) -S3method(summary,node_member) -S3method(summary,node_motif) S3method(tidy,netlm) S3method(tidy,netlogit) export("%>%") @@ -33,184 +15,28 @@ export(.N) export(aes) export(as.network) export(bind_edges) -export(cluster_concor) -export(cluster_hierarchical) -export(edge_betweenness) -export(edge_bridges) -export(edge_closeness) -export(edge_degree) -export(edge_eigenvector) -export(edge_loop) -export(edge_multiple) -export(edge_reciprocal) export(ggplot) export(ggsave) export(ggtitle) export(glance) -export(graph_adhesion) -export(graph_assortativity) -export(graph_balance) -export(graph_betweenness) -export(graph_blau_index) -export(graph_closeness) 
-export(graph_cohesion) -export(graph_components) -export(graph_congruency) -export(graph_core) -export(graph_degree) -export(graph_density) -export(graph_diameter) -export(graph_diversity) -export(graph_dyad_census) -export(graph_ei_index) -export(graph_eigenvector) -export(graph_equivalency) -export(graph_factions) -export(graph_homophily) -export(graph_length) -export(graph_mixed_census) -export(graph_modularity) -export(graph_reciprocity) -export(graph_smallworld) -export(graph_transitivity) -export(graph_triad_census) export(guides) export(is.network) export(is.tbl_graph) export(is_igraph) -export(k_elbow) -export(k_silhouette) -export(k_strict) export(labs) export(mutate) -export(network_adhesion) -export(network_assortativity) -export(network_balance) -export(network_betweenness) -export(network_brokerage_census) -export(network_change) -export(network_closeness) -export(network_cohesion) -export(network_components) -export(network_congruency) -export(network_connectedness) -export(network_core) -export(network_degree) -export(network_density) -export(network_diameter) -export(network_diversity) -export(network_dyad_census) -export(network_efficiency) -export(network_eigenvector) -export(network_equivalency) -export(network_factions) -export(network_harmonic) -export(network_hazard) -export(network_heterophily) -export(network_homophily) -export(network_immunity) -export(network_indegree) -export(network_infection_length) -export(network_length) -export(network_mixed_census) -export(network_modularity) -export(network_outdegree) -export(network_reach) -export(network_reciprocity) export(network_reg) -export(network_reproduction) -export(network_richclub) -export(network_richness) -export(network_scalefree) -export(network_smallworld) -export(network_spatial) -export(network_stability) -export(network_transitivity) -export(network_transmissibility) -export(network_triad_census) -export(network_upperbound) -export(node_adopter) -export(node_adoption_time) 
-export(node_alpha) -export(node_automorphic_equivalence) -export(node_betweenness) -export(node_bridges) -export(node_brokerage_census) -export(node_brokering) -export(node_brokering_activity) -export(node_brokering_exclusivity) -export(node_closeness) -export(node_components) -export(node_constraint) -export(node_core) -export(node_coreness) -export(node_cuts) -export(node_deg) -export(node_degree) -export(node_diversity) -export(node_eccentricity) -export(node_edge_betweenness) -export(node_efficiency) -export(node_effsize) -export(node_eigenvector) -export(node_equivalence) -export(node_exposure) -export(node_fast_greedy) -export(node_flow) -export(node_fluid) -export(node_harmonic) -export(node_heterophily) -export(node_hierarchy) -export(node_homophily) -export(node_indegree) -export(node_induced) -export(node_infection_length) -export(node_infomap) -export(node_information) -export(node_kernighanlin) -export(node_leading_eigen) -export(node_leiden) -export(node_louvain) -export(node_multidegree) -export(node_neighbours_degree) -export(node_optimal) -export(node_outdegree) -export(node_pagerank) -export(node_path_census) -export(node_posneg) -export(node_power) -export(node_quad_census) -export(node_reach) -export(node_reciprocity) -export(node_redundancy) -export(node_regular_equivalence) -export(node_richness) -export(node_roulette) -export(node_spinglass) -export(node_strong_components) -export(node_structural_equivalence) -export(node_thresholds) -export(node_tie_census) -export(node_transitivity) -export(node_triad_census) -export(node_walktrap) -export(node_weak_components) -export(over_time) -export(over_waves) export(rename) +export(scale_y_discrete) +export(test_distribution) +export(test_fit) export(test_gof) export(test_permutation) export(test_random) export(tidy) -export(tie_betweenness) -export(tie_closeness) -export(tie_cohesion) -export(tie_degree) -export(tie_eigenvector) export(with_graph) export(xlab) export(ylab) -import(tidygraph) 
importFrom(dplyr,"%>%") importFrom(dplyr,bind_cols) importFrom(dplyr,left_join) @@ -220,74 +46,28 @@ importFrom(future,plan) importFrom(generics,glance) importFrom(generics,tidy) importFrom(ggplot2,aes) -importFrom(ggplot2,element_blank) -importFrom(ggplot2,element_text) -importFrom(ggplot2,geom_hline) -importFrom(ggplot2,geom_tile) -importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggsave) importFrom(ggplot2,ggtitle) importFrom(ggplot2,guides) importFrom(ggplot2,labs) -importFrom(ggplot2,scale_fill_gradient) -importFrom(ggplot2,scale_x_discrete) importFrom(ggplot2,scale_y_discrete) -importFrom(ggplot2,theme) -importFrom(ggplot2,theme_grey) importFrom(ggplot2,xlab) importFrom(ggplot2,ylab) -importFrom(igraph,V) -importFrom(igraph,adhesion) -importFrom(igraph,alpha_centrality) -importFrom(igraph,assortativity_degree) -importFrom(igraph,cohesion) -importFrom(igraph,components) -importFrom(igraph,degree) -importFrom(igraph,delete_vertices) -importFrom(igraph,diameter) -importFrom(igraph,distances) -importFrom(igraph,eccentricity) -importFrom(igraph,edge_betweenness) -importFrom(igraph,edge_density) -importFrom(igraph,fit_power_law) -importFrom(igraph,graph_from_incidence_matrix) -importFrom(igraph,is_bipartite) importFrom(igraph,is_igraph) -importFrom(igraph,knn) -importFrom(igraph,make_ego_graph) -importFrom(igraph,mean_distance) -importFrom(igraph,power_centrality) -importFrom(igraph,reciprocity) -importFrom(igraph,transitivity) -importFrom(igraph,triad_census) -importFrom(igraph,vcount) -importFrom(manynet,as_igraph) +importFrom(manynet,bind_node_attributes) +importFrom(manynet,generate_random) +importFrom(manynet,is_complex) +importFrom(manynet,is_directed) importFrom(network,as.network) importFrom(network,is.network) importFrom(purrr,flatten) -importFrom(rlang,"%||%") -importFrom(rlang,.data) -importFrom(rlang,enquo) -importFrom(rlang,eval_tidy) -importFrom(sna,brokerage) -importFrom(sna,flowbet) -importFrom(sna,gcor) 
-importFrom(sna,infocent) -importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,binomial) -importFrom(stats,coef) -importFrom(stats,complete.cases) -importFrom(stats,cor) -importFrom(stats,cutree) importFrom(stats,df.residual) importFrom(stats,glm.fit) -importFrom(stats,hclust) -importFrom(stats,median) importFrom(stats,pchisq) importFrom(stats,quantile) -importFrom(tidygraph,"%E>%") importFrom(tidygraph,.E) importFrom(tidygraph,.G) importFrom(tidygraph,.N) @@ -296,4 +76,3 @@ importFrom(tidygraph,is.tbl_graph) importFrom(tidygraph,mutate) importFrom(tidygraph,rename) importFrom(tidygraph,with_graph) -importFrom(tidyr,pivot_longer) diff --git a/NEWS.md b/NEWS.md index 785d8a521..99b80fbb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,36 @@ +# migraph 1.4.0 + +2024-07-18 + +## Package + +- Updated migraph logo with stocnet address, colorsafe colorway, and larger nodes and ties +- Copied thisRequires() helper into migraph from manynet +- testthat tests now parallelised +- Fixed precision issues in testthat tests +- Declared global variables `.data` and `.graph_context` + +## Measures, Motifs, and Memberships + +- All measures, motifs, and memberships have migrated to `{manynet}` + - see `{manynet}` > v1.0.0 for more details + +## Models + +- Added `test_distribution()` to test whether two vectors/distributions are from the same distribution +- `test_gof()` renamed to `test_fit()` to improve readability + - Split tests documentation into two + - `test_fit()` no longer measures fit against steps where there is no covariance +- Fixed bug in `test_random()` where parameters were passed to `manynet::generate_random()` instead of the original object, which is processed more intuitively within `manynet::generate_random()` (thanks @RWKrause) +- Corrected that `test_random()` returns results on edge-conditioned uniform graphs, not size +- Reexported `ggplot2::scale_y_discrete()` +- Specification advice in `network_reg()` now ignores absent ego terms for 
undirected networks + +## Tutorials + +- Added descriptions to tutorials +- Renamed regression tutorial to the diversity tutorial + # migraph 1.3.4 2024-03-07 diff --git a/R/class_measures.R b/R/class_measures.R deleted file mode 100644 index 64a245c4f..000000000 --- a/R/class_measures.R +++ /dev/null @@ -1,173 +0,0 @@ -make_node_measure <- function(out, .data) { - if(manynet::is_labelled(.data)) names(out) <- manynet::node_names(.data) - class(out) <- c("node_measure", class(out)) - attr(out, "mode") <- manynet::node_mode(.data) - out -} - -make_tie_measure <- function(out, .data) { - class(out) <- c("tie_measure", class(out)) - out -} - -make_network_measure <- function(out, .data) { - class(out) <- c("network_measure", class(out)) - attr(out, "mode") <- manynet::network_dims(.data) - out -} - -make_network_measures <- function(out, .data) { - time <- value <- NULL - out <- dplyr::as_tibble(out) %>% - dplyr::mutate(time = as.numeric(names(out))) %>% - dplyr::select(time, value) - class(out) <- c("network_measures", class(out)) - attr(out, "mode") <- manynet::network_dims(.data) - out -} - -# Printing #### -#' @export -print.node_measure <- function(x, ..., - n = NULL, digits = 3){ - if (any(attr(x, "mode"))) { - for(m in c(FALSE, TRUE)){ - print_tblvec(y = round(as.numeric(x)[attr(x, "mode") == m], - digits = digits), - names = list(names(x)[attr(x, "mode") == m]), - n = n) - if(!m) cat("\n") - } - } else { - print_tblvec(y = round(as.numeric(x), - digits = digits), - names = list(names(x)), - n = n) - } -} - -#' @export -print.tie_measure <- function(x, ..., - n = NULL, - digits = 3) { - print_tblvec(y = round(as.numeric(x), digits = digits), - names = list(names(x)), n = n) -} - -#' @export -print.network_measure <- function(x, ..., - digits = 3) { - if (length(attr(x, "mode")) == 1) { - print(as.numeric(x), digits = digits) - } else { - y <- as.numeric(x) - if (length(y) == 2) - names(y) <- paste("Mode", seq_len(length(attr(x, "mode")))) - print(y, digits = 
digits) - } -} - -# @param FUN A function by which the values should be aggregated -# or summarised when a membership vector is given. By default `mean()`. -# summary(node_degree(mpn_elite_mex), -# membership = node_structural_equivalence(mpn_elite_mex, k = "elbow")) -#' @export -summary.node_measure <- function(object, ..., - membership, - FUN = mean) { - if(missing(membership)){ - out <- c(Minimum = min(object, na.rm = TRUE), - Maximum = max(object, na.rm = TRUE), - Mean = mean(object, na.rm = TRUE), - StdDev = stats::sd(object, na.rm = TRUE), - Missing = sum(is.na(object)) - ) - } else { - out <- vapply(unique(membership), - function(x) FUN(object[membership == x]), FUN.VALUE = 1) - names(out) <- unique(membership) - } - out -} - -# Plotting #### -#' @export -plot.node_measure <- function(x, type = c("h", "d"), ...) { - #type <- match.arg(type) - density <- NULL - if (is.null(attr(x, "mode"))) attr(x, "mode") <- rep(FALSE, length(x)) - data <- data.frame(Score = x, Mode = attr(x, "mode")) - if (length(type) == 2) { - p <- ggplot2::ggplot(data = data, ggplot2::aes(x = .data$Score)) + - ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), - binwidth = ifelse(max(data$Score) > 1, 1, - ifelse(max(data$Score) > - .1, .1, .01))) + - ggplot2::geom_density(col = 2) + - ggplot2::scale_y_continuous("Frequency", sec.axis = - ggplot2::sec_axis(~ ., breaks = c(0,1), - name = "Density")) - } else if (length(type) == 1 & type == "h") { - p <- ggplot2::ggplot(data = data, ggplot2::aes(x = .data$Score)) + - ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), - binwidth = ifelse(max(data$Score) > 1, 1, - ifelse(max(data$Score) > - .1, .1, .01))) + - ggplot2::labs(x = "Density", y = "Frequency") - } else if (length(type) == 1 & type == "d") { - p <- ggplot2::ggplot(data = data, ggplot2::aes(x = .data$Score)) + - ggplot2::geom_density(col = 2) + - ggplot2::ylab("Density") - } - p + - ggplot2::theme_classic() + - ggplot2::theme(panel.grid.major 
= ggplot2::element_line(colour = "grey90")) -} - -#' @export -plot.tie_measure <- function(x, type = c("h", "d"), ...) { - type <- match.arg(type) - data <- data.frame(Score = x) - if (type == "h") { - p <- ggplot2::ggplot(data = data) + - ggplot2::geom_histogram(ggplot2::aes(x = .data$Score), - binwidth = ifelse(max(data$Score) > 1, 1, - ifelse(max(data$Score) > .1, - .1, - .01))) + - ggplot2::ylab("Frequency") - } else { - p <- ggplot2::ggplot(data = data) + - ggplot2::geom_density(ggplot2::aes(x = .data$Score)) + - ggplot2::ylab("Density") - } - p + ggplot2::theme_classic() + - ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "grey90")) -} - -#' @export -plot.network_measures <- function(x, ...) { - ggplot2::ggplot(data = x, ggplot2::aes(x = .data$time, y = .data$value)) + - ggplot2::geom_line() + - ggplot2::theme_minimal() + - ggplot2::xlab("Time") + - ggplot2::ylab("Value") -} - - -# make tblvec #### -print_tblvec <- function(y, names, n){ - mat <- matrix(y, dimnames = names) - mat <- t(mat) - out <- as.data.frame(mat) - tibs <- dplyr::tibble(out, .name_repair = "unique") - setup <- pillar::tbl_format_setup(tibs, width = n) - body <- pillar::tbl_format_body(tibs, setup)[c(TRUE, FALSE, TRUE)] - if(setup$extra_cols_total > 0){ - print(body) - cat(pillar::style_subtle(paste("# ... 
with", - setup$extra_cols_total, - "more values from this nodeset unprinted.", - "Use `print(..., n = Inf)` to print all values."))) - } else print(body) -} diff --git a/R/class_members.R b/R/class_members.R deleted file mode 100644 index 71c2f8246..000000000 --- a/R/class_members.R +++ /dev/null @@ -1,183 +0,0 @@ -make_node_member <- function(out, .data) { - if (manynet::is_labelled(.data)) names(out) <- manynet::node_names(.data) - class(out) <- c("node_member", class(out)) - attr(out, "mode") <- manynet::node_mode(.data) - out -} - -#' @export -print.node_member <- function(x, ..., n = NULL) { - if (any(attr(x, "mode"))) { - for(m in c(FALSE, TRUE)){ - suppressWarnings(print_tblvec(y = as.numeric(x)[attr(x, "mode") == m], - names = list(names(x)[attr(x, "mode") == m]), - n = n)) - if(!m) cat("\n") - } - } else { - suppressWarnings(print_tblvec(y = `if`(all(is.na(as.numeric(x))), x, as.numeric(x)), - names = list(names(x)), - n = n)) - } -} - -#' @export -summary.node_member <- function(object, ..., - n = 6, - digits = 3) { - if (any(attr(object, "mode"))) { - for (i in names(table(object))) { - if (i == names(table(object))[1]) cat(i, "\n") - else cat("\n", i, "\n") - if (!is.null(names(object))) { - y <- paste(names(object[object == i & attr(object, "mode")]), collapse = ", ") - z <- paste(names(object[object == i & !attr(object, "mode")]), collapse = ", ") - } else { - y <- paste(which(object == i & attr(object, "mode")), collapse = ", ") - z <- paste(which(object == i & !attr(object, "mode")), collapse = ", ") - } - cat(" ", y, "\n") - cat(" ", z) - } - } else { - for (i in names(table(object))) { - cat(pillar::style_subtle(paste0("Class ", i, ":"))) - if (!is.null(names(object))) - y <- paste(names(object[object == i]), collapse = ", ") - else - y <- paste(which(object == i), collapse = ", ") - cat(" ", y) - if (i != names(table(object))[length(table(object))]) cat("\n") - } - } -} - -#' @importFrom stats cutree -#' @export -plot.node_member <- function(x, 
...) { - if (!("ggdendro" %in% rownames(utils::installed.packages()))) { - message("Please install package `{ggdendro}`.") - } else { - hc <- attr(x, "hc") - k <- attr(x, "k") - memb <- x[hc$order] - clust <- memb[!duplicated(memb)] - colors <- ifelse(match(memb, clust) %% 2, - "#000000", "#E20020") - ggdendro::ggdendrogram(hc, rotate = TRUE) + - ggplot2::geom_hline(yintercept = hc$height[length(hc$order) - k], - linetype = 2, - color = "#E20020") + - ggplot2::theme(axis.text.x = ggplot2::element_text(colour = "#5c666f"), - axis.text.y = suppressWarnings( - ggplot2::element_text(colour = colors))) - } -} - -# plot(as_matrix(ison_adolescents), -# membership = node_regular_equivalence(ison_adolescents, "e")) -# plot(as_matrix(ison_southern_women), -# membership = node_regular_equivalence(ison_southern_women, "e")) -#' @importFrom tidyr pivot_longer -#' @importFrom ggplot2 ggplot geom_tile aes scale_fill_gradient theme_grey labs theme scale_x_discrete scale_y_discrete geom_vline geom_hline element_blank element_text -#' @importFrom rlang .data -#' @export -plot.matrix <- function(x, ..., membership = NULL) { - - if (!manynet::is_twomode(x)) { - blocked_data <- manynet::as_matrix(x) - if (!is.null(membership)) blocked_data <- blocked_data[order(membership), - order(membership)] - } else if (manynet::is_twomode(x) && - length(intersect(membership[!manynet::node_mode(x)], - membership[!manynet::node_mode(x)])) > 0) { - blocked_data <- manynet::as_matrix(manynet::to_multilevel(x)) - if (!is.null(membership)) blocked_data <- blocked_data[order(membership), - order(membership)] - } else { - blocked_data <- manynet::as_matrix(x) - } - - plot_data <- as.data.frame(blocked_data) %>% - dplyr::mutate(Var1 = rownames(blocked_data)) %>% - tidyr::pivot_longer(!.data[["Var1"]], names_to = "Var2", values_to = "value") - g <- ggplot2::ggplot(plot_data, ggplot2::aes(.data[["Var2"]], .data[["Var1"]])) + - ggplot2::theme_grey(base_size = 9) + - ggplot2::labs(x = "", y = "") + - 
ggplot2::theme( - legend.position = "none", - axis.ticks = ggplot2::element_blank(), - axis.text.y = ggplot2::element_text( - size = 9 * 0.8, - colour = "grey50" - ), - axis.text.x = ggplot2::element_text( - size = 9 * 0.8, - angle = 30, hjust = 0, - colour = "grey50" - ) - ) + - ggplot2::geom_tile(ggplot2::aes(fill = .data[["value"]]), - colour = "white" - ) - - # Color for signed networks - if (manynet::is_signed(x)) { - g <- g + - ggplot2::scale_fill_gradient2(high = "#003049", - mid = "white", - low = "#d62828") - } else { - g <- g + - ggplot2::scale_fill_gradient( - low = "white", - high = "black" - ) - } - - # Structure for multimodal networks - if (!manynet::is_twomode(x)) { - g <- g + - ggplot2::scale_x_discrete(expand = c(0, 0), position = "top", - limits = colnames(blocked_data) - ) + - ggplot2::scale_y_discrete(expand = c(0, 0), - limits = rev(rownames(blocked_data)) - ) - if (!is.null(membership)) - g <- g + ggplot2::geom_vline( - xintercept = c(1 + which(diff(membership[order(membership)]) != 0)) - - .5, - colour = "red" - ) + - ggplot2::geom_hline( - yintercept = nrow(blocked_data) - - c(1 + which(diff(membership[order(membership)]) != 0)) + - 1.5, - colour = "red" - ) - } else { - g <- g + - ggplot2::scale_y_discrete(expand = c(0, 0), - limits = rev(rownames(x[["blocked.data"]])[x[["order.vector"]][["nodes1"]]]) - ) + - ggplot2::scale_x_discrete(expand = c(0, 0), position = "top", - limits = colnames(x[["blocked.data"]])[x[["order.vector"]][["nodes2"]]] - ) + - ggplot2::geom_vline( - xintercept = - c(1 + which(diff(x[["block.membership"]][["nodes2"]]) != 0)) - - .5, - colour = "blue" - ) + - ggplot2::geom_hline( - yintercept = nrow(x[["blocked.data"]]) - - c(1 + which(diff(x[["block.membership"]][["nodes1"]]) != 0)) - + 1.5, - colour = "red" - ) - } - g -} - -elementwise.all.equal <- Vectorize(function(x, y) {isTRUE(all.equal(x, y))}) diff --git a/R/class_motifs.R b/R/class_motifs.R deleted file mode 100644 index 52dec104f..000000000 --- 
a/R/class_motifs.R +++ /dev/null @@ -1,50 +0,0 @@ -make_node_motif <- function(out, .data) { - class(out) <- c("node_motif", class(out)) - attr(out, "mode") <- manynet::node_mode(.data) - out -} - -make_network_motif <- function(out, .data) { - class(out) <- c("network_motif", class(out)) - attr(out, "mode") <- manynet::network_dims(.data) - out -} - -#' @export -print.node_motif <- function(x, ..., - max.length = 6, - digits = 3) { - if(!is.null(attr(x, "dimnames")[[1]])){ - x <- data.frame(names = attr(x, "dimnames")[[1]], x) - } - if (any(attr(x, "mode"))) { - print(dplyr::tibble(as.data.frame(x)[!attr(x, "mode")])) - print(dplyr::tibble(as.data.frame(x)[attr(x, "mode")])) - } else { - print(dplyr::tibble(as.data.frame(x))) - } -} - -# summary(node_triad_census(mpn_elite_mex), -# membership = node_regular_equivalence(mpn_elite_mex, "elbow")) -#' @export -summary.node_motif <- function(object, ..., - membership, - FUN = mean) { - out <- t(sapply(unique(membership), function(x) { - if (sum(membership==x)==1) object[membership==x,] - else apply(object[membership == x, ], 2, FUN) - })) - rownames(out) <- paste("Block", unique(membership)) - dplyr::tibble(as.data.frame(out)) -} - -#' @export -print.network_motif <- function(x, ...) 
{ - names <- list(names(x)) - x <- as.numeric(x) - mat <- matrix(x, dimnames = names) - mat <- t(mat) - out <- as.data.frame(mat) - print(dplyr::tibble(out)) -} diff --git a/R/measure_centrality.R b/R/measure_centrality.R deleted file mode 100644 index d6cfcdf13..000000000 --- a/R/measure_centrality.R +++ /dev/null @@ -1,840 +0,0 @@ -# Degree-like centralities #### - -#' Measures of degree-like centrality and centralisation -#' -#' @description -#' These functions calculate common degree-related centrality measures for one- and two-mode networks: -#' -#' - `node_degree()` measures the degree centrality of nodes in an unweighted network, -#' or weighted degree/strength of nodes in a weighted network; -#' there are several related shortcut functions: -#' - `node_deg()` returns the unnormalised results. -#' - `node_indegree()` returns the `direction = 'in'` results. -#' - `node_outdegree()` returns the `direction = 'out'` results. -#' - `node_multidegree()` measures the ratio between types of ties in a multiplex network. -#' - `node_posneg()` measures the PN (positive-negative) centrality of a signed network. -#' - `tie_degree()` measures the degree centrality of ties in a network -#' - `network_degree()` measures a network's degree centralization; -#' there are several related shortcut functions: -#' - `network_indegree()` returns the `direction = 'out'` results. -#' - `network_outdegree()` returns the `direction = 'out'` results. -#' -#' All measures attempt to use as much information as they are offered, -#' including whether the networks are directed, weighted, or multimodal. -#' If this would produce unintended results, -#' first transform the salient properties using e.g. [to_undirected()] functions. -#' All centrality and centralization measures return normalized measures by default, -#' including for two-mode networks. 
-#' @name degree_centrality -#' @family centrality -#' @family measures -#' @seealso [to_undirected()] for removing edge directions -#' and [to_unweighted()] for removing weights from a graph. -#' @inheritParams cohesion -#' @param normalized Logical scalar, whether the centrality scores are normalized. -#' Different denominators are used depending on whether the object is one-mode or two-mode, -#' the type of centrality, and other arguments. -#' @param alpha Numeric scalar, the positive tuning parameter introduced in -#' Opsahl et al (2010) for trading off between degree and strength centrality measures. -#' By default, `alpha = 0`, which ignores tie weights and the measure is solely based -#' upon degree (the number of ties). -#' `alpha = 1` ignores the number of ties and provides the sum of the tie weights -#' as strength centrality. -#' Values between 0 and 1 reflect different trade-offs in the relative contributions of -#' degree and strength to the final outcome, with 0.5 as the middle ground. -#' Values above 1 penalise for the number of ties. -#' Of two nodes with the same sum of tie weights, the node with fewer ties will obtain -#' the higher score. -#' This argument is ignored except in the case of a weighted network. -#' @param direction Character string, “out” bases the measure on outgoing ties, -#' “in” on incoming ties, and "all" on either/the sum of the two. -#' For two-mode networks, "all" uses as numerator the sum of differences -#' between the maximum centrality score for the mode -#' against all other centrality scores in the network, -#' whereas "in" uses as numerator the sum of differences -#' between the maximum centrality score for the mode -#' against only the centrality scores of the other nodes in that mode. -#' @return A single centralization score if the object was one-mode, -#' and two centralization scores if the object was two-mode. 
-#' @importFrom rlang enquo eval_tidy -#' @importFrom igraph graph_from_incidence_matrix is_bipartite degree V -#' @references -#' Faust, Katherine. 1997. -#' "Centrality in affiliation networks." -#' _Social Networks_ 19(2): 157-191. -#' \doi{10.1016/S0378-8733(96)00300-0}. -#' -#' Borgatti, Stephen P., and Martin G. Everett. 1997. -#' "Network analysis of 2-mode data." -#' _Social Networks_ 19(3): 243-270. -#' \doi{10.1016/S0378-8733(96)00301-2}. -#' -#' Borgatti, Stephen P., and Daniel S. Halgin. 2011. -#' "Analyzing affiliation networks." -#' In _The SAGE Handbook of Social Network Analysis_, -#' edited by John Scott and Peter J. Carrington, 417–33. -#' London, UK: Sage. -#' \doi{10.4135/9781446294413.n28}. -#' -#' Opsahl, Tore, Filip Agneessens, and John Skvoretz. 2010. -#' "Node centrality in weighted networks: Generalizing degree and shortest paths." -#' _Social Networks_ 32, 245-251. -#' \doi{10.1016/j.socnet.2010.03.006} -#' @examples -#' node_degree(mpn_elite_mex) -#' node_degree(ison_southern_women) -#' @return Depending on how and what kind of an object is passed to the function, -#' the function will return a `tidygraph` object where the nodes have been updated -NULL - -#' @rdname degree_centrality -#' @importFrom manynet as_igraph -#' @export -node_degree <- function (.data, normalized = TRUE, alpha = 1, - direction = c("all","out","in")){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - graph <- manynet::as_igraph(.data) - weights <- `if`(manynet::is_weighted(.data), - manynet::tie_weights(.data), NA) - direction <- match.arg(direction) - - # Do the calculations - if (manynet::is_twomode(graph) & normalized){ - degrees <- igraph::degree(graph = graph, - v = igraph::V(graph), - mode = direction, - loops = manynet::is_complex(.data)) - other_set_size <- ifelse(igraph::V(graph)$type, - sum(!igraph::V(graph)$type), - sum(igraph::V(graph)$type)) - out <- degrees/other_set_size - } else { - if (all(is.na(weights))) { - out <- igraph::degree(graph = 
graph, v = igraph::V(graph), - mode = direction, - loops = manynet::is_complex(.data), - normalized = normalized) - } - else { - ki <- igraph::degree(graph = graph, v = igraph::V(graph), - mode = direction, - loops = manynet::is_complex(.data)) - si <- igraph::strength(graph = graph, vids = igraph::V(graph), - mode = direction, - loops = manynet::is_complex(.data), weights = weights) - out <- ki * (si/ki)^alpha - if(normalized) out <- out/max(out) - } - } - out <- make_node_measure(out, .data) - out -} - -#' @rdname degree_centrality -#' @export -node_deg <- function (.data, alpha = 0, direction = c("all","out","in")){ - node_degree(.data, normalized = FALSE, alpha = alpha, direction = direction) -} - -#' @rdname degree_centrality -#' @export -node_outdegree <- function (.data, normalized = TRUE, alpha = 0){ - node_degree(.data, normalized = normalized, alpha = alpha, direction = "out") -} - -#' @rdname degree_centrality -#' @export -node_indegree <- function (.data, normalized = TRUE, alpha = 0){ - node_degree(.data, normalized = normalized, alpha = alpha, direction = "in") -} - -#' @rdname degree_centrality -#' @param tie1 Character string indicating the first uniplex network. -#' @param tie2 Character string indicating the second uniplex network. -#' @export -node_multidegree <- function (.data, tie1, tie2){ - stopifnot(manynet::is_multiplex(.data)) - out <- node_degree(manynet::to_uniplex(.data, tie1)) - - node_degree(manynet::to_uniplex(.data, tie2)) - make_node_measure(out, .data) -} - -#' @rdname degree_centrality -#' @references -#' Everett, Martin G., and Stephen P. Borgatti. 2014. -#' “Networks Containing Negative Ties.” -#' _Social Networks_ 38:111–20. -#' \doi{10.1016/j.socnet.2014.03.005}. 
-#' @export -node_posneg <- function(.data){ - stopifnot(manynet::is_signed(.data)) - pos <- manynet::as_matrix(manynet::to_unsigned(.data, keep = "positive")) - neg <- manynet::as_matrix(manynet::to_unsigned(.data, keep = "negative")) - nn <- manynet::network_nodes(.data) - pn <- pos-neg*2 - diag(pn) <- 0 - idmat <- diag(nn) - v1 <- matrix(1,nn,1) - out <- solve(idmat - ((pn%*%t(pn))/(4*(nn-1)^2))) %*% (idmat+( pn/(2*(nn-1)) )) %*% v1 - make_node_measure(out, .data) -} - -#' @rdname degree_centrality -#' @examples -#' tie_degree(ison_adolescents) -#' @export -tie_degree <- function(.data, normalized = TRUE){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - edge_adj <- manynet::to_ties(.data) - out <- node_degree(edge_adj, normalized = normalized) - class(out) <- "numeric" - out <- make_tie_measure(out, .data) - out -} - -#' @rdname degree_centrality -#' @examples -#' network_degree(ison_southern_women, direction = "in") -#' @export -network_degree <- function(.data, normalized = TRUE, - direction = c("all", "out", "in")){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - direction <- match.arg(direction) - - if (manynet::is_twomode(.data)) { - mat <- manynet::as_matrix(.data) - mode <- c(rep(FALSE, nrow(mat)), rep(TRUE, ncol(mat))) - - out <- list() - if (direction == "all") { - if (!normalized) { - allcent <- c(rowSums(mat), colSums(mat)) - out$nodes1 <- sum(max(allcent[!mode]) - allcent)/((nrow(mat) + ncol(mat))*ncol(mat) - 2*(ncol(mat) + nrow(mat) - 1)) - out$nodes2 <- sum(max(allcent[mode]) - allcent)/((nrow(mat) + ncol(mat))*nrow(mat) - 2*(ncol(mat) + nrow(mat) - 1)) - } else if (normalized) { - allcent <- node_degree(mat, normalized = TRUE) - out$nodes1 <- sum(max(allcent[!mode]) - allcent)/((nrow(mat) + ncol(mat) - 1) - (ncol(mat) - 1) / nrow(mat) - (ncol(mat) + nrow(mat) - 1)/nrow(mat)) - out$nodes2 <- sum(max(allcent[mode]) - allcent)/((ncol(mat) + nrow(mat) - 1) - (nrow(mat) - 1) / ncol(mat) - (nrow(mat) + ncol(mat) - 1)/ncol(mat)) - } - } 
else if (direction == "in") { - out$nodes1 <- sum(max(rowSums(mat)) - rowSums(mat))/((ncol(mat) - 1)*(nrow(mat) - 1)) - out$nodes2 <- sum(max(colSums(mat)) - colSums(mat))/((ncol(mat) - 1)*(nrow(mat) - 1)) - } - out <- c("Mode 1" = out$nodes1, "Mode 2" = out$nodes2) - } else { - out <- igraph::centr_degree(graph = .data, mode = direction, - normalized = normalized)$centralization - } - out <- make_network_measure(out, .data) - out -} - -#' @rdname degree_centrality -#' @export -network_outdegree <- function(.data, normalized = TRUE){ - network_degree(.data, normalized = normalized, direction = "out") -} - -#' @rdname degree_centrality -#' @export -network_indegree <- function(.data, normalized = TRUE){ - network_degree(.data, normalized = normalized, direction = "in") -} - -# Betweenness-like centralities #### - -#' Measures of betweenness-like centrality and centralisation -#' @description -#' These functions calculate common betweenness-related centrality measures for one- and two-mode networks: -#' -#' - `node_betweenness()` measures the betweenness centralities of nodes in a network. -#' - `node_induced()` measures the induced betweenness centralities of nodes in a network. -#' - `node_flow()` measures the flow betweenness centralities of nodes in a network, -#' which uses an electrical current model for information spreading -#' in contrast to the shortest paths model used by normal betweenness centrality. -#' - `tie_betweenness()` measures the number of shortest paths going through a tie. -#' - `network_betweenness()` measures the betweenness centralization for a network. -#' -#' All measures attempt to use as much information as they are offered, -#' including whether the networks are directed, weighted, or multimodal. -#' If this would produce unintended results, -#' first transform the salient properties using e.g. [to_undirected()] functions. 
-#' All centrality and centralization measures return normalized measures by default, -#' including for two-mode networks. -#' @name between_centrality -#' @family centrality -#' @family measures -#' @inheritParams degree_centrality -#' @param cutoff The maximum path length to consider when calculating betweenness. -#' If negative or NULL (the default), there's no limit to the path lengths considered. -NULL - -#' @rdname between_centrality -#' @import tidygraph -#' @examples -#' node_betweenness(mpn_elite_mex) -#' node_betweenness(ison_southern_women) -#' @return A numeric vector giving the betweenness centrality measure of each node. -#' @export -node_betweenness <- function(.data, normalized = TRUE, - cutoff = NULL){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - weights <- `if`(manynet::is_weighted(.data), - manynet::tie_weights(.data), NA) - graph <- manynet::as_igraph(.data) - - # Do the calculations - if (manynet::is_twomode(graph) & normalized){ - betw_scores <- igraph::betweenness(graph = graph, v = igraph::V(graph), - directed = manynet::is_directed(graph)) - other_set_size <- ifelse(igraph::V(graph)$type, sum(!igraph::V(graph)$type), sum(igraph::V(graph)$type)) - set_size <- ifelse(igraph::V(graph)$type, sum(igraph::V(graph)$type), sum(!igraph::V(graph)$type)) - out <- ifelse(set_size > other_set_size, - betw_scores/(2*(set_size-1)*(other_set_size-1)), - betw_scores/(1/2*other_set_size*(other_set_size-1)+1/2*(set_size-1)*(set_size-2)+(set_size-1)*(other_set_size-1))) - } else { - if (is.null(cutoff)) { - out <- igraph::betweenness(graph = graph, v = igraph::V(graph), - directed = manynet::is_directed(graph), weights = weights, - normalized = normalized) - } else { - out <- igraph::betweenness(graph = graph, v = igraph::V(graph), - directed = manynet::is_directed(graph), - cutoff = cutoff, - weights = weights) - } - } - out <- make_node_measure(out, .data) - out -} - -#' @rdname between_centrality -#' @examples -#' node_induced(mpn_elite_mex) -#' 
@references -#' Everett, Martin and Steve Borgatti. 2010. -#' "Induced, endogenous and exogenous centrality" -#' _Social Networks_, 32: 339-344. -#' \doi{10.1016/j.socnet.2010.06.004} -#' @export -node_induced <- function(.data, normalized = TRUE, - cutoff = NULL){ - endog <- sum(node_betweenness(.data, normalized = normalized, cutoff = cutoff), - na.rm = TRUE) - exog <- vapply(seq.int(manynet::network_nodes(.data)), - function(x) sum(node_betweenness(manynet::delete_nodes(.data, x), - normalized = normalized, cutoff = cutoff), - na.rm = TRUE), - FUN.VALUE = numeric(1)) - out <- endog - exog - make_node_measure(out, .data) -} - - -#' @rdname between_centrality -#' @importFrom sna flowbet -#' @export -node_flow <- function(.data, normalized = TRUE){ - out <- sna::flowbet(manynet::as_network(.data), - gmode = ifelse(manynet::is_directed(.data), "digraph", "graph"), - diag = manynet::is_complex(.data), - cmode = ifelse(normalized, "normflow", "rawflow")) - make_node_measure(out, .data) -} - -#' @rdname between_centrality -#' @importFrom igraph edge_betweenness -#' @examples -#' (tb <- tie_betweenness(ison_adolescents)) -#' plot(tb) -#' #ison_adolescents %>% mutate_ties(weight = tb) %>% -#' # autographr() -#' @export -tie_betweenness <- function(.data, normalized = TRUE){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - .data <- manynet::as_igraph(.data) - eddies <- manynet::as_edgelist(.data) - eddies <- paste(eddies[["from"]], eddies[["to"]], sep = "-") - out <- igraph::edge_betweenness(.data) - names(out) <- eddies - out <- make_tie_measure(out, .data) - out -} - -#' @rdname between_centrality -#' @examples -#' network_betweenness(ison_southern_women, direction = "in") -#' @export -network_betweenness <- function(.data, normalized = TRUE, - direction = c("all", "out", "in")) { - - if(missing(.data)) {expect_nodes(); .data <- .G()} - direction <- match.arg(direction) - graph <- manynet::as_igraph(.data) - - if (manynet::is_twomode(.data)) { - becent <- 
node_betweenness(graph, normalized = FALSE) - mode <- igraph::V(graph)$type - mode1 <- length(mode) - sum(mode) - mode2 <- sum(mode) - out <- list() - if (direction == "all") { - if (!normalized) { - out$nodes1 <- sum(max(becent[!mode]) - becent) / ((1/2 * mode2 * (mode2 - 1) + 1/2 * (mode1 - 1)*(mode1 - 2) + (mode1 - 1) * (mode2 - 2))*(mode1 + mode2 - 1) + (mode1 - 1)) - out$nodes2 <- sum(max(becent[mode]) - becent) / ((1/2 * mode1 * (mode1 - 1) + 1/2 * (mode2 - 1)*(mode2 - 2) + (mode2 - 1) * (mode1 - 2))*(mode2 + mode1 - 1) + (mode2 - 1)) - if (mode1 > mode2) { - out$nodes1 <- sum(max(becent[!mode]) - becent) / (2 * (mode1 - 1) * (mode2 - 1) * (mode1 + mode2 - 1) - (mode2 - 1) * (mode1 + mode2 - 2) - 1/2 * (mode1 - mode2) * (mode1 + 3*mode2 - 3)) - } - if (mode2 > mode1) { - out$nodes2 <- sum(max(becent[mode]) - becent) / (2 * (mode2 - 1) * (mode1 - 1) * (mode2 + mode1 - 1) - (mode1 - 1) * (mode2 + mode1 - 2) - 1/2 * (mode2 - mode1) * (mode2 + 3*mode1 - 3)) - } - } else if (normalized) { - out$nodes1 <- sum(max(becent[!mode]) - becent) / ((1/2 * mode2 * (mode2 - 1) + 1/2 * (mode1 - 1)*(mode1 - 2) + (mode1 - 1) * (mode2 - 2))*(mode1 + mode2 - 1) + (mode1 - 1)) - out$nodes2 <- sum(max(becent[mode]) - becent) / ((1/2 * mode1 * (mode1 - 1) + 1/2 * (mode2 - 1)*(mode2 - 2) + (mode2 - 1) * (mode1 - 2))*(mode2 + mode1 - 1) + (mode2 - 1)) - if (mode1 > mode2) { - becent <- node_betweenness(graph, normalized = TRUE) - out$nodes1 <- sum(max(becent[!mode]) - becent) / ((mode1 + mode2 - 1) - (((mode2 - 1)*(mode1 + mode2 - 2) + 1/2*(mode1 - mode2)*(mode1 + (3*mode2) - 3)) / (1/2*(mode1*(mode1 - 1)) + 1/2*(mode2 - 1) * (mode2 - 2) + (mode1 - 1) * (mode2 - 1)))) - } - if (mode2 > mode1) { - becent <- node_betweenness(graph, normalized = TRUE) - out$nodes2 <- sum(max(becent[mode]) - becent) / ((mode1 + mode2 - 1)*((mode1 - 1)*(mode1 + mode2 - 2) / 2*(mode1 - 1)*(mode2 - 1))) - } - } - } else if (direction == "in") { - out$nodes1 <- sum(max(becent[!mode]) - becent[!mode])/((mode1 
- 1)*(1/2*mode2*(mode2 - 1) + 1/2*(mode1 - 1)*(mode1 - 2) + (mode1 - 1)*(mode2 - 1))) - out$nodes2 <- sum(max(becent[mode]) - becent[mode])/((mode2 - 1)*(1/2*mode1*(mode1 - 1) + 1/2 * (mode2 - 1) * (mode2 - 2) + (mode2 - 1) * (mode1 - 1))) - if (mode1 > mode2) { - out$nodes1 <- sum(max(becent[!mode]) - becent[!mode]) / (2 * (mode1 - 1)^2 * (mode2 - 1)) - } - if (mode2 > mode1) { - out$nodes2 <- sum(max(becent[mode]) - becent[mode]) / (2 * (mode2 - 1)^2 * (mode1 - 1)) - } - } - out <- c("Mode 1" = out$nodes1, "Mode 2" = out$nodes2) - } else { - out <- igraph::centr_betw(graph = graph)$centralization - } - out <- make_network_measure(out, .data) - out -} - -# Closeness-like centralities #### - -#' Measures of closeness-like centrality and centralisation -#' @description -#' These functions calculate common closeness-related centrality measures for one- and two-mode networks: -#' -#' - `node_closeness()` measures the closeness centrality of nodes in a network. -#' - `node_reach()` measures nodes' reach centrality, -#' or how many nodes they can reach within _k_ steps. -#' - `node_harmonic()` measures nodes' harmonic centrality or valued centrality, -#' which is thought to behave better than reach centrality for disconnected networks. -#' - `node_information()` measures nodes' information centrality or -#' current-flow closeness centrality. -#' - `tie_closeness()` measures the closeness of each tie to other ties in the network. -#' - `network_closeness()` measures a network's closeness centralization. -#' - `network_reach()` measures a network's reach centralization. -#' - `network_harmonic()` measures a network's harmonic centralization. -#' -#' All measures attempt to use as much information as they are offered, -#' including whether the networks are directed, weighted, or multimodal. -#' If this would produce unintended results, -#' first transform the salient properties using e.g. [to_undirected()] functions. 
-#' All centrality and centralization measures return normalized measures by default, -#' including for two-mode networks. -#' @name close_centrality -#' @family centrality -#' @family measures -#' @inheritParams degree_centrality -NULL - -#' @rdname close_centrality -#' @param cutoff Maximum path length to use during calculations. -#' @import tidygraph -#' @importFrom rlang %||% -#' @examples -#' node_closeness(mpn_elite_mex) -#' node_closeness(ison_southern_women) -#' @export -node_closeness <- function(.data, normalized = TRUE, - direction = "out", cutoff = NULL){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - weights <- `if`(manynet::is_weighted(.data), - manynet::tie_weights(.data), NA) - graph <- manynet::as_igraph(.data) - - # Do the calculations - if (manynet::is_twomode(graph) & normalized){ - # farness <- rowSums(igraph::distances(graph = graph)) - closeness <- igraph::closeness(graph = graph, vids = igraph::V(graph), mode = direction) - other_set_size <- ifelse(igraph::V(graph)$type, sum(!igraph::V(graph)$type), sum(igraph::V(graph)$type)) - set_size <- ifelse(igraph::V(graph)$type, sum(igraph::V(graph)$type), sum(!igraph::V(graph)$type)) - out <- closeness/(1/(other_set_size+2*set_size-2)) - } else { - cutoff <- cutoff %||% -1 - out <- igraph::closeness(graph = graph, vids = igraph::V(graph), mode = direction, - cutoff = cutoff, weights = weights, normalized = normalized) - } - out <- make_node_measure(out, .data) - out -} - -#' @rdname close_centrality -#' @param k Integer of steps out to calculate reach -#' @examples -#' node_reach(ison_adolescents) -#' @export -node_reach <- function(.data, normalized = TRUE, k = 2){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - out <- rowSums(node_path_census(.data)<=k) - if(normalized) out <- out/(manynet::network_nodes(.data)-1) - out <- make_node_measure(out, .data) - out -} - -#' @rdname close_centrality -#' @references -#' Marchiori, M, and V Latora. 2000. -#' "Harmony in the small-world". 
-#' _Physica A_ 285: 539-546. -#' -#' Dekker, Anthony. 2005. -#' "Conceptual distance in social network analysis". -#' _Journal of Social Structure_ 6(3). -#' @export -node_harmonic <- function(.data, normalized = TRUE, k = -1){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - out <- igraph::harmonic_centrality(as_igraph(.data), # weighted if present - normalized = normalized, cutoff = k) - out <- make_node_measure(out, .data) - out -} - -#' @rdname close_centrality -#' @importFrom sna infocent -#' @export -node_information <- function(.data, normalized = TRUE){ - out <- sna::infocent(manynet::as_network(.data), - gmode = ifelse(manynet::is_directed(.data), "digraph", "graph"), - diag = manynet::is_complex(.data)) - make_node_measure(out, .data) -} - -#' @rdname close_centrality -#' @examples -#' (ec <- tie_closeness(ison_adolescents)) -#' plot(ec) -#' #ison_adolescents %>% -#' # activate(edges) %>% mutate(weight = ec) %>% -#' # autographr() -#' @export -tie_closeness <- function(.data, normalized = TRUE){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - edge_adj <- manynet::to_ties(.data) - out <- node_closeness(edge_adj, normalized = normalized) - class(out) <- "numeric" - out <- make_tie_measure(out, .data) - out -} - -#' @rdname close_centrality -#' @examples -#' network_closeness(ison_southern_women, direction = "in") -#' @export -network_closeness <- function(.data, normalized = TRUE, - direction = c("all", "out", "in")){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - direction <- match.arg(direction) - graph <- manynet::as_igraph(.data) - - if (manynet::is_twomode(.data)) { - clcent <- node_closeness(graph, normalized = TRUE) - mode <- igraph::V(graph)$type - mode1 <- length(mode) - sum(mode) - mode2 <- sum(mode) - out <- list() - if (direction == "in") { - out$nodes1 <- sum(max(clcent[!mode]) - clcent[!mode])/(((mode1 - 2)*(mode1 - 1))/(2 * mode1 - 3)) - out$nodes2 <- sum(max(clcent[mode]) - clcent[mode])/(((mode2 - 2)*(mode2 - 1))/(2 
* mode2 - 3)) - if (mode1 > mode2) { #28.43 - lhs <- ((mode2 - 1)*(mode1 - 2) / (2 * mode1 - 3)) - rhs <- ((mode2 - 1)*(mode1 - mode2) / (mode1 + mode2 - 2)) - out$nodes1 <- sum(max(clcent[!mode]) - clcent[!mode])/( lhs + rhs) # 0.2135 - } - if (mode2 > mode1) { - lhs <- ((mode1 - 1)*(mode2 - 2) / (2 * mode2 - 3)) - rhs <- ((mode1 - 1)*(mode2 - mode1) / (mode2 + mode1 - 2)) - out$nodes2 <- sum(max(clcent[mode]) - clcent[mode])/( lhs + rhs) - } - } else { - term1 <- 2*(mode1 - 1) * (mode2 + mode1 - 4)/(3*mode2 + 4*mode1 - 8) - term2 <- 2*(mode1 - 1) * (mode1 - 2)/(2*mode2 + 3*mode1 - 6) - term3 <- 2*(mode1 - 1) * (mode2 - mode1 + 1)/(2*mode2 + 3*mode1 - 4) - out$nodes1 <- sum(max(clcent[!mode]) - clcent) / sum(term1, term2, term3) - term1 <- 2*(mode2 - 1) * (mode1 + mode2 - 4)/(3*mode1 + 4*mode2 - 8) - term2 <- 2*(mode2 - 1) * (mode2 - 2)/(2*mode1 + 3*mode2 - 6) - term3 <- 2*(mode2 - 1) * (mode1 - mode2 + 1)/(2*mode1 + 3*mode2 - 4) - out$nodes2 <- sum(max(clcent[mode]) - clcent) / sum(term1, term2, term3) - - if (mode1 > mode2) { - term1 <- 2*(mode2 - 1) * (mode2 + mode1 - 2) / (3 * mode2 + 4 * mode1 - 8) - term2 <- 2*(mode1 - mode2) * (2 * mode2 - 1) / (5 * mode2 + 2 * mode1 - 6) - term3 <- 2*(mode2 - 1) * (mode1 - 2) / (2 * mode2 + 3 * mode1 - 6) - term4 <- 2 * (mode2 - 1) / (mode1 + 4 * mode2 - 4) - out$nodes1 <- sum(max(clcent[!mode]) - clcent) / sum(term1, term2, term3, term4) - } - if (mode2 > mode1) { - term1 <- 2*(mode1 - 1) * (mode1 + mode2 - 2) / (3 * mode1 + 4 * mode2 - 8) - term2 <- 2*(mode2 - mode1) * (2 * mode1 - 1) / (5 * mode1 + 2 * mode2 - 6) - term3 <- 2*(mode1 - 1) * (mode2 - 2) / (2 * mode1 + 3 * mode2 - 6) - term4 <- 2 * (mode1 - 1) / (mode2 + 4 * mode1 - 4) - out$nodes2 <- sum(max(clcent[mode]) - clcent) / sum(term1, term2, term3, term4) - } - } - out <- c("Mode 1" = out$nodes1, "Mode 2" = out$nodes2) - } else { - out <- igraph::centr_clo(graph = graph, - mode = direction, - normalized = normalized)$centralization - } - out <- 
make_network_measure(out, .data) - out -} - -#' @rdname close_centrality -#' @export -network_reach <- function(.data, normalized = TRUE, k = 2){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - reaches <- node_reach(.data, normalized = FALSE, k = k) - out <- sum(max(reaches) - reaches) - if(normalized) out <- out / sum(manynet::network_nodes(.data) - reaches) - make_network_measure(out, .data) -} - -#' @rdname close_centrality -#' @export -network_harmonic <- function(.data, normalized = TRUE, k = 2){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - harm <- node_harmonic(.data, normalized = FALSE, k = k) - out <- sum(max(harm) - harm) - if(normalized) out <- out / sum(manynet::network_nodes(.data) - harm) - make_network_measure(out, .data) -} - -# Eigenvector-like centralities #### - -#' Measures of eigenvector-like centrality and centralisation -#' @description -#' These functions calculate common eigenvector-related centrality measures for one- and two-mode networks: -#' -#' - `node_eigenvector()` measures the eigenvector centrality of nodes in a network. -#' - `node_power()` measures the Bonacich, beta, or power centrality of nodes in a network. -#' - `node_alpha()` measures the alpha or Katz centrality of nodes in a network. -#' - `node_pagerank()` measures the pagerank centrality of nodes in a network. -#' - `tie_eigenvector()` measures the eigenvector centrality of ties in a network. -#' - `network_eigenvector()` measures the eigenvector centralization for a network. -#' -#' All measures attempt to use as much information as they are offered, -#' including whether the networks are directed, weighted, or multimodal. -#' If this would produce unintended results, -#' first transform the salient properties using e.g. [to_undirected()] functions. -#' All centrality and centralization measures return normalized measures by default, -#' including for two-mode networks. 
-#' @name eigenv_centrality -#' @family centrality -#' @family measures -#' @inheritParams degree_centrality -NULL - -#' @rdname eigenv_centrality -#' @section Eigenvector centrality: -#' Eigenvector centrality operates as a measure of a node's influence in a network. -#' The idea is that being connected to well-connected others results in a higher score. -#' Each node's eigenvector centrality can be defined as: -#' \deqn{x_i = \frac{1}{\lambda} \sum_{j \in N} a_{i,j} x_j} -#' where \eqn{a_{i,j} = 1} if \eqn{i} is linked to \eqn{j} and 0 otherwise, -#' and \eqn{\lambda} is a constant representing the principal eigenvalue. -#' Rather than performing this iteration, -#' most routines solve the eigenvector equation \eqn{Ax = \lambda x}. -#' @param scale Logical scalar, whether to rescale the vector so the maximum score is 1. -#' @details -#' We use `{igraph}` routines behind the scenes here for consistency and because they are often faster. -#' For example, `igraph::eigencentrality()` is approximately 25% faster than `sna::evcent()`. -#' @references -#' Bonacich, Phillip. 1991. -#' “Simultaneous Group and Individual Centralities.” -#' _Social Networks_ 13(2):155–68. -#' \doi{10.1016/0378-8733(91)90018-O}. -#' @examples -#' node_eigenvector(mpn_elite_mex) -#' node_eigenvector(ison_southern_women) -#' @return A numeric vector giving the eigenvector centrality measure of each node. 
-#' @export -node_eigenvector <- function(.data, normalized = TRUE, scale = FALSE){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - weights <- `if`(manynet::is_weighted(.data), - manynet::tie_weights(.data), NA) - graph <- manynet::as_igraph(.data) - - if(!manynet::is_connected(.data)) - warning("Unconnected networks will only allow nodes from one component have non-zero eigenvector scores.") - - # Do the calculations - if (!manynet::is_twomode(graph)){ - out <- igraph::eigen_centrality(graph = graph, - directed = manynet::is_directed(graph), scale = scale, - options = igraph::arpack_defaults())$vector - if (normalized) out <- out / sqrt(1/2) - if(scale) out <- out / max(out) - } else { - eigen1 <- manynet::to_mode1(graph) - eigen1 <- igraph::eigen_centrality(graph = eigen1, - directed = manynet::is_directed(eigen1), scale = scale, - options = igraph::arpack_defaults())$vector - eigen2 <- manynet::to_mode2(graph) - eigen2 <- igraph::eigen_centrality(graph = eigen2, - directed = manynet::is_directed(eigen2), scale = scale, - options = igraph::arpack_defaults())$vector - out <- c(eigen1, eigen2) - if (normalized) out <- out / sqrt(1/2) - if(scale) out <- out / max(out) - } - out <- make_node_measure(out, .data) - out -} - -#' @rdname eigenv_centrality -#' @param exponent Decay rate for the Bonacich power centrality score. -#' @section Power centrality: -#' Power or beta (or Bonacich) centrality -#' @references -#' Bonacich, Phillip. 1987. -#' “Power and Centrality: A Family of Measures.” -#' _The American Journal of Sociology_, 92(5): 1170–82. -#' \doi{10.1086/228631}. -#' @importFrom igraph power_centrality -#' @examples -#' node_power(ison_southern_women, exponent = 0.5) -#' @return A numeric vector giving each node's power centrality measure. 
-#' @export -node_power <- function(.data, normalized = TRUE, scale = FALSE, exponent = 1){ - - if(missing(.data)) {expect_nodes(); .data <- .G()} - weights <- `if`(manynet::is_weighted(.data), - manynet::tie_weights(.data), NA) - graph <- manynet::as_igraph(.data) - - # Do the calculations - if (!manynet::is_twomode(graph)){ - out <- igraph::power_centrality(graph = graph, - exponent = exponent, - rescale = scale) - if (normalized) out <- out / sqrt(1/2) - } else { - eigen1 <- manynet::to_mode1(graph) - eigen1 <- igraph::power_centrality(graph = eigen1, - exponent = exponent, - rescale = scale) - eigen2 <- manynet::to_mode2(graph) - eigen2 <- igraph::power_centrality(graph = eigen2, - exponent = exponent, - rescale = scale) - out <- c(eigen1, eigen2) - if (normalized) out <- out / sqrt(1/2) - } - out <- make_node_measure(out, .data) - out -} - -#' @rdname eigenv_centrality -#' @param alpha A constant that trades off the importance of external influence against the importance of connection. -#' When \eqn{\alpha = 0}, only the external influence matters. -#' As \eqn{\alpha} gets larger, only the connectivity matters and we reduce to eigenvector centrality. -#' By default \eqn{\alpha = 0.85}. -#' @section Alpha centrality: -#' Alpha or Katz (or Katz-Bonacich) centrality operates better than eigenvector centrality -#' for directed networks. -#' Eigenvector centrality will return 0s for all nodes not in the main strongly-connected component. -#' Each node's alpha centrality can be defined as: -#' \deqn{x_i = \frac{1}{\lambda} \sum_{j \in N} a_{i,j} x_j + e_i} -#' where \eqn{a_{i,j} = 1} if \eqn{i} is linked to \eqn{j} and 0 otherwise, -#' \eqn{\lambda} is a constant representing the principal eigenvalue, -#' and \eqn{e_i} is some external influence used to ensure that even nodes beyond the main -#' strongly connected component begin with some basic influence. -#' Note that many equations replace \eqn{\frac{1}{\lambda}} with \eqn{\alpha}, -#' hence the name. 
-#' -#' For example, if \eqn{\alpha = 0.5}, then each direct connection (or alter) would be worth \eqn{(0.5)^1 = 0.5}, -#' each secondary connection (or tertius) would be worth \eqn{(0.5)^2 = 0.25}, -#' each tertiary connection would be worth \eqn{(0.5)^3 = 0.125}, and so on. -#' -#' Rather than performing this iteration though, -#' most routines solve the equation \eqn{x = (I - \frac{1}{\lambda} A^T)^{-1} e}. -#' @importFrom igraph alpha_centrality -#' @references -#' Katz, Leo 1953. -#' "A new status index derived from sociometric analysis". -#' _Psychometrika_. 18(1): 39–43. -#' -#' Bonacich, P. and Lloyd, P. 2001. -#' “Eigenvector-like measures of centrality for asymmetric relations” -#' _Social Networks_. 23(3):191-201. -#' @export -node_alpha <- function(.data, alpha = 0.85){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - make_node_measure(igraph::alpha_centrality(manynet::as_igraph(.data), - alpha = alpha), - .data) -} - -#' @rdname eigenv_centrality -#' @references -#' Brin, Sergey and Page, Larry. 1998. -#' "The anatomy of a large-scale hypertextual web search engine". -#' _Proceedings of the 7th World-Wide Web Conference_. Brisbane, Australia. 
-#' @export -node_pagerank <- function(.data){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - make_node_measure(igraph::page_rank(manynet::as_igraph(.data)), - .data) -} - -#' @rdname eigenv_centrality -#' @examples -#' tie_eigenvector(ison_adolescents) -#' @export -tie_eigenvector <- function(.data, normalized = TRUE){ - if(missing(.data)) {expect_nodes(); .data <- .G()} - edge_adj <- manynet::to_ties(.data) - out <- node_eigenvector(edge_adj, normalized = normalized) - class(out) <- "numeric" - out <- make_tie_measure(out, .data) - out -} - -#' @rdname eigenv_centrality -#' @examples -#' network_eigenvector(mpn_elite_mex) -#' network_eigenvector(ison_southern_women) -#' @export -network_eigenvector <- function(.data, normalized = TRUE){ - if (manynet::is_twomode(.data)) { - out <- c(igraph::centr_eigen(manynet::as_igraph(manynet::to_mode1(.data)), - normalized = normalized)$centralization, - igraph::centr_eigen(manynet::as_igraph(manynet::to_mode2(.data)), - normalized = normalized)$centralization) - } else { - out <- igraph::centr_eigen(manynet::as_igraph(.data), - normalized = normalized)$centralization - } - out <- make_network_measure(out, .data) - out -} - - diff --git a/R/measure_closure.R b/R/measure_closure.R deleted file mode 100644 index 25da08b85..000000000 --- a/R/measure_closure.R +++ /dev/null @@ -1,134 +0,0 @@ -#' Measures of network closure -#' -#' @description -#' These functions offer methods for summarising the closure in configurations -#' in one-, two-, and three-mode networks: -#' -#' - `network_reciprocity()` measures reciprocity in a (usually directed) network. -#' - `node_reciprocity()` measures nodes' reciprocity. -#' - `network_transitivity()` measures transitivity in a network. -#' - `node_transitivity()` measures nodes' transitivity. -#' - `network_equivalency()` measures equivalence or reinforcement -#' in a (usually two-mode) network. -#' - `network_congruency()` measures congruency across two two-mode networks. 
-#' -#' @details -#' For one-mode networks, shallow wrappers of igraph versions exist via -#' `network_reciprocity` and `network_transitivity`. -#' -#' For two-mode networks, `network_equivalency` calculates the proportion of three-paths in the network -#' that are closed by fourth tie to establish a "shared four-cycle" structure. -#' -#' For three-mode networks, `network_congruency` calculates the proportion of three-paths -#' spanning two two-mode networks that are closed by a fourth tie to establish a -#' "congruent four-cycle" structure. -#' @inheritParams cohesion -#' @param object2 Optionally, a second (two-mode) matrix, igraph, or tidygraph -#' @param method For reciprocity, either `default` or `ratio`. -#' See `?igraph::reciprocity` -#' @name closure -#' @family measures -#' @references -#' Robins, Garry L, and Malcolm Alexander. 2004. -#' Small worlds among interlocking directors: Network structure and distance in bipartite graphs. -#' \emph{Computational & Mathematical Organization Theory} 10(1): 69–94. -#' \doi{10.1023/B:CMOT.0000032580.12184.c0}. -#' -#' Knoke, David, Mario Diani, James Hollway, and Dimitris C Christopoulos. 2021. -#' \emph{Multimodal Political Networks}. -#' Cambridge University Press. Cambridge University Press. 
-#' \doi{10.1017/9781108985000} -NULL - -#' @rdname closure -#' @importFrom igraph reciprocity -#' @examples -#' network_reciprocity(ison_southern_women) -#' @export -network_reciprocity <- function(.data, method = "default") { - make_network_measure(igraph::reciprocity(manynet::as_igraph(.data), mode = method), - .data) -} - -#' @rdname closure -#' @examples -#' node_reciprocity(to_unweighted(ison_networkers)) -#' @export -node_reciprocity <- function(.data) { - out <- manynet::as_matrix(.data) - make_node_measure(rowSums(out * t(out))/rowSums(out), - .data) -} - -#' @rdname closure -#' @importFrom igraph transitivity -#' @examples -#' network_transitivity(ison_adolescents) -#' @export -network_transitivity <- function(.data) { - make_network_measure(igraph::transitivity(manynet::as_igraph(.data)), - .data) -} - -#' @rdname closure -#' @examples -#' node_transitivity(ison_adolescents) -#' @export -node_transitivity <- function(.data) { - make_node_measure(igraph::transitivity(manynet::as_igraph(.data), - type = "local"), - .data) -} - -#' @rdname closure -#' @section Equivalency: -#' The `network_equivalency()` function calculates the Robins and Alexander (2004) -#' clustering coefficient for two-mode networks. -#' Note that for weighted two-mode networks, the result is divided by the average tie weight. 
-#' @examples -#' network_equivalency(ison_southern_women) -#' @export -network_equivalency <- function(.data) { - if (manynet::is_twomode(.data)) { - mat <- manynet::as_matrix(.data) - c <- ncol(mat) - indegrees <- colSums(mat) - twopaths <- crossprod(mat) - diag(twopaths) <- 0 - output <- sum(twopaths * (twopaths - 1)) / - (sum(twopaths * (twopaths - 1)) + - sum(twopaths * - (matrix(indegrees, c, c) - twopaths))) - if (is.nan(output)) output <- 1 - if(manynet::is_weighted(.data)) output <- output / mean(mat[mat>0]) - } else stop("This function expects a two-mode network") - make_network_measure(output, .data) -} - -#' @rdname closure -#' @export -network_congruency <- function(.data, object2){ - if(missing(.data) | missing(object2)) stop("This function expects two two-mode networks") - if(!manynet::is_twomode(.data) | !manynet::is_twomode(object2)) stop("This function expects two two-mode networks") - if(manynet::network_dims(.data)[2] != manynet::network_dims(object2)[1]) - stop(paste("This function expects the number of nodes", - "in the second mode of the first network", "to be the same as the number of nodes", - "in the first mode of the second network.")) - mat1 <- manynet::as_matrix(.data) - mat2 <- manynet::as_matrix(object2) - connects <- ncol(mat1) - twopaths1 <- crossprod(mat1) - indegrees <- diag(twopaths1) - diag(twopaths1) <- 0 - twopaths2 <- tcrossprod(mat2) - outdegrees <- diag(twopaths2) - diag(twopaths2) <- 0 - twopaths <- twopaths1 + twopaths2 - degrees <- indegrees + outdegrees - output <- sum(twopaths * (twopaths - 1)) / - (sum(twopaths * (twopaths - 1)) + - sum(twopaths * - (matrix(degrees, connects, connects) - twopaths))) - if (is.nan(output)) output <- 1 - make_network_measure(output, .data) -} diff --git a/R/measure_cohesion.R b/R/measure_cohesion.R deleted file mode 100644 index f7b86843d..000000000 --- a/R/measure_cohesion.R +++ /dev/null @@ -1,108 +0,0 @@ -#' Measures of network cohesion or connectedness -#' -#' @description -#' These 
functions return values or vectors relating to how connected a network is -#' and the number of nodes or edges to remove that would increase fragmentation. -#' -#' - `network_density()` measures the ratio of ties to the number -#' of possible ties. -#' - `network_components()` measures the number of (strong) components -#' in the network. -#' - `network_cohesion()` measures the minimum number of nodes to remove -#' from the network needed to increase the number of components. -#' - `network_adhesion()` measures the minimum number of ties to remove -#' from the network needed to increase the number of components. -#' - `network_diameter()` measures the maximum path length in the network. -#' - `network_length()` measures the average path length in the network. -#' @param .data An object of a `{manynet}`-consistent class: -#' \itemize{ -#' \item matrix (adjacency or incidence) from `{base}` R -#' \item edgelist, a data frame from `{base}` R or tibble from `{tibble}` -#' \item igraph, from the `{igraph}` package -#' \item network, from the `{network}` package -#' \item tbl_graph, from the `{tidygraph}` package -#' } -#' @name cohesion -#' @family measures -NULL - -#' @rdname cohesion -#' @importFrom igraph edge_density -#' @examples -#' network_density(mpn_elite_mex) -#' network_density(mpn_elite_usa_advice) -#' @export -network_density <- function(.data) { - if (manynet::is_twomode(.data)) { - mat <- manynet::as_matrix(.data) - out <- sum(mat) / (nrow(mat) * ncol(mat)) - } else { - out <- igraph::edge_density(manynet::as_igraph(.data)) - } - make_network_measure(out, .data) -} - -#' @rdname cohesion -#' @section Cohesion: -#' To get the 'weak' components of a directed graph, -#' please use `manynet::to_undirected()` first. 
-#' @importFrom igraph components -#' @examples -#' network_components(mpn_ryanair) -#' network_components(manynet::to_undirected(mpn_ryanair)) -#' @export -network_components <- function(.data){ - object <- manynet::as_igraph(.data) - make_network_measure(igraph::components(object, mode = "strong")$no, - object) -} - -#' @rdname cohesion -#' @importFrom igraph cohesion -#' @references -#' White, Douglas R and Frank Harary. 2001. -#' "The Cohesiveness of Blocks In Social Networks: Node Connectivity and Conditional Density." -#' _Sociological Methodology_ 31(1): 305-59. -#' @examples -#' network_cohesion(manynet::ison_marvel_relationships) -#' network_cohesion(manynet::to_giant(manynet::ison_marvel_relationships)) -#' @export -network_cohesion <- function(.data){ - make_network_measure(igraph::cohesion(manynet::as_igraph(.data)), .data) -} - -#' @rdname cohesion -#' @importFrom igraph adhesion -#' @examples -#' network_adhesion(manynet::ison_marvel_relationships) -#' network_adhesion(manynet::to_giant(manynet::ison_marvel_relationships)) -#' @export -network_adhesion <- function(.data){ - make_network_measure(igraph::adhesion(manynet::as_igraph(.data)), .data) -} - -#' @rdname cohesion -#' @importFrom igraph diameter -#' @examples -#' network_diameter(manynet::ison_marvel_relationships) -#' network_diameter(manynet::to_giant(manynet::ison_marvel_relationships)) -#' @export -network_diameter <- function(.data){ - object <- manynet::as_igraph(.data) - make_network_measure(igraph::diameter(object, - directed = manynet::is_directed(object)), - object) -} - -#' @rdname cohesion -#' @importFrom igraph mean_distance -#' @examples -#' network_length(manynet::ison_marvel_relationships) -#' network_length(manynet::to_giant(manynet::ison_marvel_relationships)) -#' @export -network_length <- function(.data){ - object <- manynet::as_igraph(.data) - make_network_measure(igraph::mean_distance(object, - directed = manynet::is_directed(object)), - object) -} diff --git 
a/R/measure_diffusion.R b/R/measure_diffusion.R deleted file mode 100644 index 3bb594843..000000000 --- a/R/measure_diffusion.R +++ /dev/null @@ -1,394 +0,0 @@ -# net_diffusion #### - -#' Measures of network diffusion -#' @description -#' These functions allow measurement of various features of -#' a diffusion process: -#' -#' - `network_transmissibility()` measures the average transmissibility observed -#' in a diffusion simulation, or the number of new infections over -#' the number of susceptible nodes. -#' - `network_infection_length()` measures the average number of time steps -#' nodes remain infected once they become infected. -#' - `network_reproduction()` measures the observed reproductive number -#' in a diffusion simulation as the network's transmissibility over -#' the network's average infection length. -#' - `network_immunity()` measures the proportion of nodes that would need -#' to be protected through vaccination, isolation, or recovery for herd immunity to be reached. -#' - `network_hazard()` measures the hazard rate or instantaneous probability that -#' nodes will adopt/become infected at that time -#' -#' @param diff_model A valid network diffusion model, -#' as created by `as_diffusion()` or `play_diffusion()`. -#' @family measures -#' @family diffusion -#' @name net_diffusion -#' @examples -#' smeg <- manynet::generate_smallworld(15, 0.025) -#' smeg_diff <- play_diffusion(smeg, recovery = 0.2) -#' plot(smeg_diff) -#' @references -#' Kermack, W. and McKendrick, A., 1927. "A contribution to the mathematical theory of epidemics". -#' _Proc. R. Soc. London A_ 115: 700-721. -NULL - -#' @rdname net_diffusion -#' @section Transmissibility: -#' `network_transmissibility()` measures how many directly susceptible nodes -#' each infected node will infect in each time period, on average. 
-#' That is: -#' \deqn{T = \frac{1}{n}\sum_{j=1}^n \frac{i_{j}}{s_{j}}} -#' where \eqn{i} is the number of new infections in each time period, \eqn{j \in n}, -#' and \eqn{s} is the number of nodes that could have been infected in that time period -#' (note that \eqn{s \neq S}, or -#' the number of nodes that are susceptible in the population). -#' \eqn{T} can be interpreted as the proportion of susceptible nodes that are -#' infected at each time period. -#' @examples -#' # To calculate the average transmissibility for a given diffusion model -#' network_transmissibility(smeg_diff) -#' @export -network_transmissibility <- function(diff_model){ - out <- diff_model$I_new/diff_model$s - out <- out[-1] - out <- out[!is.infinite(out)] - out <- out[!is.nan(out)] - make_network_measure(mean(out, na.rm = TRUE), - attr(diff_model, "network")) -} - -#' @rdname net_diffusion -#' @section Infection length: -#' `network_infection_length()` measures the average number of time steps that -#' nodes in a network remain infected. -#' Note that in a diffusion model without recovery, average infection length -#' will be infinite. -#' This will also be the case where there is right censoring. -#' The longer nodes remain infected, the longer they can infect others. -#' @examples -#' # To calculate the average infection length for a given diffusion model -#' network_infection_length(smeg_diff) -#' @export -network_infection_length <- function(diff_model){ - make_network_measure(mean(node_infection_length(diff_model), na.rm = TRUE), - attr(diff_model, "network")) -} - -#' @rdname net_diffusion -#' @section Reproduction number: -#' `network_reproduction()` measures a given diffusion's reproductive number. -#' Here it is calculated as: -#' \deqn{R = \min\left(\frac{T}{1/IL}, \bar{k}\right)} -#' where \eqn{T} is the observed transmissibility in a diffusion -#' and \eqn{IL} is the observed infection length in a diffusion. 
-#' Since \eqn{IL} can be infinite where there is no recovery -#' or there is right censoring, -#' and since network structure places an upper limit on how many -#' nodes each node may further infect (their degree), -#' this function returns the minimum of \eqn{R_0} -#' and the network's average degree. -#' -#' Interpretation of the reproduction number is oriented around R = 1. -#' Where \eqn{R > 1}, the 'disease' will 'infect' more and more -#' nodes in the network. -#' Where \eqn{R < 1}, the 'disease' will not sustain itself and eventually -#' die out. -#' Where \eqn{R = 1}, the 'disease' will continue as endemic, -#' if conditions allow. -#' @examples -#' # To calculate the reproduction number for a given diffusion model -#' network_reproduction(smeg_diff) -#' @export -network_reproduction <- function(diff_model){ - net <- attr(diff_model, "network") - out <- network_transmissibility(diff_model)/ - (1/network_infection_length(diff_model)) - out <- min(out, mean(node_degree(net, normalized = FALSE))) - make_network_measure(out, net) -} - -#' @rdname net_diffusion -#' @section Herd immunity: -#' `network_immunity()` estimates the proportion of a network -#' that need to be protected from infection for herd immunity -#' to be achieved. -#' This is known as the Herd Immunity Threshold or HIT: -#' \deqn{1 - \frac{1}{R}} -#' where \eqn{R} is the reproduction number from `network_reproduction()`. -#' The HIT indicates the threshold at which -#' the reduction of susceptible members of the network means -#' that infections will no longer keep increasing. -#' Note that there may still be more infections after this threshold has been reached, -#' but there should be fewer and fewer. -#' These excess infections are called the _overshoot_. -#' This function does _not_ take into account the structure -#' of the network, instead using the average degree. -#' -#' Interpretation is quite straightforward. 
-#' A HIT or immunity score of 0.75 would mean that 75% of the nodes in the network -#' would need to be vaccinated or otherwise protected to achieve herd immunity. -#' To identify how many nodes this would be, multiply this proportion with the number -#' of nodes in the network. -#' @examples -#' # Calculating the proportion required to achieve herd immunity -#' network_immunity(smeg_diff) -#' # To find the number of nodes to be vaccinated -#' ceiling(network_immunity(smeg_diff) * manynet::network_nodes(smeg)) -#' @export -network_immunity <- function(diff_model){ - net <- attr(diff_model, "network") - out <- 1 - 1/network_reproduction(diff_model) - make_network_measure(out, net) -} - -#' @rdname net_diffusion -#' @section Hazard rate: -#' The hazard rate is the instantaneous probability of adoption/infection at each time point (Allison 1984). -#' In survival analysis, hazard rate is formally defined as: -#' -#' \deqn{% -#' \lambda(t)=\lim_{h\to +0}\frac{F(t+h)-F(t)}{h}\frac{1}{1-F(t)} % -#' }{% -#' \lambda(t-1)= lim (t -> +0) [F(t+h)-F(t)]/h * 1/[1-F(t)] % -#' } -#' -#' By approximating \eqn{h=1}, we can rewrite the equation as -#' -#' \deqn{% -#' \lambda(t)=\frac{F(t+1)-F(t)}{1-F(t)} % -#' }{% -#' \lambda(t-1)= [F(t+1)-F(t)]/[1-F(t)] % -#' } -#' -#' If we estimate \eqn{F(t)}, -#' the probability of not having adopted the innovation in time \eqn{t}, -#' from the proportion of adopters in that time, -#' such that \eqn{F(t) \sim q_t/n}{F(t) ~ q(t)/n}, we now have (ultimately for \eqn{t>1}): -#' -#' \deqn{% -#' \lambda(t)=\frac{q_{t+1}/n-q_t/n}{1-q_t/n} = \frac{q_{t+1} - q_t}{n - q_t} = \frac{q_t - q_{t-1}}{n - q_{t-1}} % -#' }{% -#' \lambda(t-1)= [q(t+1)/n-q(t)/n]/[1-q(t)/n] = [q(t+1) - q(t)]/[n - q(t)] = [q(t) - q(t-1)]/[n - q(t-1)] % -#' } -#' -#' where \eqn{q_i}{q(i)} is the number of adopters in time \eqn{t}, -#' and \eqn{n} is the number of vertices in the graph. -#' -#' The shape of the hazard rate indicates the pattern of new adopters over time. 
-#' Rapid diffusion with convex cumulative adoption curves will have -#' hazard functions that peak early and decay over time. -#' Slow concave cumulative adoption curves will have -#' hazard functions that are low early and rise over time. -#' Smooth hazard curves indicate constant adoption whereas -#' those that oscillate indicate variability in adoption behavior over time. -#' @source `{netdiffuseR}` -#' @references -#' Allison, P. 1984. _Event history analysis regression for longitudinal event data_. -#' London: Sage Publications. -#' -#' Wooldridge, J. M. 2010. _Econometric Analysis of Cross Section and Panel Data_ (2nd ed.). -#' Cambridge: MIT Press. -#' @examples -#' # To calculate the hazard rates at each time point -#' network_hazard(play_diffusion(smeg, transmissibility = 0.3)) -#' @export -network_hazard <- function(diff_model){ - out <- (diff_model$I - dplyr::lag(diff_model$I)) / - (diff_model$n - dplyr::lag(diff_model$I)) - out -} - -# node_diffusion #### - -#' Measures of nodes in a diffusion -#' @description -#' These functions allow measurement of various features of -#' a diffusion process: -#' -#' - `node_adoption_time()`: Measures the number of time steps until -#' nodes adopt/become infected -#' - `node_adopter()`: Classifies membership of nodes into diffusion categories -#' - `node_thresholds()`: Measures nodes' thresholds from the amount -#' of exposure they had when they became infected -#' - `node_infection_length()`: Measures the average length nodes that become -#' infected remain infected in a compartmental model with recovery -#' - `node_exposure()`: Measures how many exposures nodes have to -#' a given mark -#' - `node_is_exposed()`: Marks the nodes that are susceptible, -#' i.e. 
are in the immediate neighbourhood of given mark vector -#' -#' @inheritParams cohesion -#' @inheritParams net_diffusion -#' @family measures -#' @family diffusion -#' @name node_diffusion -#' @examples -#' smeg <- manynet::generate_smallworld(15, 0.025) -#' smeg_diff <- play_diffusion(smeg, recovery = 0.2) -#' plot(smeg_diff) -#' @references -#' Valente, Tom W. 1995. _Network models of the diffusion of innovations_ -#' (2nd ed.). Cresskill N.J.: Hampton Press. -NULL - -#' @rdname node_diffusion -#' @section Adoption time: -#' `node_adoption_time()` measures the time units it took -#' until each node became infected. -#' Note that an adoption time of 0 indicates that this was a seed node. -#' @examples -#' # To measure when nodes adopted a diffusion/were infected -#' (times <- node_adoption_time(smeg_diff)) -#' @export -node_adoption_time <- function(diff_model){ - event <- nodes <- NULL - out <- summary(diff_model) |> dplyr::filter(event == "I") |> - dplyr::distinct(nodes, .keep_all = TRUE) |> - dplyr::select(nodes,t) - net <- attr(diff_model, "network") - if(!manynet::is_labelled(net)) - out <- dplyr::arrange(out, nodes) else if (is.numeric(out$nodes)) - out$nodes <- manynet::node_names(net)[out$nodes] - out <- stats::setNames(out$t, out$nodes) - if(length(out) != manynet::network_nodes(net)){ - full <- rep(Inf, manynet::network_nodes(net)) - names(full) <- `if`(manynet::is_labelled(net), - manynet::node_names(net), - as.character(seq_len(manynet::network_nodes(net)))) - full[match(names(out), names(full))] <- out - out <- `if`(manynet::is_labelled(net), full, unname(full)) - } - if(!manynet::is_labelled(net)) out <- unname(out) - make_node_measure(out, net) -} - -#' @rdname node_diffusion -#' @section Adopter class: -#' `node_adopter()` classifies the nodes involved in a diffusion -#' by where on the distribution of adopters they fell. 
-#' Valente (1995) defines five memberships: -#' -#' - _Early adopter_: those with an adoption time less than -#' the average adoption time minus one standard deviation of adoptions times -#' - _Early majority_: those with an adoption time between -#' the average adoption time and -#' the average adoption time minus one standard deviation of adoptions times -#' - _Late majority_: those with an adoption time between -#' the average adoption time and -#' the average adoption time plus one standard deviation of adoptions times -#' - _Laggard_: those with an adoption time greater than -#' the average adoption time plus one standard deviation of adoptions times -#' - _Non-adopter_: those without an adoption time, -#' i.e. never adopted -#' @examples -#' # To classify nodes by their position in the adoption curve -#' (adopts <- node_adopter(smeg_diff)) -#' summary(adopts) -#' summary(times, membership = adopts) -#' @export -node_adopter <- function(diff_model){ - toa <- node_adoption_time(diff_model) - toa[is.infinite(toa)] <- NA - avg <- mean(toa, na.rm = TRUE) - sdv <- stats::sd(toa, na.rm = TRUE) - out <- ifelse(toa < (avg - sdv) | toa == 0, "Early Adopter", - ifelse(toa > (avg + sdv), "Laggard", - ifelse((avg - sdv) < toa & toa <= avg, "Early Majority", - ifelse(avg < toa & toa <= avg + sdv, "Late Majority", - "Non-Adopter")))) - out[is.na(out)] <- "Non-Adopter" - make_node_member(out, attr(diff_model, "network")) -} - -#' @rdname node_diffusion -#' @section Thresholds: -#' `node_thresholds()` infers nodes' thresholds based on how much -#' exposure they had when they were infected. -#' This inference is of course imperfect, -#' especially where there is a sudden increase in exposure, -#' but it can be used heuristically. 
-#' @examples -#' # To infer nodes' thresholds -#' node_thresholds(smeg_diff) -#' @export -node_thresholds <- function(diff_model){ - event <- nodes <- NULL - exposure <- NULL - out <- summary(diff_model) - net <- attr(diff_model, "network") - if(!"exposure" %in% names(out)){ - out[,'exposure'] <- NA_integer_ - for(v in unique(out$t)){ - out$exposure[out$t == v] <- node_exposure(diff_model, time = v)[out$nodes[out$t == v]] - } - } - if(any(out$event == "E")) - out <- out |> dplyr::filter(event == "E") else - out <- out |> dplyr::filter(event == "I") - out <- out |> dplyr::distinct(nodes, .keep_all = TRUE) |> - dplyr::select(nodes, exposure) - out <- stats::setNames(out$exposure, out$nodes) - if(length(out) != manynet::network_nodes(net)){ - full <- stats::setNames(rep(Inf, manynet::network_nodes(net)), - manynet::node_names(net)) - full[match(names(out), names(full))] <- out - out <- full - } - make_node_measure(out, net) -} - -#' @rdname node_diffusion -#' @section Infection length: -#' `node_infection_length()` measures the average length of time that nodes -#' that become infected remain infected in a compartmental model with recovery. -#' Infections that are not concluded by the end of the study period are -#' calculated as infinite. -#' @examples -#' # To measure how long each node remains infected for -#' node_infection_length(smeg_diff) -#' @export -node_infection_length <- function(diff_model){ - nodes <- NULL - events <- attr(diff_model, "events") - out <- vapply(seq_len(diff_model$n[1]), - function(x) ifelse("I" %in% dplyr::filter(events, nodes == x)$event, - ifelse("R" %in% dplyr::filter(events, nodes == x)$event, - mean(diff(dplyr::filter(events, nodes == x)$t)), - Inf), - NA), - FUN.VALUE = numeric(1)) - make_node_measure(out, attr(diff_model, "network")) -} - -#' @rdname node_diffusion -#' @param mark A valid 'node_mark' object or -#' logical vector (TRUE/FALSE) of length equal to -#' the number of nodes in the network. 
-#' @param time A time point until which infections/adoptions should be -#' identified. By default `time = 0`. -#' @section Exposure: -#' `node_exposure()` calculates the number of infected/adopting nodes -#' to which each susceptible node is exposed. -#' It usually expects network data and -#' an index or mark (TRUE/FALSE) vector of those nodes which are currently infected, -#' but if a diff_model is supplied instead it will return -#' nodes exposure at \eqn{t = 0}. -#' @examples -#' # To measure how much exposure nodes have to a given mark -#' node_exposure(smeg, mark = c(1,3)) -#' node_exposure(smeg_diff) -#' @export -node_exposure <- function(.data, mark, time = 0){ - if(missing(mark) && inherits(.data, "diff_model")){ - mark <- manynet::node_is_infected(.data, time = time) - .data <- attr(.data, "network") - } - if(is.logical(mark)) mark <- which(mark) - contacts <- unlist(lapply(igraph::neighborhood(.data, nodes = mark), - function(x) setdiff(x, mark))) - # count exposures for each node: - tabcontact <- table(contacts) - out <- rep(0, manynet::network_nodes(.data)) - out[as.numeric(names(tabcontact))] <- unname(tabcontact) - make_node_measure(out, .data) -} diff --git a/R/measure_features.R b/R/measure_features.R deleted file mode 100644 index 63ee5d498..000000000 --- a/R/measure_features.R +++ /dev/null @@ -1,377 +0,0 @@ -# Topological features #### - -#' Measures of network topological features -#' @description -#' These functions measure certain topological features of networks: -#' -#' - `network_core()` measures the correlation between a network -#' and a core-periphery model with the same dimensions. -#' - `network_richclub()` measures the rich-club coefficient of a network. -#' - `network_factions()` measures the correlation between a network -#' and a component model with the same dimensions. -#' If no 'membership' vector is given for the data, -#' `node_kernighanlin()` is used to partition nodes into two groups. 
-#' - `network_modularity()` measures the modularity of a network -#' based on nodes' membership in defined clusters. -#' - `network_smallworld()` measures the small-world coefficient for one- or -#' two-mode networks. Small-world networks can be highly clustered and yet -#' have short path lengths. -#' - `network_scalefree()` measures the exponent of a fitted -#' power-law distribution. An exponent between 2 and 3 usually indicates -#' a power-law distribution. -#' - `network_balance()` measures the structural balance index on -#' the proportion of balanced triangles, -#' ranging between `0` if all triangles are imbalanced and -#' `1` if all triangles are balanced. -#' - `network_change()` measures the Hamming distance between two or more networks. -#' - `network_stability()` measures the Jaccard index of stability between two or more networks. -#' -#' These `network_*()` functions return a single numeric scalar or value. -#' @inheritParams cohesion -#' @param membership A vector of partition membership. -#' @name features -#' @family measures -NULL - -#' @rdname features -#' @examples -#' network_core(ison_adolescents) -#' network_core(ison_southern_women) -#' @references -#' Borgatti, Stephen P., and Martin G. Everett. 2000. -#' “Models of Core/Periphery Structures.” -#' _Social Networks_ 21(4):375–95. 
-#' \doi{10.1016/S0378-8733(99)00019-2} -#' @export -network_core <- function(.data, - membership = NULL){ - if(is.null(membership)) membership <- node_core(.data) - out <- stats::cor(c(manynet::as_matrix(.data)), - c(manynet::as_matrix(manynet::create_core(.data, - membership = membership)))) - make_network_measure(out, .data) -} - -#' @rdname features -#' @examples -#' network_richclub(ison_adolescents) -#' @export -network_richclub <- function(.data){ - coefs <- vector() - temp <- .data - for(k in seq_len(max(node_degree(temp, normalized = FALSE)))){ - richclub <- manynet::to_subgraph(temp, node_degree(temp, normalized = FALSE) >= k) - nk <- manynet::network_nodes(richclub) - ek <- ifelse(manynet::is_directed(temp), - manynet::network_ties(richclub), - 2*manynet::network_ties(richclub)) - coefs <- c(coefs, (ek)/(nk*(nk-1))) - } - - elbow_finder <- function(x_values, y_values) { - # Max values to create line - # if(min(x_values)==1) x_values <- x_values[2:length(x_values)] - # if(min(y_values)==0) y_values <- y_values[2:length(y_values)] - max_df <- data.frame(x = c(1, min(which(y_values == 1))), - y = c(min(y_values), max(y_values))) - # Creating straight line between the max values - fit <- stats::lm(max_df$y ~ max_df$x) - # Distance from point to line - distances <- vector() - for (i in seq_len(length(x_values))) { - distances <- c(distances, - abs(stats::coef(fit)[2]*x_values[i] - - y_values[i] + - coef(fit)[1]) / - sqrt(stats::coef(fit)[2]^2 + 1^2)) - } - # Max distance point - x_max_dist <- x_values[which.max(distances)] - x_max_dist - } - - coefs[is.nan(coefs)] <- 1 - out <- coefs[elbow_finder(seq_along(coefs), coefs)] - # max(coefs, na.rm = TRUE) - make_network_measure(out, .data) -} - -#' @rdname features -#' @examples -#' network_factions(mpn_elite_mex) -#' network_factions(ison_southern_women) -#' @export -network_factions <- function(.data, - membership = NULL){ - if(is.null(membership)) - membership <- node_kernighanlin(.data) - out <- 
stats::cor(c(manynet::as_matrix(.data)), - c(manynet::as_matrix(manynet::create_components(.data, - membership = membership)))) - make_network_measure(out, .data) -} - -#' @rdname features -#' @section Modularity: -#' Modularity measures the difference between the number of ties within each community -#' from the number of ties expected within each community in a random graph -#' with the same degrees, and ranges between -1 and +1. -#' Modularity scores of +1 mean that ties only appear within communities, -#' while -1 would mean that ties only appear between communities. -#' A score of 0 would mean that ties are half within and half between communities, -#' as one would expect in a random graph. -#' -#' Modularity faces a difficult problem known as the resolution limit -#' (Fortunato and Barthélemy 2007). -#' This problem appears when optimising modularity, -#' particularly with large networks or depending on the degree of interconnectedness, -#' can miss small clusters that 'hide' inside larger clusters. -#' In the extreme case, this can be where they are only connected -#' to the rest of the network through a single tie. -#' @param resolution A proportion indicating the resolution scale. -#' By default 1. -#' @examples -#' network_modularity(ison_adolescents, -#' node_kernighanlin(ison_adolescents)) -#' network_modularity(ison_southern_women, -#' node_kernighanlin(ison_southern_women)) -#' @references -#' Murata, Tsuyoshi. 2010. Modularity for Bipartite Networks. -#' In: Memon, N., Xu, J., Hicks, D., Chen, H. (eds) -#' _Data Mining for Social Network Data. Annals of Information Systems_, Vol 12. -#' Springer, Boston, MA. 
-#' \doi{10.1007/978-1-4419-6287-4_7} -#' @export -network_modularity <- function(.data, - membership = NULL, - resolution = 1){ - if(is.null(membership)) - membership <- node_kernighanlin(.data) - if(!manynet::is_graph(.data)) .data <- manynet::as_igraph(.data) - if(manynet::is_twomode(.data)){ - make_network_measure(igraph::modularity(manynet::to_multilevel(.data), - membership = membership, - resolution = resolution), .data) - } else make_network_measure(igraph::modularity(.data, - membership = membership, - resolution = resolution), - .data) -} - -#' @rdname features -#' @param times Integer of number of simulations. -#' @param method There are three small-world measures implemented: -#' - "sigma" is the original equation from Watts and Strogatz (1998), -#' \deqn{\frac{\frac{C}{C_r}}{\frac{L}{L_r}}}, -#' where \eqn{C} and \eqn{L} are the observed -#' clustering coefficient and path length, respectively, -#' and \eqn{C_r} and \eqn{L_r} are the averages obtained from -#' random networks of the same dimensions and density. -#' A \eqn{\sigma > 1} is considered to be small-world, -#' but this measure is highly sensitive to network size. -#' - "omega" (the default) is an update from Telesford et al. (2011), -#' \deqn{\frac{L_r}{L} - \frac{C}{C_l}}, -#' where \eqn{C_l} is the clustering coefficient for a lattice graph -#' with the same dimensions. -#' \eqn{\omega} ranges between 0 and 1, -#' where 1 is as close to a small-world as possible. -#' - "SWI" is an alternative proposed by Neal (2017), -#' \deqn{\frac{L - L_l}{L_r - L_l} \times \frac{C - C_r}{C_l - C_r}}, -#' where \eqn{L_l} is the average path length for a lattice graph -#' with the same dimensions. -#' \eqn{SWI} also ranges between 0 and 1 with the same interpretation, -#' but where there may not be a network for which \eqn{SWI = 1}. -#' @seealso [network_transitivity()] and [network_equivalency()] -#' for how clustering is calculated -#' @references -#' Watts, Duncan J., and Steven H. Strogatz. 1998. 
-#' “Collective Dynamics of ‘Small-World’ Networks.” -#' _Nature_ 393(6684):440–42. -#' \doi{10.1038/30918}. -#' -#' Telesford QK, Joyce KE, Hayasaka S, Burdette JH, Laurienti PJ. 2011. -#' "The ubiquity of small-world networks". -#' _Brain Connectivity_ 1(5): 367–75. -#' \doi{10.1089/brain.2011.0038}. -#' -#' Neal Zachary P. 2017. -#' "How small is it? Comparing indices of small worldliness". -#' _Network Science_. 5 (1): 30–44. -#' \doi{10.1017/nws.2017.5}. -#' @examples -#' network_smallworld(ison_brandes) -#' network_smallworld(ison_southern_women) -#' @export -network_smallworld <- function(.data, - method = c("omega", "sigma", "SWI"), - times = 100) { - - method <- match.arg(method) - - if(manynet::is_twomode(.data)){ - co <- network_equivalency(.data) - cr <- mean(vapply(1:times, - function(x) network_equivalency(manynet::generate_random(.data)), - FUN.VALUE = numeric(1))) - if(method %in% c("omega", "SWI")){ - cl <- network_equivalency(manynet::create_ring(.data)) - } - } else { - co <- network_transitivity(.data) - cr <- mean(vapply(1:times, - function(x) network_transitivity(manynet::generate_random(.data)), - FUN.VALUE = numeric(1))) - if(method %in% c("omega", "SWI")){ - cl <- network_transitivity(manynet::create_lattice(.data)) - } - } - - lo <- network_length(.data) - lr <- mean(vapply(1:times, - function(x) network_length(manynet::generate_random(.data)), - FUN.VALUE = numeric(1))) - if(method == "SWI"){ - ll <- network_length(manynet::create_ring(.data)) - } - - out <- switch(method, - "omega" = (lr/lo - co/cl), - "sigma" = (co/cr)/(lo/lr), - "SWI" = ((lo - ll)/(lr - ll))*((co - cr)/(cl - cr))) - make_network_measure(out, - .data) -} - -#' @rdname features -#' @importFrom igraph fit_power_law -#' @examples -#' network_scalefree(ison_adolescents) -#' network_scalefree(generate_scalefree(50, 1.5)) -#' network_scalefree(create_lattice(100)) -#' @export -network_scalefree <- function(.data){ - out <- igraph::fit_power_law(node_degree(.data, normalized = 
FALSE)) - if ("KS.p" %in% names(out)) { - if(out$KS.p < 0.05) - cat(paste("Note: Kolgomorov-Smirnov test that data", - "could have been drawn from a power-law", - "distribution rejected.\n")) - } - make_network_measure(out$alpha, .data) -} - -#' @rdname features -#' @source `{signnet}` by David Schoch -#' @examples -#' network_balance(ison_marvel_relationships) -#' @export -network_balance <- function(.data) { - - count_signed_triangles <- function(.data){ - g <- manynet::as_igraph(.data) - if (!"sign" %in% igraph::edge_attr_names(g)) { - stop("network does not have a sign edge attribute") - } - if (igraph::is_directed(g)) { - stop("g must be undirected") - } - eattrV <- igraph::edge_attr(g, "sign") - if (!all(eattrV %in% c(-1, 1))) { - stop("sign may only contain -1 and 1") - } - tmat <- t(matrix(igraph::triangles(g), nrow = 3)) - if (nrow(tmat) == 0) { - warning("g does not contain any triangles") - return(c(`+++` = 0, `++-` = 0, `+--` = 0, `---` = 0)) - } - emat <- t(apply(tmat, 1, function(x) c(igraph::get.edge.ids(g, - x[1:2]), igraph::get.edge.ids(g, x[2:3]), igraph::get.edge.ids(g, - x[c(3, 1)])))) - emat[, 1] <- eattrV[emat[, 1]] - emat[, 2] <- eattrV[emat[, 2]] - emat[, 3] <- eattrV[emat[, 3]] - emat <- t(apply(emat, 1, sort)) - emat_df <- as.data.frame(emat) - res <- stats::aggregate(list(count = rep(1, nrow(emat_df))), - emat_df, length) - tri_counts <- c(`+++` = 0, `++-` = 0, `+--` = 0, `---` = 0) - tmp_counts <- res[, 4] - if (nrow(res) == 1) { - names(tmp_counts) <- paste0(c("+", "-")[(rev(res[1:3]) == - -1) + 1], collapse = "") - } - else { - names(tmp_counts) <- apply(res[, 1:3], 1, function(x) paste0(c("+", - "-")[(rev(x) == -1) + 1], collapse = "")) - } - tri_counts[match(names(tmp_counts), names(tri_counts))] <- tmp_counts - tri_counts - } - - if (!manynet::is_signed(.data)) { - stop("network does not have a sign edge attribute") - } - if (manynet::is_directed(.data)) { - stop("object must be undirected") - } - g <- manynet::as_igraph(.data) - 
eattrV <- igraph::edge_attr(g, "sign") - if (!all(eattrV %in% c(-1, 1))) { - stop("sign may only contain -1 and 1") - } - tria_count <- count_signed_triangles(g) - make_network_measure(unname((tria_count["+++"] + tria_count["+--"])/sum(tria_count)), - .data) -} - -# Change #### - -#' Measures of network change -#' @description -#' These functions measure certain topological features of networks: -#' -#' - `network_change()` measures the Hamming distance between two or more networks. -#' - `network_stability()` measures the Jaccard index of stability between two or more networks. -#' -#' These `network_*()` functions return a numeric vector the length of the number -#' of networks minus one. E.g., the periods between waves. -#' @inheritParams cohesion -#' @name periods -#' @family measures -NULL - -#' @rdname periods -#' @param object2 A network object. -#' @export -network_change <- function(.data, object2){ - if(manynet::is_list(.data)){ - - } else if(!missing(object2)){ - .data <- list(.data, object2) - } else stop("`.data` must be a list of networks or a second network must be provided.") - periods <- length(.data)-1 - vapply(seq.int(periods), function(x){ - net1 <- manynet::as_matrix(.data[[x]]) - net2 <- manynet::as_matrix(.data[[x+1]]) - sum(net1 != net2) - }, FUN.VALUE = numeric(1)) -} - -#' @rdname periods -#' @export -network_stability <- function(.data, object2){ - if(manynet::is_list(.data)){ - - } else if(!missing(object2)){ - .data <- list(.data, object2) - } else stop("`.data` must be a list of networks or a second network must be provided.") - periods <- length(.data)-1 - vapply(seq.int(periods), function(x){ - net1 <- manynet::as_matrix(.data[[x]]) - net2 <- manynet::as_matrix(.data[[x+1]]) - n11 <- sum(net1 * net2) - n01 <- sum(net1==0 * net2) - n10 <- sum(net1 * net2==0) - n11 / (n01 + n10 + n11) - }, FUN.VALUE = numeric(1)) -} diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R deleted file mode 100644 index b146836ab..000000000 
--- a/R/measure_heterogeneity.R +++ /dev/null @@ -1,200 +0,0 @@ -#' Measures of network diversity -#' -#' @description -#' These functions offer ways to summarise the heterogeneity of an attribute -#' across a network, within groups of a network, or the distribution of ties -#' across this attribute: -#' -#' - `network_richness()` measures the number of unique categories -#' in a network attribute. -#' - `node_richness()` measures the number of unique categories -#' of an attribute to which each node is connected. -#' - `network_diversity()` measures the heterogeneity of ties across a network -#' or within clusters by node attributes. -#' - `node_diversity()` measures the heterogeneity of each node's -#' local neighbourhood. -#' - `network_heterophily()` measures how embedded nodes in the network -#' are within groups of nodes with the same attribute. -#' - `node_heterophily()` measures each node's embeddedness within groups -#' of nodes with the same attribute. -#' - `network_assortativity()` measures the degree assortativity in a network. -#' - `network_spatial()` measures the spatial association/autocorrelation ( -#' global Moran's I) in a network. -#' -#' @inheritParams cohesion -#' @param attribute Name of a nodal attribute or membership vector -#' to use as categories for the diversity measure. -#' @param clusters A nodal cluster membership vector or name of a vertex attribute. 
-#' @name heterogeneity -#' @family measures -NULL - -#' @rdname heterogeneity -#' @examples -#' network_richness(mpn_bristol) -#' @export -network_richness <- function(.data, attribute){ - make_network_measure(length(unique(manynet::node_attribute(.data, attribute))), - .data) -} - -#' @rdname heterogeneity -#' @examples -#' node_richness(mpn_bristol, "type") -#' @export -node_richness <- function(.data, attribute){ - out <- vapply(manynet::to_egos(.data, min_dist = 1), - function(x) length(unique(manynet::node_attribute(x, attribute))), - FUN.VALUE = numeric(1)) - make_node_measure(out, .data) -} - -#' @rdname heterogeneity -#' @section network_diversity: -#' Blau's index (1977) uses a formula known also in other disciplines -#' by other names -#' (Gini-Simpson Index, Gini impurity, Gini's diversity index, -#' Gibbs-Martin index, and probability of interspecific encounter (PIE)): -#' \deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }}, -#' where \eqn{p_i} is the proportion of group members in \eqn{i}th category -#' and \eqn{k} is the number of categories for an attribute of interest. -#' This index can be interpreted as the probability that two members -#' randomly selected from a group would be from different categories. -#' This index finds its minimum value (0) when there is no variety, -#' i.e. when all individuals are classified in the same category. -#' The maximum value depends on the number of categories and -#' whether nodes can be evenly distributed across categories. -#' @references -#' Blau, Peter M. (1977). -#' _Inequality and heterogeneity_. -#' New York: Free Press. 
-#' @examples -#' marvel_friends <- manynet::to_unsigned(manynet::ison_marvel_relationships, "positive") -#' network_diversity(marvel_friends, "Gender") -#' network_diversity(marvel_friends, "Attractive") -#' network_diversity(marvel_friends, "Gender", "Rich") -#' @export -network_diversity <- function(.data, attribute, clusters = NULL){ - blau <- function(features) { 1 - sum((table(features)/length(features))^2) } - attr <- manynet::node_attribute(.data, attribute) - if (is.null(clusters)) { - blauout <- blau(attr) - } else if (is.numeric(clusters) && is.vector(clusters)) { - blauout <- vapply(unique(clusters), - function(i) blau(attr[clusters == i]), - numeric(1)) - names(blauout) <- paste0("Cluster ", unique(clusters)) - } else if (is.character(clusters)) { - clu <- manynet::node_attribute(.data, clusters) - blauout <- vapply(unique(clu), - function(i) blau(attr[clu == i]), - numeric(1)) - names(blauout) <- paste0("Cluster ", unique(clu)) - blauout <- blauout[order(names(blauout))] - } else stop("`clusters` must be the name of a nodal variable in the object.") - make_network_measure(blauout, .data) -} - -#' @rdname heterogeneity -#' @examples -#' node_diversity(marvel_friends, "Gender") -#' node_diversity(marvel_friends, "Attractive") -#' @export -node_diversity <- function(.data, attribute){ - out <- vapply(igraph::ego(manynet::as_igraph(.data)), - function(x) network_diversity( - igraph::induced_subgraph(manynet::as_igraph(.data), x), - attribute), - FUN.VALUE = numeric(1)) - make_node_measure(out, .data) -} - -#' @rdname heterogeneity -#' @section network_homophily: -#' Given a partition of a network into a number of mutually exclusive groups then -#' The E-I index is the number of ties between (or _external_) nodes -#' grouped in some mutually exclusive categories -#' minus the number of ties within (or _internal_) these groups -#' divided by the total number of ties. 
-#' This value can range from 1 to -1, -#' where 1 indicates ties only between categories/groups and -1 ties only within categories/groups. -#' @references -#' Krackhardt, David and Robert N. Stern (1988). -#' Informal networks and organizational crises: an experimental simulation. -#' _Social Psychology Quarterly_ 51(2), 123-140. -#' @examples -#' network_heterophily(marvel_friends, "Gender") -#' network_heterophily(marvel_friends, "Attractive") -#' @export -network_heterophily <- function(.data, attribute){ - m <- manynet::as_matrix(.data) - if (length(attribute) == 1 && is.character(attribute)) { - attribute <- manynet::node_attribute(.data, attribute) - } - if (is.character(attribute) | is.numeric(attribute)) { - attribute <- as.factor(attribute) - } - same <- outer(attribute, attribute, "==") - nInternal <- sum(m * same, na.rm = TRUE) - nExternal <- sum(m, na.rm = TRUE) - nInternal - ei <- (nExternal - nInternal) / sum(m, na.rm = TRUE) - make_network_measure(ei, .data) -} - -#' @rdname heterogeneity -#' @examples -#' node_heterophily(marvel_friends, "Gender") -#' node_heterophily(marvel_friends, "Attractive") -#' @export -node_heterophily <- function(.data, attribute){ - m <- manynet::as_matrix(.data) - if (length(attribute) == 1 && is.character(attribute)) { - attribute <- manynet::node_attribute(.data, attribute) - } - if (is.character(attribute) | is.numeric(attribute)) { - attribute <- as.factor(attribute) - } - if(anyNA(attribute)){ - m[is.na(attribute),] <- NA - m[,is.na(attribute)] <- NA - } - same <- outer(attribute, attribute, "==") - nInternal <- rowSums(m * same, na.rm = TRUE) - nInternal[is.na(attribute)] <- NA - nExternal <- rowSums(m, na.rm = TRUE) - nInternal - ei <- (nExternal - nInternal) / rowSums(m, na.rm = TRUE) - make_node_measure(ei, .data) -} - -#' @rdname heterogeneity -#' @importFrom igraph assortativity_degree -#' @examples -#' network_assortativity(mpn_elite_mex) -#' @export -network_assortativity <- function(.data){ - 
make_network_measure(igraph::assortativity_degree(manynet::as_igraph(.data), - directed = manynet::is_directed(.data)), - .data) -} - -#' @rdname heterogeneity -#' @references -#' Moran, Patrick Alfred Pierce. 1950. -#' "Notes on Continuous Stochastic Phenomena". -#' _Biometrika_ 37(1): 17-23. -#' \doi{10.2307/2332142} -#' @examples -#' network_spatial(ison_lawfirm, "age") -#' @export -network_spatial <- function(.data, attribute){ - N <- manynet::network_nodes(.data) - x <- manynet::node_attribute(.data, attribute) - stopifnot(is.numeric(x)) - x_bar <- mean(x, na.rm = TRUE) - w <- manynet::as_matrix(.data) - W <- sum(w, na.rm = TRUE) - I <- (N/W) * - (sum(w * matrix(x - x_bar, N, N) * matrix(x - x_bar, N, N, byrow = TRUE)) / - sum((x - x_bar)^2)) - make_network_measure(I, .data) -} diff --git a/R/measure_hierarchy.R b/R/measure_hierarchy.R deleted file mode 100644 index ea6861f9c..000000000 --- a/R/measure_hierarchy.R +++ /dev/null @@ -1,66 +0,0 @@ -#' Graph theoretic dimensions of hierarchy -#' -#' @description -#' These functions, together with `network_reciprocity()`, are used jointly to -#' measure how hierarchical a network is: -#' -#' - `network_connectedness()` measures the proportion of dyads in the network -#' that are reachable to one another, -#' or the degree to which network is a single component. -#' - `network_efficiency()` measures the Krackhardt efficiency score. -#' - `network_upperbound()` measures the Krackhardt (least) upper bound score. -#' -#' @inheritParams cohesion -#' @name hierarchy -#' @family measures -#' @references -#' Krackhardt, David. 1994. -#' Graph theoretical dimensions of informal organizations. -#' In Carley and Prietula (eds) _Computational Organizational Theory_, -#' Hillsdale, NJ: Lawrence Erlbaum Associates. Pp. 89-111. -#' -#' Everett, Martin, and David Krackhardt. 2012. -#' “A second look at Krackhardt's graph theoretical dimensions of informal organizations.” -#' _Social Networks_, 34: 159-163. 
-#' \doi{10.1016/j.socnet.2011.10.006} -#' @examples -#' network_connectedness(ison_networkers) -#' 1 - network_reciprocity(ison_networkers) -#' network_efficiency(ison_networkers) -#' network_upperbound(ison_networkers) -NULL - -#' @rdname hierarchy -#' @export -network_connectedness <- function(.data){ - dists <- igraph::distances(manynet::as_igraph(.data)) - make_network_measure(1 - sum(dists==Inf)/sum(dists!=0), - .data) -} - -#' @rdname hierarchy -#' @export -network_efficiency <- function(.data) { - degs <- node_indegree(.data, normalized = FALSE) - out <- (manynet::network_nodes(.data)-1)/sum(degs) - make_network_measure(out, .data) -} - -#' @rdname hierarchy -#' @export -network_upperbound <- function(.data) { - dists <- igraph::distances(.data, mode = "in") - dists[is.infinite(dists)] <- 0 - dists <- dists[order(rowSums(dists)), order(rowSums(dists))] - if (max(colSums(dists > 0)) / (manynet::network_nodes(.data)-1) == 1){ - out <- 1 - } else { - out <- apply(utils::combn(2:nrow(dists), 2), 2, - function(x){ - ubs <- dists[x,]>0 - any(ubs[1,]*ubs[2,]==1) - }) - out <- sum(out)/length(out) - } - make_network_measure(out, .data) -} \ No newline at end of file diff --git a/R/measure_holes.R b/R/measure_holes.R deleted file mode 100644 index ea2fb2b5e..000000000 --- a/R/measure_holes.R +++ /dev/null @@ -1,252 +0,0 @@ -#' Measures of structural holes -#' -#' @description -#' These function provide different measures of the degree to which nodes -#' fill structural holes, as outlined in Burt (1992): -#' -#' - `node_bridges()` measures the sum of bridges to which each node -#' is adjacent. -#' - `node_redundancy()` measures the redundancy of each nodes' contacts. -#' - `node_effsize()` measures nodes' effective size. -#' - `node_efficiency()` measures nodes' efficiency. -#' - `node_constraint()` measures nodes' constraint scores for one-mode networks -#' according to Burt (1992) and for two-mode networks according to Hollway et al (2020). 
-#' - `node_hierarchy()` measures nodes' exposure to hierarchy, -#' where only one or two contacts are the source of closure. -#' - `node_eccentricity()` measures nodes' eccentricity or Koenig number, -#' a measure of farness based on number of links needed to reach -#' most distant node in the network. -#' - `node_neighbours_degree()` measures nodes' average nearest neighbors degree, -#' or \eqn{knn}, a measure of the type of local environment a node finds itself in -#' - `tie_cohesion()` measures the ratio between common neighbors to ties' -#' adjacent nodes and the total number of adjacent nodes, -#' where high values indicate ties' embeddedness in dense local environments -#' -#' Burt's theory holds that while those nodes embedded in dense clusters -#' of close connections are likely exposed to the same or similar ideas and information, -#' those who fill structural holes between two otherwise disconnected groups -#' can gain some comparative advantage from that position. -#' @details -#' A number of different ways of measuring these structural holes are available. -#' Note that we use Borgatti's reformulation for unweighted networks in -#' `node_redundancy()` and `node_effsize()`. -#' Redundancy is thus \eqn{\frac{2t}{n}}, -#' where \eqn{t} is the sum of ties and \eqn{n} the sum of nodes in each node's neighbourhood, -#' and effective size is calculated as \eqn{n - \frac{2t}{n}}. -#' Node efficiency is the node's effective size divided by its degree. -#' @name holes -#' @family measures -#' @references -#' Burt, Ronald S. 1992. -#' _Structural Holes: The Social Structure of Competition_. -#' Cambridge, MA: Harvard University Press. 
-#' @inheritParams cohesion -NULL - -#' @rdname holes -#' @examples -#' node_bridges(ison_adolescents) -#' node_bridges(ison_southern_women) -#' @export -node_bridges <- function(.data){ - g <- manynet::as_igraph(.data) - .inc <- NULL - out <- vapply(igraph::V(g), function(ego){ - length(igraph::E(g)[.inc(ego) & manynet::tie_is_bridge(g)==1]) - }, FUN.VALUE = numeric(1)) - make_node_measure(out, .data) -} - -#' @rdname holes -#' @references -#' Borgatti, Steven. 1997. -#' “\href{http://www.analytictech.com/connections/v20(1)/holes.htm}{Structural Holes: Unpacking Burt’s Redundancy Measures}” -#' _Connections_ 20(1):35-38. -#' -#' Burchard, Jake, and Benjamin Cornwell. 2018. -#' “Structural Holes and Bridging in Two-Mode Networks.” -#' _Social Networks_ 55:11–20. -#' \doi{10.1016/j.socnet.2018.04.001} -#' @examples -#' node_redundancy(ison_adolescents) -#' node_redundancy(ison_southern_women) -#' @export -node_redundancy <- function(.data){ - if(manynet::is_twomode(.data)){ - mat <- manynet::as_matrix(.data) - out <- c(.redund2(mat), .redund2(t(mat))) - } else { - out <- .redund(manynet::as_matrix(.data)) - } - make_node_measure(out, .data) -} - -.redund <- function(.mat){ - n <- nrow(.mat) - qs <- .twopath_matrix(.mat > 0) - piq <- .mat/rowSums(.mat) - mjq <- .mat/matrix(do.call("pmax",data.frame(.mat)),n,n) - out <- rowSums(qs * piq * mjq) - out -} - -.redund2 <- function(.mat){ - sigi <- .mat %*% t(.mat) - diag(sigi) <- 0 - vapply(seq.int(nrow(sigi)), - function(x){ - xvec <- sigi[x,] #> 0 - if(manynet::is_weighted(.mat)){ - wt <- colMeans((.mat[x,] > 0 * t(.mat[xvec > 0,])) * t(.mat[xvec > 0,]) + .mat[x,]) * 2 - } else wt <- 1 - sum(colSums(xvec > 0 & t(sigi[xvec > 0,])) * xvec[xvec > 0] / - (sum(xvec) * wt)) - }, FUN.VALUE = numeric(1)) -} - -#' @rdname holes -#' @examples -#' node_effsize(ison_adolescents) -#' node_effsize(ison_southern_women) -#' @export -node_effsize <- function(.data){ - if(manynet::is_twomode(.data)){ - mat <- manynet::as_matrix(.data) - 
out <- c(rowSums(manynet::as_matrix(manynet::to_mode1(.data))>0), - rowSums(manynet::as_matrix(manynet::to_mode2(.data))>0)) - node_redundancy(.data) - } else { - mat <- manynet::as_matrix(.data) - out <- rowSums(mat>0) - .redund(mat) - } - make_node_measure(out, .data) -} - -.twopath_matrix <- function(.data){ - .data <- manynet::as_matrix(.data) - qs <- .data %*% t(.data) - diag(qs) <- 0 - qs -} - - -#' @rdname holes -#' @examples -#' node_efficiency(ison_adolescents) -#' node_efficiency(ison_southern_women) -#' @export -node_efficiency <- function(.data){ - out <- node_effsize(.data) / node_degree(.data, normalized = FALSE) - make_node_measure(as.numeric(out), .data) -} - -#' @rdname holes -#' @references -#' Hollway, James, Jean-Frédéric Morin, and Joost Pauwelyn. 2020. -#' "Structural conditions for novelty: the introduction of new environmental clauses to the trade regime complex." -#' _International Environmental Agreements: Politics, Law and Economics_ 20 (1): 61–83. -#' \doi{10.1007/s10784-019-09464-5}. 
-#' @examples -#' node_constraint(ison_southern_women) -#' @export -node_constraint <- function(.data) { - if (manynet::is_twomode(.data)) { - get_constraint_scores <- function(mat) { - inst <- colnames(mat) - rowp <- mat * matrix(1 / rowSums(mat), nrow(mat), ncol(mat)) - colp <- mat * matrix(1 / colSums(mat), nrow(mat), ncol(mat), byrow = T) - res <- vector() - for (i in inst) { - ci <- 0 - membs <- names(which(mat[, i] == 1)) - for (a in membs) { - pia <- colp[a, i] - oth <- membs[membs != a] - pbj <- 0 - if (length(oth) == 1) { - for (j in inst[mat[oth, ] > 0 & inst != i]) { - pbj <- sum(pbj, sum(colp[oth, i] * rowp[oth, j] * colp[a, j])) - } - } else { - for (j in inst[colSums(mat[oth, ]) > 0 & inst != i]) { - pbj <- sum(pbj, sum(colp[oth, i] * rowp[oth, j] * colp[a, j])) - } - } - cia <- (pia + pbj)^2 - ci <- sum(ci, cia) - } - res <- c(res, ci) - } - names(res) <- inst - res - } - inst.res <- get_constraint_scores(manynet::as_matrix(.data)) - actr.res <- get_constraint_scores(t(manynet::as_matrix(.data))) - res <- c(actr.res, inst.res) - } else { - res <- igraph::constraint(manynet::as_igraph(.data), - nodes = igraph::V(.data), - weights = NULL) - } - res <- make_node_measure(res, .data) - res -} - -#' @rdname holes -#' @examples -#' node_hierarchy(ison_adolescents) -#' node_hierarchy(ison_southern_women) -#' @export -node_hierarchy <- function(.data){ - cs <- node_constraint(.data) - g <- manynet::as_igraph(.data) - out <- vapply(igraph::V(g), function(ego){ - n = igraph::neighbors(g, ego) - N <- length(n) - css <- cs[n] - CN <- mean(css) - rj <- css/CN - sum(rj*log(rj)) / (N * log(N)) - }, FUN.VALUE = numeric(1)) - out[is.nan(out)] <- 0 - make_node_measure(out, .data) -} - -#' @rdname holes -#' @importFrom igraph eccentricity -#' @export -node_eccentricity <- function(.data){ - out <- igraph::eccentricity(manynet::as_igraph(.data), - mode = "out") - make_node_measure(out, .data) -} - -#' @rdname holes -#' @importFrom igraph knn -#' @references -#' Barrat, 
Alain, Marc Barthelemy, Romualdo Pastor-Satorras, and Alessandro Vespignani. 2004. -#' "The architecture of complex weighted networks", -#' _Proc. Natl. Acad. Sci._ 101: 3747. -#' @export -node_neighbours_degree <- function(.data){ - out <- igraph::knn(manynet::as_igraph(.data), - mode = "out")$knn - make_node_measure(out, .data) -} - -#' @rdname holes -#' @export -tie_cohesion <- function(.data){ - ties <- igraph::E(.data) - coins <- data.frame(heads = igraph::head_of(.data, ties), - tails = igraph::tail_of(.data, ties)) - out <- apply(coins, 1, - function(x){ - neigh1 <- igraph::neighbors(.data, x[1]) - neigh2 <- igraph::neighbors(.data, x[2]) - shared_nodes <- sum(c(neigh1 %in% neigh2, - neigh2 %in% neigh1))/2 - neigh_nodes <- length(unique(c(neigh1, neigh2)))-2 - shared_nodes / neigh_nodes - } ) - make_node_measure(out, .data) -} diff --git a/R/measure_over.R b/R/measure_over.R deleted file mode 100644 index c4b327d26..000000000 --- a/R/measure_over.R +++ /dev/null @@ -1,38 +0,0 @@ -#' Helper functions for measuring over splits of networks -#' @inheritParams regression -#' @param FUN A function to run over all splits. -#' @param ... Further arguments to be passed on to FUN. -#' @param attribute A string naming the attribute to be split upon. -#' @param slice Optionally, a vector of specific slices. -#' Otherwise all observed slices will be returned. -#' @name over -NULL - -#' @describeIn over Runs a function, e.g. a measure, -#' over waves of a panel network -#' @export -over_waves <- function(.data, FUN, ..., attribute = "wave", - strategy = "sequential", - verbose = FALSE){ - oplan <- future::plan(strategy) - on.exit(future::plan(oplan), add = TRUE) - furrr::future_map_dbl(manynet::to_waves(.data, attribute), function(j) FUN(j, ...), - .progress = verbose, .options = furrr::furrr_options(seed = T)) -} - -#' @describeIn over Runs a function, e.g. 
a measure, -#' over time slices of a dynamic network -#' @export -over_time <- function(.data, FUN, ..., attribute = "time", - slice = NULL, - strategy = "sequential", - verbose = FALSE){ - oplan <- future::plan(strategy) - on.exit(future::plan(oplan), add = TRUE) - out <- furrr::future_map_dbl(manynet::to_slices(.data, attribute, slice), - function(j) FUN(j, ...), - .progress = verbose, - .options = furrr::furrr_options(seed = T)) - make_network_measures(out, .data) -} - diff --git a/R/member_cliques.R b/R/member_cliques.R deleted file mode 100644 index 7c99ced49..000000000 --- a/R/member_cliques.R +++ /dev/null @@ -1,128 +0,0 @@ -#' Clique partitioning algorithms -#' -#' @description -#' These functions create a vector of nodes' memberships in -#' cliques: -#' -#' - `node_roulette()` assigns nodes to maximally diverse groups. -#' -#' @section Maximally diverse grouping problem: -#' This well known computational problem is a NP-hard problem -#' with a number of relevant applications, -#' including the formation of groups of students that have encountered -#' each other least or least recently. -#' Essentially, the aim is to return a membership of nodes in cliques -#' that minimises the sum of their previous (weighted) ties: -#' -#' \deqn{\sum_{g=1}^{m} \sum_{i=1}^{n-1} \sum_{j=i+1}^{n} x_{ij} y_{ig} y_{jg}} -#' -#' where \eqn{y_{ig} = 1} if node \eqn{i} is in group \eqn{g}, and 0 otherwise. -#' -#' \eqn{x_{ij}} is the existing network data. -#' If this is an empty network, the function will just return cliques. -#' To run this repeatedly, one can join a clique network of the membership result -#' with the original network, using this as the network data for the next round. -#' -#' A form of the Lai and Hao (2016) iterated maxima search (IMS) is used here. -#' This performs well for small and moderately sized networks. 
-#' It includes both weak and strong perturbations to an initial solution -#' to ensure that a robust solution from the broader state space is identified. -#' The user is referred to Lai and Hao (2016) and Lai et al (2021) for more details. -#' @inheritParams cohesion -#' @name cliques -#' @family memberships -NULL - -#' @rdname cliques -#' @param num_groups An integer indicating the number of groups desired. -#' @param group_size An integer indicating the desired size of most of the groups. -#' Note that if the number of nodes is not divisible into groups of equal size, -#' there may be some larger or smaller groups. -#' @param times An integer of the number of search iterations the algorithm should complete. -#' By default this is the number of nodes in the network multiplied by the number of groups. -#' This heuristic may be insufficient for small networks and numbers of groups, -#' and burdensome for large networks and numbers of groups, but can be overwritten. -#' At every 10th iteration, a stronger perturbation of a number of successive changes, -#' approximately the number of nodes divided by the number of groups, -#' will take place irrespective of whether it improves the objective function. -#' @references -#' Lai, Xiangjing, and Jin-Kao Hao. 2016. -#' “Iterated Maxima Search for the Maximally Diverse Grouping Problem.” -#' _European Journal of Operational Research_ 254(3):780–800. -#' \doi{10.1016/j.ejor.2016.05.018}. -#' -#' Lai, Xiangjing, Jin-Kao Hao, Zhang-Hua Fu, and Dong Yue. 2021. -#' “Neighborhood Decomposition Based Variable Neighborhood Search and Tabu Search for Maximally Diverse Grouping.” -#' _European Journal of Operational Research_ 289(3):1067–86. -#' \doi{10.1016/j.ejor.2020.07.048}. 
-#' @export -node_roulette <- function(.data, num_groups, group_size, times = NULL){ - if(missing(num_groups) & missing(group_size)){ - stop(paste("Either `num_groups` must indicate number of groups desired", - "or `group_size` must indicate the desired average size of groups.")) - } - n <- manynet::network_nodes(.data) - my_vec <- sample(seq.int(n)) - # Initial partition - if(!missing(num_groups)){ - out <- cut(seq_along(my_vec), num_groups, labels = FALSE)[my_vec] - } else { - out <- ceiling(seq_along(my_vec) / group_size)[my_vec] - } - if(is.null(times)) times <- n * max(out) - # Get fitness - mat <- manynet::as_matrix(.data) - fit <- sum(.to_cliques(out) * mat) - soln <- out - for(t in seq.int(times)){ - soln <- .weakPerturb(soln) - new_fit <- sum(.to_cliques(soln) * mat) - if(new_fit < fit){ - out <- soln - fit <- new_fit - } - if(t %% 10) soln <- .strongPerturb(soln) - } - make_node_member(out, .data) -} - -.to_cliques <- function(member){ - (member == t(matrix(member, length(member), length(member))))*1 -} - -.weakPerturb <- function(soln){ - gsizes <- table(soln) - evens <- all(gsizes == max(gsizes)) - if(evens){ - soln <- .swapMove(soln) - } else { - if(stats::runif(1)<0.5) soln <- .swapMove(soln) else - soln <- .oneMove(soln) - } - soln -} - -.swapMove <- function(soln){ - from <- sample(seq.int(length(soln)), 1) - to <- sample(which(soln != soln[from]), 1) - soln[c(to,from)] <- soln[c(from,to)] - soln -} - -.oneMove <- function(soln){ - gsizes <- table(soln) - maxg <- which(gsizes == max(gsizes)) - from <- sample(which(soln %in% maxg), 1) - soln[from] <- sample(which(gsizes != max(gsizes)), 1) - soln -} - -.strongPerturb <- function(soln, strength = 1){ - times <- ceiling(strength * length(soln)/max(soln)) - for (t in seq.int(times)){ - soln <- .weakPerturb(soln) - } - soln -} - - diff --git a/R/member_community.R b/R/member_community.R deleted file mode 100644 index dd147062f..000000000 --- a/R/member_community.R +++ /dev/null @@ -1,339 +0,0 @@ -#' 
Community partitioning algorithms -#' -#' @description -#' These functions offer different algorithms useful for partitioning -#' networks into sets of communities: -#' -#' - `node_optimal()` is a problem-solving algorithm that seeks to maximise -#' modularity over all possible partitions. -#' - `node_kernaghinlin()` is a greedy, iterative, deterministic -#' partitioning algorithm that results in two equally-sized communities. -#' - `node_edge_betweenness()` is a hierarchical, decomposition algorithm -#' where edges are removed in decreasing order of the number of -#' shortest paths passing through the edge. -#' - `node_fast_greedy()` is a hierarchical, agglomerative algorithm, -#' that tries to optimize modularity in a greedy manner. -#' - `node_leading_eigen()` is a top-down, hierarchical algorithm. -#' - `node_walktrap()` is a hierarchical, agglomerative algorithm based on random walks. -#' - `node_infomap()` is a hierarchical algorithm based on the information in random walks. -#' - `node_spinglass()` is a greedy, iterative, probabilistic algorithm, -#' based on analogy to model from statistical physics. -#' - `node_fluid()` is a propogation-based partitioning algorithm, -#' based on analogy to model from fluid dynamics. -#' - `node_louvain()` is an agglomerative multilevel algorithm that seeks to maximise -#' modularity over all possible partitions. -#' - `node_leiden()` is an agglomerative multilevel algorithm that seeks to maximise -#' the Constant Potts Model over all possible partitions. -#' -#' The different algorithms offer various advantages in terms of computation time, -#' availability on different types of networks, ability to maximise modularity, -#' and their logic or domain of inspiration. 
-#' -#' @inheritParams cohesion -#' @name community -#' @family memberships -NULL - -#' @rdname community -#' @section Optimal: -#' The general idea is to calculate the modularity of all possible partitions, -#' and choose the community structure that maximises this modularity measure. -#' Note that this is an NP-complete problem with exponential time complexity. -#' The guidance in the igraph package is networks of <50-200 nodes is probably fine. -#' @references -#' Brandes, Ulrik, Daniel Delling, Marco Gaertler, Robert Gorke, Martin Hoefer, Zoran Nikoloski, Dorothea Wagner. 2008. -#' "On Modularity Clustering", -#' _IEEE Transactions on Knowledge and Data Engineering_ 20(2):172-188. -#' @examples -#' node_optimal(ison_adolescents) -#' @export -node_optimal <- function(.data){ - out <- igraph::cluster_optimal(manynet::as_igraph(.data) - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @references -#' Kernighan, Brian W., and Shen Lin. 1970. -#' "An efficient heuristic procedure for partitioning graphs." -#' _The Bell System Technical Journal_ 49(2): 291-307. 
-#' \doi{10.1002/j.1538-7305.1970.tb01770.x} -#' @examples -#' node_kernighanlin(ison_adolescents) -#' node_kernighanlin(ison_southern_women) -#' @export -node_kernighanlin <- function(.data){ - # assign groups arbitrarily - n <- manynet::network_nodes(.data) - group_size <- ifelse(n %% 2 == 0, n/2, (n+1)/2) - - # count internal and external costs of each vertex - g <- manynet::as_matrix(manynet::to_multilevel(.data)) - g1 <- g[1:group_size, 1:group_size] - g2 <- g[(group_size+1):n, (group_size+1):n] - intergroup <- g[1:group_size, (group_size+1):n] - - g2.intcosts <- rowSums(g2) - g2.extcosts <- colSums(intergroup) - - g1.intcosts <- rowSums(g1) - g1.extcosts <- rowSums(intergroup) - - # count edge costs of each vertex - g1.net <- g1.extcosts - g1.intcosts - g2.net <- g2.extcosts - g2.intcosts - - g1.net <- sort(g1.net, decreasing = TRUE) - g2.net <- sort(g2.net, decreasing = TRUE) - - # swap pairs of vertices (one from each group) that give a positive sum of net edge costs - if(length(g1.net)!=length(g2.net)) { - g2.net <- c(g2.net,0) - } else {g2.net} - - sums <- as.integer(unname(g1.net + g2.net)) - # positions in sequence of names at which sum >= 0 - index <- which(sums >= 0 %in% sums) - g1.newnames <- g1.names <- names(g1.net) - g2.newnames <- g2.names <- names(g2.net) - # make swaps based on positions in sequence - for (i in index) { - g1.newnames[i] <- g2.names[i] - g2.newnames[i] <- g1.names[i] - } - - # extract names of vertices in each group after swaps - out <- ifelse(manynet::node_names(.data) %in% g1.newnames, 1, 2) - make_node_member(out, .data) -} - -#' @rdname community -#' @section Edge-betweenness: -#' This is motivated by the idea that edges connecting different groups -#' are more likely to lie on multiple shortest paths when they are the -#' only option to go from one group to another. 
-#' This method yields good results but is very slow because of -#' the computational complexity of edge-betweenness calculations and -#' the betweenness scores have to be re-calculated after every edge removal. -#' Networks of ~700 nodes and ~3500 ties are around the upper size limit -#' that are feasible with this approach. -#' @references -#' Newman, M, and M Girvan. 2004. -#' "Finding and evaluating community structure in networks." -#' _Physical Review E_ 69: 026113. -#' @examples -#' node_edge_betweenness(ison_adolescents) -#' @export -node_edge_betweenness <- function(.data){ - out <- suppressWarnings(igraph::cluster_edge_betweenness( - manynet::as_igraph(.data))$membership) - make_node_member(out, .data) -} - -#' @rdname community -#' @section Fast-greedy: -#' Initially, each node is assigned a separate community. -#' Communities are then merged iteratively such that each merge -#' yields the largest increase in the current value of modularity, -#' until no further increases to the modularity are possible. -#' The method is fast and recommended as a first approximation -#' because it has no parameters to tune. -#' However, it is known to suffer from a resolution limit. -#' @references -#' Clauset, A, MEJ Newman, MEJ and C Moore. -#' "Finding community structure in very large networks." -#' @examples -#' node_fast_greedy(ison_adolescents) -#' @export -node_fast_greedy <- function(.data){ - out <- igraph::cluster_fast_greedy(manynet::as_igraph(.data) - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @section Leading eigenvector: -#' In each step, the network is bifurcated such that modularity increases most. -#' The splits are determined according to the leading eigenvector of the modularity matrix. -#' A stopping condition prevents tightly connected groups from being split further. 
-#' Note that due to the eigenvector calculations involved, -#' this algorithm will perform poorly on degenerate networks, -#' but will likely obtain a higher modularity than fast-greedy (at some cost of speed). -#' @references -#' Newman, MEJ. 2006. -#' "Finding community structure using the eigenvectors of matrices" -#' _Physical Review E_ 74:036104. -#' @examples -#' node_leading_eigen(ison_adolescents) -#' @export -node_leading_eigen <- function(.data){ - out <- igraph::cluster_leading_eigen(manynet::as_igraph(.data) - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @section Walktrap: -#' The general idea is that random walks on a network are more likely to stay -#' within the same community because few edges lead outside a community. -#' By repeating random walks of 4 steps many times, -#' information about the hierarchical merging of communities is collected. -#' @param times Integer indicating number of simulations/walks used. -#' By default, `times=50`. -#' @references -#' Pons, Pascal, and Matthieu Latapy -#' "Computing communities in large networks using random walks". -#' @examples -#' node_walktrap(ison_adolescents) -#' @export -node_walktrap <- function(.data, times = 50){ - out <- igraph::cluster_walktrap(manynet::as_igraph(.data), - steps=times)$membership - make_node_member(out, .data) - -} - -#' @rdname community -#' @section Infomap: -#' Motivated by information theoretic principles, this algorithm tries to build -#' a grouping that provides the shortest description length for a random walk, -#' where the description length is measured by the expected number of bits per node required to encode the path. -#' @references -#' Rosvall, M, and C. T. Bergstrom. 2008. -#' "Maps of information flow reveal community structure in complex networks", -#' _PNAS_ 105:1118. -#' \doi{10.1073/pnas.0706851105} -#' -#' Rosvall, M., D. Axelsson, and C. T. Bergstrom. 2009. -#' "The map equation", -#' _Eur. Phys. J. Special Topics_ 178: 13. 
-#' \doi{10.1140/epjst/e2010-01179-1} -#' @examples -#' node_infomap(ison_adolescents) -#' @export -node_infomap <- function(.data, times = 50){ - out <- igraph::cluster_infomap(manynet::as_igraph(.data), - nb.trials = times - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @param max_k Integer constant, the number of spins to use as an upper limit -#' of communities to be found. Some sets can be empty at the end. -#' @param resolution The Reichardt-Bornholdt “gamma” resolution parameter for modularity. -#' By default 1, making existing and non-existing ties equally important. -#' Smaller values make existing ties more important, -#' and larger values make missing ties more important. -#' @section Spin-glass: -#' This is motivated by analogy to the Potts model in statistical physics. -#' Each node can be in one of _k_ "spin states", -#' and ties (particle interactions) provide information about which pairs of nodes -#' want similar or different spin states. -#' The final community definitions are represented by the nodes' spin states -#' after a number of updates. -#' A different implementation than the default is used in the case of signed networks, -#' such that nodes connected by negative ties will be more likely found in separate communities. -#' @references -#' Reichardt, Jorg, and Stefan Bornholdt. 2006. -#' "Statistical Mechanics of Community Detection" -#' _Physical Review E_, 74(1): 016110–14. -#' \doi{10.1073/pnas.0605965104} -#' -#' Traag, VA, and Jeroen Bruggeman. 2008. -#' "Community detection in networks with positive and negative links". 
-#' @examples -#' node_spinglass(ison_adolescents) -#' @export -node_spinglass <- function(.data, max_k = 200, resolution = 1){ - out <- igraph::cluster_spinglass(manynet::as_igraph(.data), - spins = max_k, gamma = resolution, - implementation = ifelse(manynet::is_signed(.data), "neg", "orig") - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @section Fluid: -#' The general idea is to observe how a discrete number of fluids interact, expand and contract, -#' in a non-homogenous environment, i.e. the network structure. -#' Unlike the `{igraph}` implementation that this function wraps, -#' this function iterates over all possible numbers of communities and returns the membership -#' associated with the highest modularity. -#' @references -#' Parés F, Gasulla DG, et. al. 2018. -#' "Fluid Communities: A Competitive, Scalable and Diverse Community Detection Algorithm". -#' In: _Complex Networks & Their Applications VI_ -#' Springer, 689: 229. -#' \doi{10.1007/978-3-319-72150-7_19} -#' @examples -#' node_fluid(ison_adolescents) -#' @export -node_fluid <- function(.data) { - .data <- as_igraph(.data) - mods <- vapply(seq.int(manynet::network_nodes(.data)), function(x) - igraph::modularity(.data, membership = igraph::membership( - igraph::cluster_fluid_communities(.data, x))), - FUN.VALUE = numeric(1)) - out <- igraph::membership(igraph::cluster_fluid_communities( - .data, no.of.communities = which.max(mods))) - make_node_member(out, .data) -} - -#' @rdname community -#' @section Louvain: -#' The general idea is to take a hierarchical approach to optimising the modularity criterion. -#' Nodes begin in their own communities and are re-assigned in a local, greedy way: -#' each node is moved to the community where it achieves the highest contribution to modularity. -#' When no further modularity-increasing reassignments are possible, -#' the resulting communities are considered nodes (like a reduced graph), -#' and the process continues. 
-#' @references -#' Blondel, Vincent, Jean-Loup Guillaume, Renaud Lambiotte, Etienne Lefebvre. 2008. -#' "Fast unfolding of communities in large networks", -#' _J. Stat. Mech._ P10008. -#' @examples -#' node_louvain(ison_adolescents) -#' @export -node_louvain <- function(.data, resolution = 1){ - out <- igraph::cluster_louvain(manynet::as_igraph(.data), - resolution = resolution - )$membership - make_node_member(out, .data) -} - -#' @rdname community -#' @section Leiden: -#' The general idea is to optimise the Constant Potts Model, -#' which does not suffer from the resolution limit, instead of modularity. -#' As outlined in the `{igraph}` package, -#' the Constant Potts Model object function is: -#' -#' \deqn{\frac{1}{2m} \sum_{ij}(A_{ij}-\gamma n_i n_j)\delta(\sigma_i, \sigma_j)} -#' -#' where _m_ is the total tie weight, -#' \eqn{A_{ij}} is the tie weight between _i_ and _j_, -#' \eqn{\gamma} is the so-called resolution parameter, -#' \eqn{n_i} is the node weight of node _i_, -#' and \eqn{\delta(\sigma_i, \sigma_j) = 1} if and only if -#' _i_ and _j_ are in the same communities and 0 otherwise. -#' @references -#' Traag, V. A., L Waltman, and NJ van Eck. 2019. -#' "From Louvain to Leiden: guaranteeing well-connected communities", -#' _Scientific Reports_, 9(1):5233. 
-#' \doi{10.1038/s41598-019-41695-z} -#' @examples -#' node_leiden(ison_adolescents) -#' @export -node_leiden <- function(.data, resolution = 1){ - if(manynet::is_weighted(.data)){ # Traag resolution default - n <- manynet::network_nodes(.data) - resolution <- sum(manynet::tie_weights(.data))/(n*(n - 1)/2) - } - out <- igraph::cluster_leiden(manynet::as_igraph(.data), - resolution_parameter = resolution - )$membership - make_node_member(out, .data) -} - diff --git a/R/member_components.R b/R/member_components.R deleted file mode 100644 index 3190646d8..000000000 --- a/R/member_components.R +++ /dev/null @@ -1,66 +0,0 @@ -#' Component partitioning algorithms -#' -#' @description -#' These functions create a vector of nodes' memberships in -#' components or degrees of coreness: -#' -#' - `node_components()` assigns nodes' component membership -#' using edge direction where available. -#' - `node_weak_components()` assigns nodes' component membership -#' ignoring edge direction. -#' - `node_strong_components()` assigns nodes' component membership -#' based on edge direction. -#' - `node_roulette()` -#' -#' In graph theory, components, sometimes called connected components, -#' are induced subgraphs from partitioning the nodes into disjoint sets. -#' All nodes that are members of the same partition as _i_ are reachable -#' from _i_. -#' -#' For directed networks, -#' strongly connected components consist of subgraphs where there are paths -#' in each direction between member nodes. -#' Weakly connected components consist of subgraphs where there is a path -#' in either direction between member nodes. -#' -#' Coreness captures the maximal subgraphs in which each vertex has at least -#' degree _k_, where _k_ is also the order of the subgraph. -#' As described in `igraph::coreness`, -#' a node's coreness is _k_ if it belongs to the _k_-core -#' but not to the (_k_+1)-core. 
-#' @inheritParams cohesion -#' @name components -#' @family memberships -NULL - -#' @rdname components -#' @importFrom igraph components -#' @examples -#' node_components(mpn_bristol) -#' @export -node_components <- function(.data){ - if(!manynet::is_graph(.data)) .data <- manynet::as_igraph(.data) - make_node_member(igraph::components(.data, mode = "strong")$membership, - .data) -} - -#' @rdname components -#' @importFrom igraph components -#' @export -node_weak_components <- function(.data){ - if(!manynet::is_graph(.data)) .data <- manynet::as_igraph(.data) - make_node_member(igraph::components(.data, mode = "weak")$membership, - .data) -} - -#' @rdname components -#' @importFrom igraph components -#' @export -node_strong_components <- function(.data){ - if(!manynet::is_graph(.data)) .data <- manynet::as_igraph(.data) - make_node_member(igraph::components(.data, mode = "strong")$membership, - .data) -} - - - diff --git a/R/member_core.R b/R/member_core.R deleted file mode 100644 index a4feea22d..000000000 --- a/R/member_core.R +++ /dev/null @@ -1,80 +0,0 @@ -#' Core-periphery clustering algorithms -#' @description -#' These functions identify nodes belonging to (some level of) the core of a network: -#' -#' - `node_core()` assigns nodes to either the core or periphery. -#' - `node_coreness()` assigns nodes to their level of k-coreness. -#' -#' @inheritParams cohesion -#' @param method Which method to use to identify cores and periphery. -#' By default this is "degree", -#' which relies on the heuristic that high degree nodes are more likely to be in the core. -#' An alternative is "eigenvector", which instead begins with high eigenvector nodes. -#' Other methods, such as a genetic algorithm, CONCOR, and Rombach-Porter, -#' can be added if there is interest. -#' @name core -#' @family memberships -NULL - -#' @rdname core -#' @section Core-periphery: -#' This function is used to identify which nodes should belong to the core, -#' and which to the periphery. 
-#' It seeks to minimize the following quantity: -#' \deqn{Z(S_1) = \sum_{(i% as_tidygraph %>% -#' # mutate(corep = node_core(mpn_elite_usa_advice)) %>% -#' # autographr(node_color = "corep") -#' network_core(mpn_elite_usa_advice) -#' @export -node_core <- function(.data, method = c("degree", "eigenvector")){ - method <- match.arg(method) - if(manynet::is_directed(.data)) warning("Asymmetric core-periphery not yet implemented.") - if(method == "degree"){ - degi <- node_degree(.data, normalized = FALSE, - alpha = ifelse(manynet::is_weighted(.data), 1, 0)) - } else if (method == "eigenvector") { - degi <- node_eigenvector(.data, normalized = FALSE) - } else stop("This function expects either 'degree' or 'eigenvector' method to be specified.") - nord <- order(degi, decreasing = TRUE) - zbest <- manynet::network_nodes(.data)*3 - kbest <- 0 - z <- 1/2*sum(degi) - for(k in 1:(manynet::network_nodes(.data)-1)){ - z <- z + k - 1 - degi[nord][k] - if(z < zbest){ - zbest <- z - kbest <- k - } - } - out <- ifelse(seq_len(manynet::network_nodes(.data)) %in% nord[seq_len(kbest)], - 1,2) - make_node_member(out, .data) -} - -#' @rdname core -#' @examples -#' node_coreness(ison_adolescents) -#' @export -node_coreness <- function(.data){ - if(!manynet::is_graph(.data)) .data <- manynet::as_igraph(.data) - out <- igraph::coreness(.data) - make_node_member(out, .data) -} - diff --git a/R/member_equivalence.R b/R/member_equivalence.R deleted file mode 100644 index 040cb6316..000000000 --- a/R/member_equivalence.R +++ /dev/null @@ -1,139 +0,0 @@ -#' Equivalence clustering algorithms -#' -#' @description -#' These functions combine an appropriate `_census()` function -#' together with methods for calculating the hierarchical clusters -#' provided by a certain distance calculation. -#' -#' - `node_equivalence()` assigns nodes membership based on their equivalence -#' with respective to some census/class. -#' The following functions call this function, together with an appropriate census. 
-#' - `node_structural_equivalence()` assigns nodes membership based on their -#' having equivalent ties to the same other nodes. -#' - `node_regular_equivalence()` assigns nodes membership based on their -#' having equivalent patterns of ties. -#' - `node_automorphic_equivalence()` assigns nodes membership based on their -#' having equivalent distances to other nodes. -#' -#' A `plot()` method exists for investigating the dendrogram -#' of the hierarchical cluster and showing the returned cluster -#' assignment. -#' @name equivalence -#' @family memberships -#' @inheritParams cohesion -#' @param census A matrix returned by a `node_*_census()` function. -#' @param k Typically a character string indicating which method -#' should be used to select the number of clusters to return. -#' By default `"silhouette"`, other options include `"elbow"` and `"strict"`. -#' `"strict"` returns classes with members only when strictly equivalent. -#' `"silhouette"` and `"elbow"` select classes based on the distance between -#' clusters or between nodes within a cluster. -#' Fewer, identifiable letters, e.g. `"e"` for elbow, is sufficient. -#' Alternatively, if `k` is passed an integer, e.g. `k = 3`, -#' then all selection routines are skipped in favour of this number of clusters. -#' @param cluster Character string indicating whether clusters should be -#' clustered hierarchically (`"hierarchical"`) or -#' through convergence of correlations (`"concor"`). -#' Fewer, identifiable letters, e.g. `"c"` for CONCOR, is sufficient. -#' @param distance Character string indicating which distance metric -#' to pass on to `stats::dist`. -#' By default `"euclidean"`, but other options include -#' `"maximum"`, `"manhattan"`, `"canberra"`, `"binary"`, and `"minkowski"`. -#' Fewer, identifiable letters, e.g. `"e"` for Euclidean, is sufficient. -#' @param range Integer indicating the maximum number of (k) clusters -#' to evaluate. 
-#' Ignored when `k = "strict"` or a discrete number is given for `k`. -#' @importFrom stats as.dist hclust cutree coef cor median -#' @importFrom sna gcor -#' @source \url{https://github.com/aslez/concoR} -NULL - -#' @rdname equivalence -#' @export -node_equivalence <- function(.data, census, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski"), - range = 8L){ - hc <- switch(match.arg(cluster), - hierarchical = cluster_hierarchical(`if`(manynet::is_twomode(.data), - manynet::to_onemode(census), census), - match.arg(distance)), - concor = cluster_concor(.data, census)) - - if(!is.numeric(k)) - k <- switch(match.arg(k), - strict = k_strict(hc, .data), - elbow = k_elbow(hc, .data, census, range), - silhouette = k_silhouette(hc, .data, range)) - - out <- make_node_member(stats::cutree(hc, k), .data) - attr(out, "hc") <- hc - attr(out, "k") <- k - out -} - -#' @rdname equivalence -#' @examples -#' \donttest{ -#' (nse <- node_structural_equivalence(mpn_elite_usa_advice)) -#' plot(nse) -#' } -#' @export -node_structural_equivalence <- function(.data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski"), - range = 8L){ - mat <- node_tie_census(.data) - if(any(colSums(t(mat))==0)){ - mat <- cbind(mat, (colSums(t(mat))==0)) - } - node_equivalence(.data, mat, - k = k, cluster = cluster, distance = distance, range = range) -} - -#' @rdname equivalence -#' @examples -#' \donttest{ -#' (nre <- node_regular_equivalence(mpn_elite_usa_advice, -#' cluster = "concor")) -#' plot(nre) -#' } -#' @export -node_regular_equivalence <- function(.data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski"), - range = 8L){ - 
if(manynet::is_twomode(.data)){ - mat <- as.matrix(node_quad_census(.data)) - } else { - mat <- node_triad_census(.data) - } - if(any(colSums(mat) == 0)) mat <- mat[,-which(colSums(mat) == 0)] - node_equivalence(.data, mat, - k = k, cluster = cluster, distance = distance, range = range) -} - -#' @rdname equivalence -#' @examples -#' \donttest{ -#' (nae <- node_automorphic_equivalence(mpn_elite_usa_advice, -#' k = "elbow")) -#' plot(nae) -#' } -#' @export -node_automorphic_equivalence <- function(.data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski"), - range = 8L){ - mat <- node_path_census(.data) - node_equivalence(.data, mat, - k = k, cluster = cluster, distance = distance, range = range) -} diff --git a/R/migraph-defunct.R b/R/migraph-defunct.R index 3c0ead6a1..f46e42847 100644 --- a/R/migraph-defunct.R +++ b/R/migraph-defunct.R @@ -11,355 +11,10 @@ #' @keywords internal NULL -#' @describeIn defunct Deprecated on 2022-06-28. +#' @describeIn defunct Deprecated on 2024-06-16. #' @export -edge_betweenness <- function(object, normalized = TRUE){ - .Deprecated("tie_betweenness", package = "migraph", - old = "edge_betweenness") - tie_betweenness(object, normalized) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_closeness <- function(object, normalized = TRUE){ - .Deprecated("tie_closeness", package = "migraph", - old = "edge_closeness") - tie_closeness(object, normalized) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_degree <- function(object, normalized = TRUE){ - .Deprecated("tie_degree", package = "migraph", - old = "edge_degree") - tie_degree(object, normalized) -} - -#' @describeIn defunct Deprecated on 2022-06-28. 
-#' @export -edge_eigenvector <- function(object, normalized = TRUE){ - .Deprecated("tie_eigenvector", package = "migraph", - old = "edge_eigenvector") - tie_eigenvector(object, normalized) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_loop <- function(object){ - .Deprecated("tie_is_loop", package = "migraph", - old = "edge_loop") - manynet::tie_is_loop(object) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_multiple <- function(object){ - .Deprecated("tie_is_multiple", package = "migraph", - old = "edge_multiple") - manynet::tie_is_multiple(object) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_bridges <- function(object){ - .Deprecated("tie_is_bridge", package = "migraph", - old = "edge_bridges") - manynet::tie_is_bridge(object) -} - -#' @describeIn defunct Deprecated on 2022-06-28. -#' @export -edge_reciprocal <- function(object){ - .Deprecated("tie_is_reciprocated", package = "migraph", - old = "edge_reciprocal") - manynet::tie_is_reciprocated(object) -} - -#' @describeIn defunct Deprecated on 2022-06-30. -#' @export -node_cuts <- function(object){ - .Deprecated("node_is_cutpoint", package = "migraph", - old = "node_cuts") - manynet::node_is_cutpoint(object) -} - -#' @describeIn defunct Deprecated on 2022-09-10. -#' @export -graph_blau_index <- function(object, attribute, clusters = NULL) { - .Deprecated("network_diversity", package = "migraph", - old = "graph_blau_index") - network_diversity(object, attribute = attribute, clusters = clusters) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_diversity <- function(object, attribute, clusters = NULL) { - .Deprecated("network_diversity", package = "migraph", - old = "graph_diversity") - network_diversity(object, attribute = attribute, clusters = clusters) -} - -#' @describeIn defunct Deprecated on 2022-09-10. 
-#' @export -graph_ei_index <- function(object, attribute) { - .Deprecated("network_homophily", package = "migraph", - old = "graph_ei_index") - network_homophily(object, attribute = attribute) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_homophily <- function(object, attribute) { - .Deprecated("network_homophily", package = "migraph", - old = "graph_homophily") - network_homophily(object, attribute = attribute) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -plot.graph_test <- function(x, ..., - threshold = .95, - tails = c("two", "one")) { - .Deprecated("plot.network_test", package = "migraph", - old = "plot.graph_test") - plot.network_test(x, ..., - threshold = threshold, - tails = tails) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -print.graph_test <- function(x, ..., - max.length = 6, - digits = 3) { - .Deprecated("print.network_test", package = "migraph", - old = "print.graph_test") - print.network_test(x, ..., - max.length = max.length, - digits = digits) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -print.graph_measure <- function(x, ..., - digits = 3) { - .Deprecated("print.network_measure", package = "migraph", - old = "print.graph_measure") - print.network_measure(x, ..., - digits = digits) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -print.graph_motif <- function(x, ...) { - .Deprecated("print.network_motif", package = "migraph", - old = "print.graph_motif") - print.network_motif(x, ...) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_adhesion <- function(object) { - .Deprecated("network_adhesion", package = "migraph", - old = "graph_adhesion") - network_adhesion(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. 
-#' @export -graph_cohesion <- function(object) { - .Deprecated("network_cohesion", package = "migraph", - old = "graph_cohesion") - network_cohesion(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_assortativity <- function(object) { - .Deprecated("network_assortativity", package = "migraph", - old = "graph_assortativity") - network_assortativity(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_balance <- function(object) { - .Deprecated("network_balance", package = "migraph", - old = "graph_balance") - network_balance(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_betweenness <- function(object, normalized = TRUE, - direction = c("all", "out", "in")) { - .Deprecated("network_betweenness", package = "migraph", - old = "graph_betweenness") - network_betweenness(object, normalized = normalized, - direction = direction) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_closeness <- function(object, normalized = TRUE, - direction = c("all", "out", "in")) { - .Deprecated("network_closeness", package = "migraph", - old = "graph_closeness") - network_closeness(object, normalized = normalized, - direction = direction) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_degree <- function(object, normalized = TRUE, - direction = c("all", "out", "in")) { - .Deprecated("network_degree", package = "migraph", - old = "graph_degree") - network_degree(object, normalized = normalized, - direction = direction) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_eigenvector <- function(object, normalized = TRUE) { - .Deprecated("network_eigenvector", package = "migraph", - old = "graph_eigenvector") - network_eigenvector(object, normalized = normalized) -} - -#' @describeIn defunct Deprecated on 2022-09-25. 
-#' @export -graph_components <- function(object) { - .Deprecated("network_components", package = "migraph", - old = "graph_components") - network_components(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_factions <- function(object, membership = NULL) { - .Deprecated("network_factions", package = "migraph", - old = "graph_factions") - network_factions(object, membership = membership) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_congruency <- function(object, object2) { - .Deprecated("network_congruency", package = "migraph", - old = "graph_congruency") - network_congruency(object, object2) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_equivalency <- function(object) { - .Deprecated("network_equivalency", package = "migraph", - old = "graph_equivalency") - network_equivalency(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_core <- function(object, membership = NULL) { - .Deprecated("network_core", package = "migraph", - old = "graph_core") - network_core(object, membership = membership) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_density <- function(object) { - .Deprecated("network_density", package = "migraph", - old = "graph_density") - network_density(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_reciprocity <- function(object, method = "default") { - .Deprecated("network_reciprocity", package = "migraph", - old = "graph_reciprocity") - network_reciprocity(object, method = method) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_transitivity <- function(object) { - .Deprecated("network_transitivity", package = "migraph", - old = "graph_transitivity") - network_transitivity(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. 
-#' @export -graph_diameter <- function(object) { - .Deprecated("network_diameter", package = "migraph", - old = "graph_diameter") - network_diameter(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_length <- function(object) { - .Deprecated("network_length", package = "migraph", - old = "graph_length") - network_length(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_dyad_census <- function(object) { - .Deprecated("network_dyad_census", package = "migraph", - old = "graph_dyad_census") - network_dyad_census(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_triad_census <- function(object) { - .Deprecated("network_triad_census", package = "migraph", - old = "graph_triad_census") - network_triad_census(object) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_mixed_census <- function(object, object2) { - .Deprecated("network_mixed_census", package = "migraph", - old = "graph_mixed_census") - network_mixed_census(object, object2) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_modularity <- function(object, membership = NULL, resolution = 1) { - .Deprecated("network_modularity", package = "migraph", - old = "graph_modularity") - network_modularity(object, membership = membership, - resolution = resolution) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -graph_smallworld <- function(object, times = 100) { - .Deprecated("network_smallworld", package = "migraph", - old = "graph_smallworld") - network_smallworld(object, times = times) -} - -#' @describeIn defunct Deprecated on 2022-09-25. -#' @export -network_homophily <- function(object, attribute) { - .Deprecated("network_heterophily", package = "migraph", - old = "network_homophily") - network_heterophily(object, attribute) -} - -#' @describeIn defunct Deprecated on 2022-09-25. 
-#' @export -node_homophily <- function(object, attribute) { - .Deprecated("node_heterophily", package = "migraph", - old = "node_homophily") - node_heterophily(object, attribute) +test_gof <- function(diff_model, diff_models) { + .Deprecated("test_fit", package = "migraph", + old = "test_gof") + test_fit(diff_model, diff_models) } diff --git a/R/migraph-package.R b/R/migraph-package.R index a65cf6430..4d78ffe7e 100644 --- a/R/migraph-package.R +++ b/R/migraph-package.R @@ -4,3 +4,18 @@ ## usethis namespace: start ## usethis namespace: end NULL + +# Helper function for checking and downloading packages +thisRequires <- function(pkgname){ + if (!requireNamespace(pkgname, quietly = TRUE)) { + if(utils::askYesNo(msg = paste("The", pkgname, + "package is required to run this function. Would you like to install", pkgname, "from CRAN?"))) { + utils::install.packages(pkgname) + } else { + stop(paste("Please install", pkgname, "from CRAN to run this function.")) + } + } +} + +# defining global variables more centrally +utils::globalVariables(c(".data", ".graph_context")) \ No newline at end of file diff --git a/R/model_cluster.R b/R/model_cluster.R deleted file mode 100644 index c4b9f2463..000000000 --- a/R/model_cluster.R +++ /dev/null @@ -1,110 +0,0 @@ -#' Methods for equivalence clustering -#' -#' @description -#' These functions are used to cluster some census object: -#' -#' - `cluster_hierarchical()` returns a hierarchical clustering object -#' created by `stats::hclust()`. -#' - `cluster_concor()` returns a hierarchical clustering object -#' created from a convergence of correlations procedure (CONCOR). -#' -#' These functions are not intended to be called directly, -#' but are called within `node_equivalence()` and related functions. -#' They are exported and listed here to provide more detailed documentation. 
-#' @name cluster -#' @inheritParams equivalence -NULL - -#' @rdname cluster -#' @export -cluster_hierarchical <- function(census, distance){ - correlations <- cor(t(census)) - dissimilarity <- 1 - correlations - distances <- stats::dist(dissimilarity, method = distance) - hc <- stats::hclust(distances) - hc$distances <- distances - hc -} - -# cluster_concor(ison_adolescents) -# cluster_concor(ison_southern_women) -# https://github.com/bwlewis/hclust_in_R/blob/master/hc.R - -#' @rdname cluster -#' @section CONCOR: -#' -#' First a matrix of Pearson correlation coefficients between each pair of nodes -#' profiles in the given census is created. -#' Then, again, we find the correlations of this square, symmetric matrix, -#' and continue to do this iteratively until each entry is either `1` or `-1`. -#' These values are used to split the data into two partitions, -#' with members either holding the values `1` or `-1`. -#' This procedure from census to convergence is then repeated within each block, -#' allowing further partitions to be found. -#' Unlike UCINET, partitions are continued until there are single members in -#' each partition. -#' Then a distance matrix is constructed from records of in which partition phase -#' nodes were separated, -#' and this is given to `stats::hclust()` so that dendrograms etc can be returned. -#' @importFrom stats complete.cases -#' @references -#' Breiger, Ronald L., Scott A. Boorman, and Phipps Arabie. 1975. -#' "An Algorithm for Clustering Relational Data with Applications to -#' Social Network Analysis and Comparison with Multidimensional Scaling". -#' _Journal of Mathematical Psychology_, 12: 328-83. -#' \doi{10.1016/0022-2496(75)90028-0}. 
-#' @export -cluster_concor <- function(.data, census){ - split_cor <- function(m0, cutoff = 1) { - if (ncol(m0) < 2 | all(stats::cor(m0)==1)) list(m0) - else { - mi <- stats::cor(m0) - while (any(abs(mi) <= cutoff)) { - mi <- cor(mi) - cutoff <- cutoff - 0.0001 - } - group <- mi[, 1] > 0 - list(m0[, group, drop = FALSE], - m0[, !group, drop = FALSE]) - } - } - p_list <- list(t(census)) - p_group <- list() - i <- 1 - while(!all(vapply(p_list, function(x) ncol(x)==1, logical(1)))){ - p_list <- unlist(lapply(p_list, - function(y) split_cor(y)), - recursive = FALSE) - p_group[[i]] <- lapply(p_list, function(z) colnames(z)) - if(i > 2 && length(p_group[[i]]) == length(p_group[[i-1]])) break - i <- i+1 - } - - merges <- sapply(rev(1:(i-1)), - function(p) lapply(p_group[[p]], - function(s){ - g <- match(s, manynet::node_names(.data)) - if(length(g)==1) c(g, 0, p) else - if(length(g)==2) c(g, p) else - c(t(cbind(t(utils::combn(g, 2)), p))) - } )) - merges <- c(merges, - list(c(t(cbind(t(utils::combn(seq_len(manynet::network_nodes(.data)), 2)), 0))))) - merged <- matrix(unlist(merges), ncol = 3, byrow = TRUE) - merged <- merged[!duplicated(merged[,1:2]),] - merged[,3] <- abs(merged[,3] - max(merged[,3])) - merged[merged == 0] <- NA - merged <- merged[stats::complete.cases(merged),] - merged <- as.data.frame(merged) - names(merged) <- c("from","to","weight") - - distances <- manynet::as_matrix(manynet::as_igraph(merged)) - distances <- distances + t(distances) - # distances <- distances[-which(rownames(distances)==0),-which(colnames(distances)==0)] - if(manynet::is_labelled(.data)) - rownames(distances) <- colnames(distances) <- manynet::node_names(.data) - hc <- hclust(d = as.dist(distances)) - hc$method <- "concor" - hc$distances <- distances - hc -} diff --git a/R/model_k.R b/R/model_k.R deleted file mode 100644 index 16e5642aa..000000000 --- a/R/model_k.R +++ /dev/null @@ -1,141 +0,0 @@ -#' Methods for selecting clusters -#' -#' @description -#' These functions help 
select the number of clusters to return from `hc`, -#' some hierarchical clustering object: -#' -#' - `k_strict()` selects a number of clusters in which there is no -#' distance between cluster members. -#' - `k_elbow()` selects a number of clusters in which there is -#' a fair trade-off between parsimony and fit according to the elbow method. -#' - `k_silhouette()` selects a number of clusters that -#' optimises the silhouette score. -#' -#' These functions are generally not user-facing but used internally -#' in e.g. the `*_equivalence()` functions. -#' -#' @inheritParams cohesion -#' @param hc A hierarchical clustering object. -#' @references -#' Thorndike, Robert L. 1953. -#' "Who Belongs in the Family?". -#' _Psychometrika_, 18(4): 267–76. -#' \doi{10.1007/BF02289263}. -#' -#' Rousseeuw, Peter J. 1987. -#' “Silhouettes: A Graphical Aid to the Interpretation and Validation of Cluster Analysis.” -#' _Journal of Computational and Applied Mathematics_, 20: 53–65. -#' \doi{10.1016/0377-0427(87)90125-7}. -#' @name kselect -NULL - -#' @rdname kselect -#' @export -k_strict <- function(hc, .data){ - zero_merged <- hc$merge[round(hc$height,4) == 0,] - k <- nrow(zero_merged) + manynet::network_nodes(.data) - sum(zero_merged < 0) + sum(zero_merged > 0) - k -} - -#' @rdname kselect -#' @param census A motif census object. -#' @param range An integer indicating the maximum number of options to consider. -#' The minimum of this and the number of nodes in the network is used. 
-#' @export -k_elbow <- function(hc, .data, census, range){ - - clusterCorr <- function(observed_cor_matrix, cluster_vector) { - num_vertices = nrow(observed_cor_matrix) - cluster_cor_mat <- observed_cor_matrix - - obycor <- function(i, j) - mean(observed_cor_matrix[which(cluster_vector[row(observed_cor_matrix)] == - cluster_vector[i] & - cluster_vector[col(observed_cor_matrix)] == - cluster_vector[j])]) - obycor_v <- Vectorize(obycor) - cluster_cor_mat <- outer(1:num_vertices, - 1:num_vertices, - obycor_v) - dimnames(cluster_cor_mat) <- dimnames(observed_cor_matrix) - cluster_cor_mat - } - - elbow_finder <- function(x_values, y_values) { - # Max values to create line - if(min(x_values)==1) x_values <- x_values[2:length(x_values)] - if(min(y_values)==0) y_values <- y_values[2:length(y_values)] - max_df <- data.frame(x = c(min(x_values), max(x_values)), - y = c(min(y_values), max(y_values))) - # Creating straight line between the max values - fit <- stats::lm(max_df$y ~ max_df$x) - # Distance from point to line - distances <- vector() - for (i in seq_len(length(x_values))) { - distances <- c(distances, - abs(stats::coef(fit)[2]*x_values[i] - - y_values[i] + - coef(fit)[1]) / - sqrt(stats::coef(fit)[2]^2 + 1^2)) - } - # Max distance point - x_max_dist <- x_values[which.max(distances)] - x_max_dist - } - - vertices <- manynet::network_nodes(.data) - observedcorrelation <- cor(t(census)) - - resultlist <- list() - correlations <- vector() - for (i in 2:min(range, vertices)) { - cluster_result <- list(label = NA, clusters = NA, correlation = NA) - cluster_result$label <- paste("number of clusters: ", - i) - clusters <- stats::cutree(hc, k = i) - cluster_result$clusters <- clusters - cluster_cor_mat <- clusterCorr(observedcorrelation, clusters) - clustered_observed_cors <- sna::gcor(cluster_cor_mat, observedcorrelation) - cluster_result$correlation <- (clustered_observed_cors) - resultlist <- c(resultlist, cluster_result) - correlations <- c(correlations, 
clustered_observed_cors) - } - - resultlist$correlations <- c(correlations) - dafr <- data.frame(clusters = 2:min(range, vertices), - correlations = c(correlations)) - correct <- NULL # to satisfy the error god - - # k identification method - elbow_finder(dafr$clusters, dafr$correlations) -} - -#' @rdname kselect -#' @export -k_silhouette <- function(hc, .data, range){ - kcs <- 2:min(range, manynet::network_nodes(.data)) - ns <- seq_len(manynet::network_nodes(.data)) - distances <- hc$distances - ks <- vector() - for(kc in kcs){ - cand <- stats::cutree(hc, kc) - ai <- vector() - bi <- vector() - for(i in ns){ - wig <- which(cand == cand[i]) - wig <- wig[wig != i] - ai <- c(ai, - ifelse(length(wig)==0, - 0, mean(as.matrix(distances)[i, wig]))) - wog <- which(cand != cand[i]) - bi <- c(bi, min(vapply(unique(cand[wog]), function(b){ - mean(as.matrix(distances)[i, wog[cand[wog]==b]]) - }, FUN.VALUE = numeric(1)))) - } - si <- (bi - ai)/ - apply(data.frame(ai, bi), 1, max) - ks <- c(ks, mean(si)) - } - k <- which(ks == max(ks)) + 1 - k -} diff --git a/R/model_regression.R b/R/model_regression.R index 81753cc9d..a79e54b07 100644 --- a/R/model_regression.R +++ b/R/model_regression.R @@ -45,7 +45,8 @@ #' 'ego' is excluded from these calculations. #' See Haunss and Hollway (2023) for more on this effect. #' - dyadic covariates (other networks) can just be named -#' @inheritParams cohesion +#' @param .data A manynet-consistent network. +#' See e.g. `manynet::as_tidygraph()` for more details. #' @param method A method for establishing the null hypothesis. #' Note that "qap" uses Dekker et al's (2007) double semi-partialling technique, #' whereas "qapy" permutes only the $y$ variable. @@ -72,7 +73,6 @@ #' @importFrom future plan #' @importFrom furrr future_map_dfr furrr_options #' @importFrom stats glm.fit as.formula df.residual pchisq -#' @seealso `vignette("p7linearmodel")` #' @references #' Krackhardt, David. 1988. 
#' “Predicting with Networks: Nonparametric Multiple Regression Analysis of Dyadic Data.” @@ -293,7 +293,7 @@ convertToMatrixList <- function(formula, data){ DV <- manynet::as_matrix(data) } else DV <- manynet::as_matrix(data) IVnames <- getRHSNames(formula) - specificationAdvice(IVnames) + specificationAdvice(IVnames, data) IVs <- lapply(IVnames, function(IV){ out <- lapply(seq_along(IV), function(elem){ # ego #### @@ -445,7 +445,7 @@ getDependentName <- function(formula) { unlist(lapply(dep, deparse)) } -specificationAdvice <- function(formula){ +specificationAdvice <- function(formula, data){ formdf <- t(data.frame(formula)) if(any(formdf[,1] %in% c("sim","same"))){ vars <- formdf[formdf[,1] %in% c("sim","same"), 2] @@ -456,6 +456,7 @@ specificationAdvice <- function(formula){ # incl }, FUN.VALUE = character(1)) suggests <- suggests[!is.na(suggests)] + if(!manynet::is_directed(data)) suggests <- suggests[!grepl("ego\\(", suggests)] if(length(suggests)>0){ if(length(suggests) > 1) suggests <- paste0(suggests, collapse = ", ") diff --git a/R/model_tests.R b/R/model_tests.R index 39b054e76..29133a288 100644 --- a/R/model_tests.R +++ b/R/model_tests.R @@ -1,4 +1,6 @@ -#' Conditional uniform graph and permutation tests +# Tests of network measures #### + +#' Tests of network measures #' #' @description #' These functions conduct tests of any network-level statistic: @@ -9,8 +11,6 @@ #' - `test_permutation()` performs a quadratic assignment procedure (QAP) test #' of a measure against a distribution of measures on permutations #' of the original network. -#' - `test_gof()` performs a chi-squared test on the squared Mahalanobis distance -#' between a diff_model and diff_models objects. 
#' #' @name tests #' @inheritParams regression @@ -21,29 +21,29 @@ NULL #' @rdname tests +#' @importFrom manynet generate_random bind_node_attributes is_directed is_complex #' @examples #' marvel_friends <- to_unsigned(ison_marvel_relationships) #' marvel_friends <- to_giant(marvel_friends) %>% #' to_subgraph(PowerOrigin == "Human") -#' (cugtest <- test_random(marvel_friends, network_heterophily, attribute = "Attractive", -#' times = 200)) -#' plot(cugtest) +#' # (cugtest <- test_random(marvel_friends, manynet::net_heterophily, attribute = "Attractive", +#' # times = 200)) +#' # plot(cugtest) #' @export test_random <- function(.data, FUN, ..., times = 1000, strategy = "sequential", verbose = FALSE){ + if(missing(.data)) {expect_nodes(); .data <- .G()} args <- unlist(list(...)) if (!is.null(args)) { obsd <- FUN(.data, args) } else { obsd <- FUN(.data) } - n <- manynet::network_dims(.data) - d <- network_density(.data) oplan <- future::plan(strategy) on.exit(future::plan(oplan), add = TRUE) - rands <- furrr::future_map(1:times, manynet::generate_random, n = n, p = d, + rands <- furrr::future_map(1:times, manynet::generate_random, n = .data, .progress = verbose, .options = furrr::furrr_options(seed = T)) if (length(args) > 0) { @@ -64,7 +64,7 @@ test_random <- function(.data, FUN, ..., testdist = simd, mode = manynet::is_directed(.data), diag = manynet::is_complex(.data), - cmode = "csize", + cmode = "edges", plteobs = mean(simd <= obsd), pgteobs = mean(simd >= obsd), reps = times) @@ -73,23 +73,24 @@ test_random <- function(.data, FUN, ..., } #' @rdname tests #' @examples -#' (qaptest <- test_permutation(marvel_friends, -#' network_heterophily, attribute = "Attractive", -#' times = 200)) -#' plot(qaptest) +#' # (qaptest <- test_permutation(marvel_friends, +#' # manynet::net_heterophily, attribute = "Attractive", +#' # times = 200)) +#' # plot(qaptest) #' @export test_permutation <- function(.data, FUN, ..., times = 1000, strategy = "sequential", verbose = FALSE){ + 
if(missing(.data)) {expect_nodes(); .data <- .G()} args <- unlist(list(...)) if (!is.null(args)) { obsd <- FUN(.data, args) } else { obsd <- FUN(.data) } - n <- manynet::network_dims(.data) - d <- network_density(.data) + n <- manynet::net_dims(.data) + d <- manynet::net_density(.data) oplan <- future::plan(strategy) on.exit(future::plan(oplan), add = TRUE) rands <- furrr::future_map(1:times, @@ -169,7 +170,35 @@ plot.network_test <- function(x, ..., color="red", linewidth=1.2) + ggplot2::ylab("Density") } -#' @rdname tests +# Tests of network distributions #### + +#' Tests of network distributions +#' +#' @description +#' These functions conduct tests of distributions: +#' +#' - `test_distribution()` performs a two-sample Kolmogorov-Smirnov test on +#' whether two "diff_model" objects are drawn from the same distribution. +#' - `test_fit()` performs a chi-squared test on the squared Mahalanobis distance +#' between a diff_model and diff_models objects. +#' +#' @name test_distributions +#' @family models +NULL + +#' @rdname test_distributions +#' @param diff_model1,diff_model2 diff_model objects +#' @examples +#' # test_distribution(play_diffusion(ison_networkers), +#' # play_diffusion(ison_networkers, thresholds = 75)) +#' @export +test_distribution <- function(diff_model1, diff_model2){ + out <- stats::ks.test(diff_model1$I, diff_model2$I) + dplyr::tibble(statistic = out$statistic, p.value = out$p.value, + nobs = length(diff_model1$I)) +} + +#' @rdname test_distributions #' @param diff_model A diff_model object is returned by #' `play_diffusion()` or `as_diffusion()` and contains #' a single empirical or simulated diffusion. @@ -191,23 +220,24 @@ plot.network_test <- function(x, ..., # the set of simulated diffusions (and thus that the model is not a good fit). 
#' @examples #' # Playing a reasonably quick diffusion -#' x <- play_diffusion(generate_random(15), transmissibility = 0.7) +#' # x <- play_diffusion(generate_random(15), transmissibility = 0.7) #' # Playing a slower diffusion -#' y <- play_diffusions(generate_random(15), transmissibility = 0.1, times = 40) -#' plot(x) -#' plot(y) -#' test_gof(x, y) +#' # y <- play_diffusions(generate_random(15), transmissibility = 0.1, times = 40) +#' # plot(x) +#' # plot(y) +#' # test_fit(x, y) #' @export -test_gof <- function(diff_model, diff_models){ # make into method? +test_fit <- function(diff_model, diff_models){ # make into method? x <- diff_model y <- diff_models sim <- `0` <- NULL sims <- y |> dplyr::select(sim, t, I) |> tidyr::pivot_wider(names_from = t, values_from = I) |> dplyr::select(-c(sim, `0`)) + sims <- sims[,colSums(stats::cov(sims))!=0] mah <- stats::mahalanobis(x$I[-1], colMeans(sims), stats::cov(sims)) pval <- pchisq(mah, df=length(x$I[-1]), lower.tail=FALSE) dplyr::tibble(statistic = mah, p.value = pval, - df = length(x$I[-1]), nobs = nrow(sims)) + df = length(x$I[-1]), nobs = nrow(sims)) } diff --git a/R/motif_census.R b/R/motif_census.R deleted file mode 100644 index 527fd6445..000000000 --- a/R/motif_census.R +++ /dev/null @@ -1,464 +0,0 @@ -# Node censuses #### - -#' Censuses of nodes' motifs -#' -#' @description -#' These functions include ways to take a census of the positions of nodes -#' in a network: -#' -#' - `node_tie_census()` returns a census of the ties in a network. -#' For directed networks, out-ties and in-ties are bound together. -#' for multiplex networks, the various types of ties are bound together. -#' - `node_triad_census()` returns a census of the triad configurations -#' nodes are embedded in. -#' - `node_quad_census()` returns a census of nodes' positions -#' in motifs of four nodes. -#' - `node_path_census()` returns the shortest path lengths -#' of each node to every other node in the network. 
-#' -#' @name node_census -#' @family motifs -#' @inheritParams cohesion -#' @importFrom igraph vcount make_ego_graph delete_vertices triad_census -NULL - -#' @rdname node_census -#' @examples -#' task_eg <- manynet::to_named(manynet::to_uniplex(manynet::ison_algebra, "tasks")) -#' (tie_cen <- node_tie_census(task_eg)) -#' @export -node_tie_census <- function(.data){ - object <- manynet::as_igraph(.data) - # edge_names <- manynet::network_tie_attributes(object) - if (manynet::is_directed(object)) { - if (manynet::is_multiplex(.data)) { - mat <- do.call(rbind, lapply(unique(manynet::tie_attribute(object, "type")), - function(x){ - rc <- manynet::as_matrix(manynet::to_uniplex(object, x)) - rbind(rc, t(rc)) - })) - } else { - rc <- manynet::as_matrix(object) - mat <- rbind(rc, t(rc)) - } - } else { - if (manynet::is_multiplex(.data)) { - mat <- do.call(rbind, lapply(unique(manynet::tie_attribute(object, "type")), - function(x){ - manynet::as_matrix(manynet::to_uniplex(object, x)) - })) - } else { - mat <- manynet::as_matrix(object) - } - } - if(manynet::is_labelled(object) & manynet::is_directed(object)) - if(manynet::is_multiplex(.data)){ - rownames(mat) <- apply(expand.grid(c(paste0("from", manynet::node_names(object)), - paste0("to", manynet::node_names(object))), - unique(manynet::tie_attribute(object, "type"))), - 1, paste, collapse = "_") - } else { - rownames(mat) <- rep(c(paste0("from", manynet::node_names(object)), - paste0("to", manynet::node_names(object)))) - } - make_node_motif(t(mat), object) -} - -#' @rdname node_census -#' @references -#' Davis, James A., and Samuel Leinhardt. 1967. -#' “\href{https://files.eric.ed.gov/fulltext/ED024086.pdf}{The Structure of Positive Interpersonal Relations in Small Groups}.” 55. 
-#' @examples -#' (triad_cen <- node_triad_census(task_eg)) -#' @export -node_triad_census <- function(.data){ - out <- t(sapply(seq.int(manynet::network_nodes(.data)), - function(x) network_triad_census(.data) - network_triad_census(manynet::delete_nodes(.data, x)))) - rownames(out) <- manynet::node_names(.data) - make_node_motif(out, .data) -} - -#' @rdname node_census -#' @section Quad census: -#' The quad census uses the `{oaqc}` package to do -#' the heavy lifting of counting the number of each orbits. -#' See `vignette('oaqc')`. -#' However, our function relabels some of the motifs -#' to avoid conflicts and improve some consistency with -#' other census-labelling practices. -#' The letter-number pairing of these labels indicate -#' the number and configuration of ties. -#' For now, we offer a rough translation: -#' -#' | migraph | Ortmann and Brandes -#' | ------------- |------------- | -#' | E4 | co-K4 -#' | I40, I41 | co-diamond -#' | H4 | co-C4 -#' | L42, L41, L40 | co-paw -#' | D42, D40 | co-claw -#' | U42, U41 | P4 -#' | Y43, Y41 | claw -#' | P43, P42, P41 | paw -#' | 04 | C4 -#' | Z42, Z43 | diamond -#' | X4 | K4 -#' -#' See also [this list of graph classes](https://www.graphclasses.org/smallgraphs.html#nodes4). -#' @importFrom tidygraph %E>% -#' @references -#' Ortmann, Mark, and Ulrik Brandes. 2017. -#' “Efficient Orbit-Aware Triad and Quad Census in Directed and Undirected Graphs.” -#' \emph{Applied Network Science} 2(1):13. -#' \doi{10.1007/s41109-017-0027-2}. 
-#' @examples -#' node_quad_census(manynet::ison_southern_women) -#' @export -node_quad_census <- function(.data){ - if (!("oaqc" %in% rownames(utils::installed.packages()))) { - message("Please install package `{oaqc}`.") - } else { - graph <- .data %>% manynet::as_tidygraph() %E>% - as.data.frame() - out <- oaqc::oaqc(graph)[[1]] - out <- out[-1,] - rownames(out) <- manynet::node_names(.data) - colnames(out) <- c("E4", # co-K4 - "I41","I40", # co-diamond - "H4", # co-C4 - "L42","L41","L40", # co-paw - "D42","D40", # co-claw - "U42","U41", # P4 - "Y43","Y41", # claw - "P43","P42","P41", # paw - "04", # C4 - "Z42","Z43", # diamond - "X4") # K4 - if(manynet::is_twomode(.data)) out <- out[,-c(8,9,14,15,16,18,19,20)] - make_node_motif(out, .data) - } -} - -# #' @export -# node_bmotif_census <- function(.data, normalized = FALSE){ -# if (!("bmotif" %in% rownames(utils::installed.packages()))) { -# message("Please install package `{bmotif}`.") -# out <- bmotif::node_positions(manynet::as_matrix(.data), -# weights_method = ifelse(manynet::is_weighted(.data), -# 'mean_motifweights', 'none'), -# normalisation = ifelse(normalized, -# 'levelsize_NAzero', 'none')) -# make_node_motif(out, .data) -# } -# } -# -# #' @export -# node_igraph_census <- function(.data, normalized = FALSE){ -# out <- igraph::motifs(manynet::as_igraph(.data), 4) -# if(manynet::is_labelled(.data)) -# rownames(out) <- manynet::node_names(.data) -# colnames(out) <- c("co-K4", -# "co-diamond", -# "co-C4", -# "co-paw", -# "co-claw", -# "P4", -# "claw", -# "paw", -# "C4", -# "diamond", -# "K4") -# make_node_motif(out, .data) -# } - -#' @rdname node_census -#' @importFrom igraph distances -#' @references -#' Dijkstra, Edsger W. 1959. -#' "A note on two problems in connexion with graphs". -#' _Numerische Mathematik_ 1, 269-71. -#' \doi{10.1007/BF01386390}. -#' -#' Opsahl, Tore, Filip Agneessens, and John Skvoretz. 2010. -#' "Node centrality in weighted networks: Generalizing degree and shortest paths". 
-#' _Social Networks_ 32(3): 245-51. -#' \doi{10.1016/j.socnet.2010.03.006}. -#' @examples -#' node_path_census(manynet::ison_adolescents) -#' node_path_census(manynet::ison_southern_women) -#' @export -node_path_census <- function(.data){ - if(manynet::is_weighted(.data)){ - tore <- manynet::as_matrix(.data)/mean(manynet::as_matrix(.data)) - out <- 1/tore - } else out <- igraph::distances(manynet::as_igraph(.data)) - diag(out) <- 0 - make_node_motif(out, .data) -} - -# Network censuses #### - -#' Censuses of motifs at the network level -#' -#' @description -#' These functions include ways to take a census of the positions of nodes -#' in a network: -#' -#' - `network_dyad_census()` returns a census of dyad motifs in a network. -#' - `network_triad_census()` returns a census of triad motifs in a network. -#' - `network_mixed_census()` returns a census of triad motifs that span -#' a one-mode and a two-mode network. -#' -#' @name network_census -#' @family motifs -#' @inheritParams node_census -#' @param object2 A second, two-mode migraph-consistent object. -NULL - -#' @rdname network_census -#' @examples -#' network_dyad_census(manynet::ison_algebra) -#' @export -network_dyad_census <- function(.data) { - if (manynet::is_twomode(.data)) { - stop("A twomode or multilevel option for a dyad census is not yet implemented.") - } else { - out <- suppressWarnings(igraph::dyad_census(manynet::as_igraph(.data))) - out <- unlist(out) - names(out) <- c("Mutual", "Asymmetric", "Null") - if (!manynet::is_directed(.data)) out <- out[c(1, 3)] - make_network_motif(out, .data) - } -} - -#' @rdname network_census -#' @references -#' Davis, James A., and Samuel Leinhardt. 1967. -#' “\href{https://files.eric.ed.gov/fulltext/ED024086.pdf}{The Structure of Positive Interpersonal Relations in Small Groups}.” 55. 
-#' @examples -#' network_triad_census(manynet::ison_adolescents) -#' @export -network_triad_census <- function(.data) { - if (manynet::is_twomode(.data)) { - stop("A twomode or multilevel option for a triad census is not yet implemented.") - } else { - out <- suppressWarnings(igraph::triad_census(as_igraph(.data))) - names(out) <- c("003", "012", "102", "021D", - "021U", "021C", "111D", "111U", - "030T", "030C", "201", "120D", - "120U", "120C", "210", "300") - if (!manynet::is_directed(.data)) out <- out[c(1, 2, 3, 11, 15, 16)] - make_network_motif(out, .data) - } -} - -#' @rdname network_census -#' @source Alejandro Espinosa 'netmem' -#' @references -#' Hollway, James, Alessandro Lomi, Francesca Pallotti, and Christoph Stadtfeld. 2017. -#' “Multilevel Social Spaces: The Network Dynamics of Organizational Fields.” -#' _Network Science_ 5(2): 187–212. -#' \doi{10.1017/nws.2017.8} -#' @examples -#' marvel_friends <- manynet::to_unsigned(manynet::ison_marvel_relationships, "positive") -#' (mixed_cen <- network_mixed_census(marvel_friends, manynet::ison_marvel_teams)) -#' @export -network_mixed_census <- function (.data, object2) { - if(manynet::is_twomode(.data)) - stop("First object should be a one-mode network") - if(!manynet::is_twomode(object2)) - stop("Second object should be a two-mode network") - if(manynet::network_dims(.data)[1] != manynet::network_dims(object2)[1]) - stop("Non-conformable arrays") - m1 <- manynet::as_matrix(.data) - m2 <- manynet::as_matrix(object2) - cp <- function(m) (-m + 1) - onemode.reciprocal <- m1 * t(m1) - onemode.forward <- m1 * cp(t(m1)) - onemode.backward <- cp(m1) * t(m1) - onemode.null <- cp(m1) * cp(t(m1)) - diag(onemode.forward) <- 0 - diag(onemode.backward) <- 0 - diag(onemode.null) <- 0 - bipartite.twopath <- m2 %*% t(m2) - bipartite.null <- cp(m2) %*% cp(t(m2)) - bipartite.onestep1 <- m2 %*% cp(t(m2)) - bipartite.onestep2 <- cp(m2) %*% t(m2) - diag(bipartite.twopath) <- 0 - diag(bipartite.null) <- 0 - 
diag(bipartite.onestep1) <- 0 - diag(bipartite.onestep2) <- 0 - res <- c("22" = sum(onemode.reciprocal * bipartite.twopath) / 2, - "21" = sum(onemode.forward * bipartite.twopath) / 2 + sum(onemode.backward * bipartite.twopath) / 2, - "20" = sum(onemode.null * bipartite.twopath) / 2, - "12" = sum(onemode.reciprocal * bipartite.onestep1) / 2 + sum(onemode.reciprocal * bipartite.onestep2) / 2, - "11D" = sum(onemode.forward * bipartite.onestep1) / 2 + sum(onemode.backward * bipartite.onestep2) / 2, - "11U" = sum(onemode.forward * bipartite.onestep2) / 2 + sum(onemode.backward * bipartite.onestep1) / 2, - "10" = sum(onemode.null * bipartite.onestep2) / 2 + sum(onemode.null * bipartite.onestep1) / 2, - "02" = sum(onemode.reciprocal * bipartite.null) / 2, - "01" = sum(onemode.forward * bipartite.null) / 2 + sum(onemode.backward * bipartite.null) / 2, - "00" = sum(onemode.null * bipartite.null) / 2) - make_network_motif(res, .data) -} - -# Brokerage #### - -#' Censuses of brokerage motifs -#' -#' @description -#' These functions include ways to take a census of the brokerage positions of nodes -#' in a network: -#' -#' - `node_brokerage_census()` returns the Gould-Fernandez brokerage -#' roles played by nodes in a network. -#' - `network_brokerage_census()` returns the Gould-Fernandez brokerage -#' roles in a network. -#' -#' @name brokerage_census -#' @family motifs -#' @inheritParams node_census -#' @param membership A vector of partition membership as integers. -#' @param standardized Whether the score should be standardized -#' into a _z_-score indicating how many standard deviations above -#' or below the average the score lies. -NULL - -#' @rdname brokerage_census -#' @importFrom sna brokerage -#' @references -#' Gould, R.V. and Fernandez, R.M. 1989. -#' “Structures of Mediation: A Formal Approach to Brokerage in Transaction Networks.” -#' _Sociological Methodology_, 19: 89-126. -#' -#' Jasny, Lorien, and Mark Lubell. 2015. 
-#' “Two-Mode Brokerage in Policy Networks.” -#' _Social Networks_ 41:36–47. -#' \doi{10.1016/j.socnet.2014.11.005}. -#' @examples -#' node_brokerage_census(manynet::ison_networkers, "Discipline") -#' @export -node_brokerage_census <- function(.data, membership, standardized = FALSE){ - if(!manynet::is_twomode(.data)){ - out <- sna::brokerage(manynet::as_network(.data), - manynet::node_attribute(.data, membership)) - out <- if(standardized) out$z.nli else out$raw.nli - colnames(out) <- c("Coordinator", "Itinerant", "Gatekeeper", - "Representative", "Liaison", "Total") - } else { - out <- suppressWarnings(sna::brokerage(manynet::as_network(manynet::to_mode1(.data)), - manynet::node_attribute(.data, membership))) - out <- if(standardized) out$z.nli else out$raw.nli - out <- out[,-4] - colnames(out) <- c("Coordinator", "Itinerant", "Gatekeeper", - "Liaison", "Total") - } - make_node_motif(out, .data) -} - -#' @rdname brokerage_census -#' @examples -#' network_brokerage_census(manynet::ison_networkers, "Discipline") -#' @export -network_brokerage_census <- function(.data, membership, standardized = FALSE){ - if(!manynet::is_twomode(.data)){ - out <- sna::brokerage(manynet::as_network(.data), - manynet::node_attribute(.data, membership)) - out <- if(standardized) out$z.gli else out$raw.gli - names(out) <- c("Coordinator", "Itinerant", "Gatekeeper", - "Representative", "Liaison", "Total") - } else { - out <- suppressWarnings(sna::brokerage(manynet::as_network(manynet::to_mode1(.data)), - manynet::node_attribute(.data, membership))) - out <- if(standardized) out$z.gli else out$raw.gli - names(out) <- c("Coordinator", "Itinerant", "Gatekeeper", - "Representative", "Liaison", "Total") - } - make_network_motif(out, .data) -} - -#' @rdname brokerage_census -#' @references -#' Hamilton, Matthew, Jacob Hileman, and Orjan Bodin. 2020. 
-#' "Evaluating heterogeneous brokerage: New conceptual and methodological approaches -#' and their application to multi-level environmental governance networks" -#' _Social Networks_ 61: 1-10. -#' \doi{10.1016/j.socnet.2019.08.002} -#' @export -node_brokering_activity <- function(.data, membership){ - from <- to.y <- to_memb <- from_memb <- NULL - twopaths <- .to_twopaths(.data) - if(!missing(membership)){ - twopaths$from_memb <- manynet::node_attribute(.data, membership)[`if`(manynet::is_labelled(.data), - match(twopaths$from, manynet::node_names(.data)), - twopaths$from)] - twopaths$to_memb <- manynet::node_attribute(.data, membership)[`if`(manynet::is_labelled(.data), - match(twopaths$to.y, manynet::node_names(.data)), - twopaths$to.y)] - twopaths <- dplyr::filter(twopaths, from_memb != to_memb) - } - # tabulate brokerage - out <- c(table(twopaths$to)) - # correct ordering for named data - if(manynet::is_labelled(.data)) out <- out[match(manynet::node_names(.data), names(out))] - # missings should be none - out[is.na(out)] <- 0 - make_node_measure(out, .data) -} - -#' @rdname brokerage_census -#' @examples -#' node_brokering_exclusivity(ison_networkers, "Discipline") -#' @export -node_brokering_exclusivity <- function(.data, membership){ - from <- to.y <- to_memb <- from_memb <- NULL - twopaths <- .to_twopaths(.data) - if(!missing(membership)){ - twopaths$from_memb <- manynet::node_attribute(.data, membership)[`if`(manynet::is_labelled(.data), - match(twopaths$from, manynet::node_names(.data)), - twopaths$from)] - twopaths$to_memb <- manynet::node_attribute(.data, membership)[`if`(manynet::is_labelled(.data), - match(twopaths$to.y, manynet::node_names(.data)), - twopaths$to.y)] - twopaths <- dplyr::filter(twopaths, from_memb != to_memb) - } - # get only exclusive paths - out <- twopaths %>% dplyr::group_by(from, to.y) %>% dplyr::filter(dplyr::n()==1) - # tabulate brokerage - out <- c(table(out$to)) - # correct ordering for named data - 
if(manynet::is_labelled(.data)) out <- out[match(manynet::node_names(.data), names(out))] - # missings should be none - out[is.na(out)] <- 0 - make_node_measure(out, .data) -} - -#' @rdname brokerage_census -#' @export -node_brokering <- function(.data, membership){ - activ <- node_brokering_activity(.data, membership) - exclusiv <- node_brokering_exclusivity(.data, membership) - activ <- activ - mean(activ) - exclusiv <- exclusiv - mean(exclusiv) - out <- dplyr::case_when(activ > 0 & exclusiv > 0 ~ "Powerhouse", - activ > 0 & exclusiv < 0 ~ "Connectors", - activ < 0 & exclusiv > 0 ~ "Linchpins", - activ < 0 & exclusiv < 0 ~ "Sideliners") - make_node_member(out, .data) -} - -.to_twopaths <- function(.data){ - to <- from <- to.y <- NULL - if(!manynet::is_directed(.data)){ - el <- manynet::as_edgelist(manynet::to_reciprocated(.data)) - } else el <- manynet::as_edgelist(.data) - twopaths <- dplyr::full_join(el, el, - by = dplyr::join_by(to == from), - relationship = "many-to-many") - # remove non two-paths - twopaths <- dplyr::filter(twopaths, !(is.na(from) | is.na(to.y))) - # remove reciprocated paths - twopaths <- dplyr::filter(twopaths, from != to.y) - # remove triads - twopaths <- dplyr::filter(twopaths, !paste(from, to.y) %in% paste(from, to)) - twopaths -} diff --git a/R/reexports_ggplot2.R b/R/reexports_ggplot2.R index 9d58908be..16aacbe4f 100644 --- a/R/reexports_ggplot2.R +++ b/R/reexports_ggplot2.R @@ -29,3 +29,8 @@ ggplot2::aes #' @importFrom ggplot2 ggsave #' @export ggplot2::ggsave + +#' @importFrom ggplot2 scale_y_discrete +#' @export +ggplot2::scale_y_discrete + diff --git a/README.Rmd b/README.Rmd index aa32e8da9..2aef052c9 100644 --- a/README.Rmd +++ b/README.Rmd @@ -20,7 +20,7 @@ list_data <- function(string){ } ``` -# migraph +# migraph migraph logo [![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html#maturing) @@ -47,11 +47,9 @@ or require one or more other specific 
packages. Translating between packages various syntaxes and expectations can introduce significant transaction costs though, driving confusion, inefficiencies, and errors. -`{migraph}` builds upon `{manynet}` to offer smart solutions to these problems. -It includes functions for marking and measuring networks and their nodes and ties, -identifying motifs and memberships in them, -and modelling these networks or simulating processes such as diffusion upon them. -Based on `{manynet}`, every function works for any compatible network format +`{migraph}` builds upon [`{manynet}`](https://stocnet.github.io/manynet/) to offer smart solutions to these problems. +`{migraph}` includes functions for analysing and modelling these networks. +Since it is based on `{manynet}`, every function works for any compatible network format - from base R matrices or edgelists as data frames, [`{igraph}`](https://igraph.org/r/), [`{network}`](https://statnet.org), or @@ -60,7 +58,19 @@ This means it is compatible with your existing workflow, is extensible by other packages, and uses the most efficient algorithm available for each task. - +- [About the package](#about-the-package) + - [Package background](#package-background) +- [How does migraph help?](#how-does-migraph-help) +- [Tutorials](#tutorials) +- [Installation](#installation) + - [Stable](#stable) + - [Development](#development) +- [Relationship to other packages](#relationship-to-other-packages) +- [Funding details](#funding-details) + +### Package background + +Cover image of the book Multimodal Political Networks The package is intended as a software companion to the book: @@ -68,79 +78,44 @@ The package is intended as a software companion to the book: Cambridge University Press: Cambridge. Most datasets used in the book are included in this package, -and the package implements most methods discussed in the book. +and `manynet` and `{migraph}` together implement most methods discussed in the book. 
Since many of theses datasets and routines are discussed and analysed more there, -if you like the package please check out the book, and vice versa. +if you like the package(s) please check out the book, and vice versa. ## How does migraph help? -`{migraph}` includes five special groups of functions, -each with their own pretty `print()` and `plot()` methods: -marks, measures, memberships, motifs, and models. - -`{migraph}` uses a common syntax to help new and experienced network analysts -find the right function and use it correctly. -All `network_*()` functions return a value for the network/graph or for each mode in the network. -All `node_*()` functions return values for each node or vertex in the network. -And all `tie_*()` functions return values for each tie or edge in the network. -Functions are given intuitive and succinct names that avoid conflicts -with existing function names wherever possible. -All results are normalised by default, facilitating comparison. - -### Measures - -`{migraph}` also offers a large and growing smorgasbord of measures that -can be used at the node, tie, and network level. -Each recognises whether the network is directed or undirected, -weighted or unweighted, one-mode or two-mode. -All return normalized values wherever possible, -though this can be overrided. 
-Here are some examples: - -- _Centrality_: `node_degree()`, `node_closeness()`, `node_betweenness()`, and `node_eigenvector()` -- _Centralization_: `network_degree()`, `network_closeness()`, `network_betweenness()`, and `network_eigenvector()` -- _Cohesion_: `network_density()`, `network_reciprocity()`, `network_transitivity()`, `network_equivalency()`, and `network_congruency()` -- _Connectedness_: `network_components()`, `network_cohesion()`, `network_adhesion()`, `network_diameter()`, `network_length()` -- _Diversity_: `network_diversity()`, `network_homophily()`, `network_assortativity()`, - `node_diversity()`, `node_homophily()`, `node_assortativity()`, `node_richness()` -- _Innovation_: e.g. `node_redundancy()`, `node_effsize()`, `node_efficiency()`, `node_constraint()`, `node_hierarchy()` -- _Topological features_: e.g. `network_core()`, `network_factions()`, `network_modularity()`, `network_smallworld()`, `network_balance()` - -Please explore [the list of functions](https://stocnet.github.io/migraph/reference/index.html) to find out more. - -### Motifs and Memberships - -The package also include functions for returning various censuses -at the network or node level, e.g.: - -- `r list_functions("network_.*_census")` -- `r list_functions("node_.*_census")` - -These can be analysed alone, or used as a profile for establishing equivalence. -`{migraph}` offers both HCA and CONCOR algorithms, -as well as elbow, silhouette, and strict methods for _k_-cluster selection. - -- `r list_functions("node.*_equivalence")` - -`{migraph}` also includes functions for establishing membership on other bases, -such as typical community detection algorithms, -as well as component and core-periphery partitioning algorithms. 
- -### Models - -All measures can be tested against conditional uniform graph (CUG) -or quadratic assignment procedure (QAP) distributions using: +`{migraph}` allows the testing of `{manynet}` measures against +conditional uniform graph (CUG) or quadratic assignment procedure (QAP) distributions using: - `r list_functions("^test_")` +Plot showing the results of a QAP test + Hypotheses can also be tested within multivariate models via multiple (linear or logistic) regression QAP: - `network_reg()` +A violin plot showing the results of an MRQAP + `{migraph}` is the only package that offers these testing frameworks for two-mode networks as well as one-mode networks. +## Tutorials + +Together with `{manynet}`, this package makes available interactive `{learnr}` tutorials. +The easiest way to access the tutorials is via `run_tute()`. +If no tutorial name is provided, the function will return a list of tutorials +currently available in either package: + +```{r learnr-tutes} +library(migraph) +run_tute() +# run_tute("tutorial5") +``` + +For more details on the `{learnr}` package, see [here](https://rstudio.github.io/learnr/). + ## Installation ### Stable @@ -156,6 +131,15 @@ You can then begin to use `{migraph}` by loading the package: This will load any required packages and make the data contained within the package available. +`{migraph}` relies on some packages only for one or two rather specific functions. +By default these are not installed together with `{migraph}`, +but we make it easy to install them as and when needed for the first time with a console prompt. 
+If you would prefer not to encounter these prompts, +or plan to use the package for the first time through tutorials, +you can make sure all the dependencies are installed with: + +`install.packages('migraph', dependencies = TRUE)` + ### Development For the latest development version, @@ -180,20 +164,11 @@ please install the `{remotes}` or `{devtools}` package from CRAN and then: - For latest development version: `remotes::install_github("stocnet/migraph@develop")` -### Tutorials +### Other sources -Together with `{manynet}`, this package makes available interactive `{learnr}` tutorials. -The easiest way to access the tutorials is via `run_tute()`. -If no tutorial name is provided, the function will return a list of tutorials -currently available in either package: +Those using Mac computers may also install using Macports: -```{r learnr-tutes} -library(migraph) -run_tute() -# run_tute("tutorial5") -``` - -For more details on the `{learnr}` package, see [here](https://rstudio.github.io/learnr/). +`sudo port install R-migraph` ## Relationship to other packages diff --git a/README.md b/README.md index 8bef8ff3e..79557314f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# migraph +# migraph migraph logo @@ -36,19 +36,31 @@ require one or more other specific packages. Translating between packages various syntaxes and expectations can introduce significant transaction costs though, driving confusion, inefficiencies, and errors. -`{migraph}` builds upon `{manynet}` to offer smart solutions to these -problems. It includes functions for marking and measuring networks and -their nodes and ties, identifying motifs and memberships in them, and -modelling these networks or simulating processes such as diffusion upon -them. 
Based on `{manynet}`, every function works for any compatible -network format - from base R matrices or edgelists as data frames, +`{migraph}` builds upon +[`{manynet}`](https://stocnet.github.io/manynet/) to offer smart +solutions to these problems. `{migraph}` includes functions for +analysing and modelling these networks. Since it is based on +`{manynet}`, every function works for any compatible network format - +from base R matrices or edgelists as data frames, [`{igraph}`](https://igraph.org/r/), [`{network}`](https://statnet.org), or [`{tidygraph}`](https://tidygraph.data-imaginist.com/index.html) objects. This means it is compatible with your existing workflow, is extensible by other packages, and uses the most efficient algorithm available for each task. - +- [About the package](#about-the-package) + - [Package background](#package-background) +- [How does migraph help?](#how-does-migraph-help) +- [Tutorials](#tutorials) +- [Installation](#installation) + - [Stable](#stable) + - [Development](#development) +- [Relationship to other packages](#relationship-to-other-packages) +- [Funding details](#funding-details) + +### Package background + +Cover image of the book Multimodal Political Networks The package is intended as a software companion to the book: @@ -57,91 +69,61 @@ The package is intended as a software companion to the book: > Networks*](https://www.cambridge.org/core/books/multimodal-political-networks/43EE8C192A1B0DCD65B4D9B9A7842128). > Cambridge University Press: Cambridge. -Most datasets used in the book are included in this package, and the -package implements most methods discussed in the book. Since many of -theses datasets and routines are discussed and analysed more there, if -you like the package please check out the book, and vice versa. +Most datasets used in the book are included in this package, and +`{manynet}` and `{migraph}` together implement most methods discussed in +the book.
Since many of these datasets and routines are discussed and +analysed more there, if you like the package(s) please check out the +book, and vice versa. ## How does migraph help? -`{migraph}` includes five special groups of functions, each with their -own pretty `print()` and `plot()` methods: marks, measures, memberships, -motifs, and models. - -`{migraph}` uses a common syntax to help new and experienced network -analysts find the right function and use it correctly. All `network_*()` -functions return a value for the network/graph or for each mode in the -network. All `node_*()` functions return values for each node or vertex -in the network. And all `tie_*()` functions return values for each tie -or edge in the network. Functions are given intuitive and succinct names -that avoid conflicts with existing function names wherever possible. All -results are normalised by default, facilitating comparison. - -### Measures - -`{migraph}` also offers a large and growing smorgasbord of measures that -can be used at the node, tie, and network level. Each recognises whether -the network is directed or undirected, weighted or unweighted, one-mode -or two-mode. All return normalized values wherever possible, though this -can be overrided. Here are some examples: - -- *Centrality*: `node_degree()`, `node_closeness()`, - `node_betweenness()`, and `node_eigenvector()` -- *Centralization*: `network_degree()`, `network_closeness()`, - `network_betweenness()`, and `network_eigenvector()` -- *Cohesion*: `network_density()`, `network_reciprocity()`, - `network_transitivity()`, `network_equivalency()`, and - `network_congruency()` -- *Connectedness*: `network_components()`, `network_cohesion()`, - `network_adhesion()`, `network_diameter()`, `network_length()` -- *Diversity*: `network_diversity()`, `network_homophily()`, - `network_assortativity()`, `node_diversity()`, `node_homophily()`, - `node_assortativity()`, `node_richness()` -- *Innovation*: e.g.
`node_redundancy()`, `node_effsize()`, - `node_efficiency()`, `node_constraint()`, `node_hierarchy()` -- *Topological features*: e.g. `network_core()`, `network_factions()`, - `network_modularity()`, `network_smallworld()`, `network_balance()` - -Please explore [the list of -functions](https://stocnet.github.io/migraph/reference/index.html) to -find out more. - -### Motifs and Memberships - -The package also include functions for returning various censuses at the -network or node level, e.g.: - -- `network_brokerage_census()`, `network_dyad_census()`, - `network_mixed_census()`, `network_triad_census()` -- `node_brokerage_census()`, `node_path_census()`, `node_quad_census()`, - `node_tie_census()`, `node_triad_census()` - -These can be analysed alone, or used as a profile for establishing -equivalence. `{migraph}` offers both HCA and CONCOR algorithms, as well -as elbow, silhouette, and strict methods for *k*-cluster selection. - -- `node_automorphic_equivalence()`, `node_equivalence()`, - `node_regular_equivalence()`, `node_structural_equivalence()` - -`{migraph}` also includes functions for establishing membership on other -bases, such as typical community detection algorithms, as well as -component and core-periphery partitioning algorithms. 
- -### Models - -All measures can be tested against conditional uniform graph (CUG) or -quadratic assignment procedure (QAP) distributions using: - -- `test_gof()`, `test_permutation()`, `test_random()` +`{migraph}` allows the testing of `{manynet}` measures against +conditional uniform graph (CUG) or quadratic assignment procedure (QAP) +distributions using: + +- `test_distribution()`, `test_fit()`, `test_gof()`, + `test_permutation()`, `test_random()` + +Plot showing the results of a QAP test Hypotheses can also be tested within multivariate models via multiple (linear or logistic) regression QAP: - `network_reg()` +A violin plot showing the results of an MRQAP + `{migraph}` is the only package that offers these testing frameworks for two-mode networks as well as one-mode networks. +## Tutorials + +Together with `{manynet}`, this package makes available interactive +`{learnr}` tutorials. The easiest way to access the tutorials is via +`run_tute()`. If no tutorial name is provided, the function will return +a list of tutorials currently available in either package: + +``` r +library(migraph) +run_tute() +#> # A tibble: 9 × 3 +#> package name title +#> +#> 1 manynet tutorial0 Intro to R +#> 2 manynet tutorial1 Data +#> 3 manynet tutorial2 Visualisation +#> 4 manynet tutorial3 Centrality +#> 5 manynet tutorial4 Community +#> 6 manynet tutorial5 Position +#> 7 manynet tutorial6 Topology +#> 8 manynet tutorial7 Diffusion +#> 9 migraph tutorial8 Diversity and Regression +# run_tute("tutorial5") +``` + +For more details on the `{learnr}` package, see +[here](https://rstudio.github.io/learnr/). + ## Installation ### Stable @@ -158,6 +140,15 @@ You can then begin to use `{migraph}` by loading the package: This will load any required packages and make the data contained within the package available. +`{migraph}` relies on some packages only for one or two rather specific +functions. 
By default these are not installed together with `{migraph}`, +but we make it easy to install them as and when needed for the first +time with a console prompt. If you would prefer not to encounter these +prompts, or plan to use the package for the first time through +tutorials, you can make sure all the dependencies are installed with: + +`install.packages('migraph', dependencies = TRUE)` + ### Development For the latest development version, for slightly earlier access to new @@ -186,33 +177,11 @@ and then: - For latest development version: `remotes::install_github("stocnet/migraph@develop")` -### Tutorials - -Together with `{manynet}`, this package makes available interactive -`{learnr}` tutorials. The easiest way to access the tutorials is via -`run_tute()`. If no tutorial name is provided, the function will return -a list of tutorials currently available in either package: +### Other sources -``` r -library(migraph) -run_tute() -#> # A tibble: 9 × 3 -#> package name title -#> -#> 1 manynet tutorial0 Intro to R -#> 2 manynet tutorial1 Data -#> 3 manynet tutorial2 Visualisation -#> 4 migraph tutorial3 Centrality -#> 5 migraph tutorial4 Community -#> 6 migraph tutorial5 Position -#> 7 migraph tutorial6 Topology -#> 8 manynet tutorial7 Diffusion -#> 9 migraph tutorial8 Regression -# run_tute("tutorial5") -``` +Those using Mac computers may also install using Macports: -For more details on the `{learnr}` package, see -[here](https://rstudio.github.io/learnr/). 
+`sudo port install R-migraph` ## Relationship to other packages diff --git a/cran-comments.md b/cran-comments.md index ae7632534..e46ec1750 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,15 @@ ## Test environments -* local R installation, x86_64-apple-darwin20, R 4.3.2 -* Mac OS X 12.6.9 (on Github), R 4.3.2 -* Microsoft Windows Server 2022 10.0.20348 (on Github), R 4.3.2 -* Ubuntu 22.04.3 (on Github), R 4.3.2 +* local R installation, aarch64-apple-darwin20, R 4.4.0 +* macOS 14.5 (on Github), R 4.4.1 +* Microsoft Windows Server 2022 10.0.20348 (on Github), R 4.4.1 +* Ubuntu 22.04.4 (on Github), R 4.4.1 ## R CMD check results 0 errors | 0 warnings | 0 notes + +* This release is expected to create errors with older versions of manynet, +but should create no errors with the latest version +* Since manynet is a Depends for migraph, this should not pose a problem for users +upgrading diff --git a/inst/migraph.png b/inst/migraph.png index 80fc85e57..13092a832 100644 Binary files a/inst/migraph.png and b/inst/migraph.png differ diff --git a/inst/migraph_logo.png b/inst/migraph_logo.png new file mode 100644 index 000000000..3b2f31414 Binary files /dev/null and b/inst/migraph_logo.png differ diff --git a/inst/migraph_old.png b/inst/migraph_old.png new file mode 100644 index 000000000..80fc85e57 Binary files /dev/null and b/inst/migraph_old.png differ diff --git a/inst/tutorials/tutorial3/centrality.Rmd b/inst/tutorials/tutorial3/centrality.Rmd deleted file mode 100644 index ee7edcbea..000000000 --- a/inst/tutorials/tutorial3/centrality.Rmd +++ /dev/null @@ -1,343 +0,0 @@ ---- -title: "Centrality" -author: "by James Hollway" -output: - learnr::tutorial: - theme: journal -runtime: shiny_prerendered ---- - -```{r setup, include=FALSE} -library(learnr) -library(manynet) -library(migraph) -library(patchwork) -knitr::opts_chunk$set(echo = FALSE) -ison_brandes2 <- ison_brandes %>% rename(type = twomode_type) -``` - -## Calculating centrality - -For this 
exercise, we'll use the `ison_brandes` dataset in `{manynet}`. -This dataset is in a 'tidygraph' format, -but `manynet` makes it easy to coerce this into other forms -to be compatible with other packages. -We can create a two-mode version of the dataset -by renaming the nodal attribute "twomode_type" to just "type". -Let's begin by graphing these datasets using `manynet::autographr()`. - -```{r coercion, exercise = TRUE, purl = FALSE} - -``` - -```{r coercion-hint-1, purl = FALSE} -# Let's graph the one-mode version -autographr(____) -``` - -```{r coercion-hint-2, purl = FALSE} -# Now, let's create a two-mode version 'ison_brandes2' and graph it. -ison_brandes2 <- ison_brandes %>% rename(type = twomode_type) -autographr(____) -``` - -```{r coercion-solution, purl = FALSE} -# plot the one-mode version -autographr(ison_brandes) -ison_brandes2 <- ison_brandes %>% rename(type = twomode_type) -# plot the two-mode version -autographr(ison_brandes2) -``` - -The network is anonymous, but I think it would be nice to add some names, -even if it's just pretend. -Luckily, `{manynet}` has a function for this. -This makes plotting the network just a wee bit more accessible and interpretable: - -```{r addingnames, exercise = TRUE, purl = FALSE} -ison_brandes <- to_named(ison_brandes) -``` - -```{r addingnames-hint-1, purl = FALSE} -# Now, let's graph using the object names: "ison_brandes" -autographr(____) -``` - -```{r addingnames-solution} -ison_brandes <- to_named(ison_brandes) -# plot network with names -autographr(ison_brandes) -``` - -Note that you will likely get a different set of names, -as they are assigned randomly from a pool of (American) first names. - -### Degree centrality - -Let's start with calculating degree, as it is easy to calculate yourself. -Just sum the rows or columns of the matrix! 
- -```{r degreesum, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r degreesum-hint-1, purl = FALSE} -# We can calculate degree centrality like this: -(mat <- as_matrix(ison_brandes)) -(degrees <- rowSums(mat)) -rowSums(mat) == colSums(mat) -``` - -```{r degreesum-hint-2, purl = FALSE} -# Or by using a built in command in migraph like this: -node_degree(ison_brandes, normalized = FALSE) -``` - -```{r degreesum-solution} -# manually calculate degree centrality -mat <- as_matrix(ison_brandes) -degrees <- rowSums(mat) -rowSums(mat) == colSums(mat) -# You can also just use a built in command in migraph though: -node_degree(ison_brandes, normalized = FALSE) -``` - -```{r degreesum-Q, echo=FALSE, purl = FALSE} -question("Are the row sums the same as the column sums?", - answer("Yes", - correct = TRUE, - message = "That's right, that's because this is an undirected network."), - answer("No"), - allow_retry = FALSE -) -``` - -Often we are interested in the distribution of (degree) centrality in a network. -`{migraph}` offers a way to get a pretty good first look at this distribution, -though there are more elaborate ways to do this in base and grid graphics. - -```{r distrib, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r distrib-solution} -# distribution of degree centrality scores of nodes -plot(node_degree(ison_brandes)) -``` - -What's plotted here by default is both the degree distribution as a histogram, -as well as a density plot overlaid on it. -What kind of shape does this have? - -### Other centralities - -Other measures of centrality can be a little trickier to calculate by hand. -Fortunately, we can use functions from `{migraph}` to help calculate the -betweenness, closeness, and eigenvector centralities for each node in the network. 
-Let's collect the vectors of these centralities for the `ison_brandes` dataset: - -```{r micent, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r micent-hint-1, purl = FALSE} -# Use the node_betweenness() function to calculate the -# betweenness centralities of nodes in a network -node_betweenness(ison_brandes) -``` - -```{r micent-hint-2, purl = FALSE} -# Use the node_closeness() function to calculate the -# closeness centrality of nodes in a network -node_closeness(ison_brandes) -``` - -```{r micent-hint-3, purl = FALSE} -# Use the node_eigenvector() function to calculate -# the eigenvector centrality of nodes in a network -node_eigenvector(ison_brandes) -``` - -```{r micent-solution} -node_betweenness(ison_brandes) -node_closeness(ison_brandes) -node_eigenvector(ison_brandes) -# TASK: Can you create degree distributions for each of these? -``` - -What is returned here are vectors of betweenness, closeness, and eigenvector scores -for the nodes in the network. -But what do they mean? -Try to answer the following questions for yourself: - -- in what ways is a higher degree actor more 'central'? -- can you explain why a node that has the smallest sum of geodesic distances to all other nodes is said to be 'central'? -- why would an actor lying 'between' two other actors be 'central'? -- what does Bonacich mean when he says that power and influence are not the same thing? -- can you think of a real-world example when an actor might be central but not powerful, or powerful but not central? - -Note that all centrality measures in `{migraph}` return normalized -scores by default -- -for the raw scores, include `normalized = FALSE` in the function as an extra argument. - -## Plotting centrality - -It is straightforward in `{migraph}` to highlight nodes and ties -with maximum or minimum (e.g. degree) scores. -If the vector is numeric (i.e. 
a "measure"), -then this can be easily converted into a logical vector that -identifies the node/tie with the maximum/minimum score using -e.g. `node_is_max()` or `tie_is_min()`. -By passing this attribute to the `autographr()` argument "node_color" -we can highlight which node or nodes hold the maximum score in red. - -```{r ggid, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r ggid-solution} -# plot the network, highlighting the node with the highest centrality score with a different colour -ison_brandes %>% - add_node_attribute("color", node_is_max(node_degree(ison_brandes))) %>% - autographr(node_color = "color") - -ison_brandes %>% - add_node_attribute("color", node_is_max(node_betweenness(ison_brandes))) %>% - autographr(node_color = "color") - -ison_brandes %>% - add_node_attribute("color", node_is_max(node_closeness(ison_brandes))) %>% - autographr(node_color = "color") - -ison_brandes %>% - add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes))) %>% - autographr(node_color = "color") -``` - -How neat! Try it with the two-mode version. -What can you see? 
- -```{r ggid_twomode, exercise = TRUE, purl = FALSE} -# Instead of "ison_brandes", use "ison_brandes2" - -``` - -```{r ggid_twomode-solution} -ison_brandes2 %>% - add_node_attribute("color", node_is_max(node_degree(ison_brandes2))) %>% - autographr(node_color = "color") - -ison_brandes2 %>% - add_node_attribute("color", node_is_max(node_betweenness(ison_brandes2))) %>% - autographr(node_color = "color") - -ison_brandes2 %>% - add_node_attribute("color", node_is_max(node_closeness(ison_brandes2))) %>% - autographr(node_color = "color") - -ison_brandes2 %>% - add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes2))) %>% - autographr(node_color = "color") -``` - -```{r brandes2quiz, purl = FALSE} -question("Select all that are true for the two-mode Brandes network.", - answer("Only one node is selected in each plot."), - answer("The maximum degree square has a higher degree than the maximum degree circle(s).", - correct = TRUE), - answer("No node is ever the most central according to two or more different centrality measures."), - allow_retry = TRUE, - random_answer_order = TRUE) -``` - -## Calculating centralization - -`{migraph}` also implements network centralization functions. -Here we are no longer interested in the level of the node, -but in the level of the whole network, -so the syntax replaces `node_` with `network_`: - -```{r centzn, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r centzn-solution} -network_degree(ison_brandes) -network_betweenness(ison_brandes) -network_closeness(ison_brandes) -network_eigenvector(ison_brandes) -``` - -By default, scores are printed to 3 decimal places, -but this can be modified and, in any case, -the unrounded values are retained internally. -This means that even if rounded values are printed, -as much precision as is available is used in further calculations. 
- -Note that for centralization in two-mode networks, -two values are given (as a named vector), -since normalization typically depends on the (asymmetric) -number of nodes in each mode. - -What if we want to have a single image/figure with multiple plots? -This can be a little tricky with gg-based plots, -but fortunately the `{patchwork}` package is here to help. - -```{r multiplot, exercise = TRUE, exercise.setup = "addingnames", purl = FALSE} - -``` - -```{r multiplot-solution} -ison_brandes <- ison_brandes %>% - add_node_attribute("degree", - node_is_max(node_degree(ison_brandes))) %>% - add_node_attribute("betweenness", - node_is_max(node_betweenness(ison_brandes))) %>% - add_node_attribute("closeness", - node_is_max(node_closeness(ison_brandes))) %>% - add_node_attribute("eigenvector", - node_is_max(node_eigenvector(ison_brandes))) -gd <- autographr(ison_brandes, node_color = "degree") + - ggtitle("Degree", subtitle = round(network_degree(ison_brandes), 2)) -gc <- autographr(ison_brandes, node_color = "closeness") + - ggtitle("Closeness", subtitle = round(network_closeness(ison_brandes), 2)) -gb <- autographr(ison_brandes, node_color = "betweenness") + - ggtitle("Betweenness", subtitle = round(network_betweenness(ison_brandes), 2)) -ge <- autographr(ison_brandes, node_color = "eigenvector") + - ggtitle("Eigenvector", subtitle = round(network_eigenvector(ison_brandes), 2)) -(gd | gb) / (gc | ge) -# ggsave("brandes-centralities.pdf") -``` - - -```{r centzdq, purl = FALSE} -question("How centralized is the ison_brandes network? Select all that apply.", - answer("It is more degree centralised than betweenness centralised.", - message = "Degree centralisation is at 0.18 for this network whereas betweenness centralisation is at 0.32. 
In other words, the network is better characterised as having 1 or 2 nodes lying on the shortest paths between others than one where 1 or 2 nodes have many more ties than the others."), - answer("It is more closeness centralised than betweenness centralised.", - message = "Closeness centralisation is at 0.23 for this network whereas betweenness centralisation is at 0.32. In other words, the network is better characterised as having 1 or 2 nodes lying on the shortest paths between others than one where 1 or 2 nodes can reach or access most other nodes."), - answer("It is more eigenvector centralised than betweenness centralised.", - correct = TRUE, - message = "That's right, eigenvector centralisation is at 0.48 for this network whereas betweenness centralisation is at 0.32. In other words, the network is better characterised as having a core (or cores) of well-connected nodes rather than a wide network with only 1 or 2 nodes lying on the shortest paths between others."), - random_answer_order = TRUE, - allow_retry = TRUE) -``` - -```{r centvcent, echo=FALSE, purl = FALSE} -question("What is the difference between centrality and centralisation according to the literature?", - answer("Centrality is for nodes and centralisation is for networks", - correct = TRUE), - answer("Centrality is a state and centralisation is a process"), - answer("Centrality is a ity and centralisation is a sation"), - answer("Centrality is to centralisation what polarity is to polarisation"), - allow_retry = FALSE, - random_answer_order = TRUE -) -``` - -## Tasks - -1. Name a plausible research question you could ask of this data -for each of the four main centrality measures -(degree, betweenness, closeness, eigenvector) -You may want to add these as titles or subtitles to each plot. 
diff --git a/inst/tutorials/tutorial3/centrality.html b/inst/tutorials/tutorial3/centrality.html deleted file mode 100644 index a1df29792..000000000 --- a/inst/tutorials/tutorial3/centrality.html +++ /dev/null @@ -1,1098 +0,0 @@ - - - - - - - - - - - - - - - - - -Centrality - - - - - - - - - - - - - - - - - - - - - -Skip to Tutorial Content - - - -
-
- -
- -
-

Calculating centrality

-

For this exercise, we’ll use the ison_brandes dataset in -{manynet}. This dataset is in a ‘tidygraph’ format, but -manynet makes it easy to coerce this into other forms to be -compatible with other packages. We can create a two-mode version of the -dataset by renaming the nodal attribute “twomode_type” to just “type”. -Let’s begin by graphing these datasets using -manynet::autographr().

-
- -
-
-
# Let's graph the one-mode version
-autographr(____)
-
-
-
# Now, let's create a two-mode version 'ison_brandes2' and graph it.
-ison_brandes2 <- ison_brandes %>% rename(type = twomode_type)
-autographr(____)
-
-
-
# plot the one-mode version
-autographr(ison_brandes)
-ison_brandes2 <- ison_brandes %>% rename(type = twomode_type)
-# plot the two-mode version
-autographr(ison_brandes2)
-
-

The network is anonymous, but I think it would be nice to add some -names, even if it’s just pretend. Luckily, {manynet} has a -function for this. This makes plotting the network just a wee bit more -accessible and interpretable:

-
-
ison_brandes <- to_named(ison_brandes)
- -
-
-
# Now, let's graph using the object names: "ison_brandes"
-autographr(____)
-
-
-
ison_brandes <- to_named(ison_brandes)
-# plot network with names
-autographr(ison_brandes)
-
-

Note that you will likely get a different set of names, as they are -assigned randomly from a pool of (American) first names.

-
-

Degree centrality

-

Let’s start with calculating degree, as it is easy to calculate -yourself. Just sum the rows or columns of the matrix!

-
- -
-
-
# We can calculate degree centrality like this:
-(mat <- as_matrix(ison_brandes))
-(degrees <- rowSums(mat))
-rowSums(mat) == colSums(mat)
-
-
-
# Or by using a built in command in migraph like this:
-node_degree(ison_brandes, normalized = FALSE)
-
-
-
# manually calculate degree centrality
-mat <- as_matrix(ison_brandes)
-degrees <- rowSums(mat)
-rowSums(mat) == colSums(mat)
-# You can also just use a built in command in migraph though:
-node_degree(ison_brandes, normalized = FALSE)
-
-
-
-
-
-
- -
-
-

Often we are interested in the distribution of (degree) centrality in -a network. {migraph} offers a way to get a pretty good -first look at this distribution, though there are more elaborate ways to -do this in base and grid graphics.

-
- -
-
-
# distribution of degree centrality scores of nodes
-plot(node_degree(ison_brandes))
-
-

What’s plotted here by default is both the degree distribution as a -histogram, as well as a density plot overlaid on it. What kind of shape -does this have?

-
-
-

Other centralities

-

Other measures of centrality can be a little trickier to calculate by -hand. Fortunately, we can use functions from {migraph} to -help calculate the betweenness, closeness, and eigenvector centralities -for each node in the network. Let’s collect the vectors of these -centralities for the ison_brandes dataset:

-
- -
-
-
# Use the node_betweenness() function to calculate the
-# betweenness centralities of nodes in a network
-node_betweenness(ison_brandes)
-
-
-
# Use the node_closeness() function to calculate the 
-# closeness centrality of nodes in a network
-node_closeness(ison_brandes)
-
-
-
# Use the node_eigenvector() function to calculate 
-# the eigenvector centrality of nodes in a network
-node_eigenvector(ison_brandes)
-
-
-
node_betweenness(ison_brandes)
-node_closeness(ison_brandes)
-node_eigenvector(ison_brandes)
-# TASK: Can you create degree distributions for each of these?
-
-

What is returned here are vectors of betweenness, closeness, and -eigenvector scores for the nodes in the network. But what do they mean? -Try to answer the following questions for yourself:

-
    -
  • in what ways is a higher degree actor more ‘central’?
  • -
  • can you explain why a node that has the smallest sum of geodesic -distances to all other nodes is said to be ‘central’?
  • -
  • why would an actor lying ‘between’ two other actors be -‘central’?
  • -
  • what does Bonacich mean when he says that power and influence are -not the same thing?
  • -
  • can you think of a real-world example when an actor might be central -but not powerful, or powerful but not central?
  • -
-

Note that all centrality measures in {migraph} return -normalized scores by default – for the raw scores, include -normalized = FALSE in the function as an extra -argument.

-
-
-
-

Plotting centrality

-

It is straightforward in {migraph} to highlight nodes -and ties with maximum or minimum (e.g. degree) scores. If the vector is -numeric (i.e. a “measure”), then this can be easily converted into a -logical vector that identifies the node/tie with the maximum/minimum -score using e.g. node_is_max() or -tie_is_min(). By passing this attribute to the -autographr() argument “node_color” we can highlight which -node or nodes hold the maximum score in red.

-
- -
-
-
# plot the network, highlighting the node with the highest centrality score with a different colour
-ison_brandes %>%
-  add_node_attribute("color", node_is_max(node_degree(ison_brandes))) %>%
-  autographr(node_color = "color")
-
-ison_brandes %>%
-  add_node_attribute("color", node_is_max(node_betweenness(ison_brandes))) %>%
-  autographr(node_color = "color")
-
-ison_brandes %>%
-  add_node_attribute("color", node_is_max(node_closeness(ison_brandes))) %>%
-  autographr(node_color = "color")
-
-ison_brandes %>%
-  add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes))) %>%
-  autographr(node_color = "color")
-
-

How neat! Try it with the two-mode version. What can you see?

-
-
# Instead of "ison_brandes", use "ison_brandes2"
- -
-
-
ison_brandes2 %>%
-  add_node_attribute("color", node_is_max(node_degree(ison_brandes2))) %>%
-  autographr(node_color = "color")
-
-ison_brandes2 %>%
-  add_node_attribute("color", node_is_max(node_betweenness(ison_brandes2))) %>%
-  autographr(node_color = "color")
-
-ison_brandes2 %>%
-  add_node_attribute("color", node_is_max(node_closeness(ison_brandes2))) %>%
-  autographr(node_color = "color")
-
-ison_brandes2 %>%
-  add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes2))) %>%
-  autographr(node_color = "color")
-
-
-
-
-
-
- -
-
-
-
-

Calculating centralization

-

{migraph} also implements network centralization -functions. Here we are no longer interested in the level of the node, -but in the level of the whole network, so the syntax replaces -node_ with network_:

-
- -
-
-
network_degree(ison_brandes)
-network_betweenness(ison_brandes)
-network_closeness(ison_brandes)
-network_eigenvector(ison_brandes)
-
-

By default, scores are printed to 3 decimal places, but this can be -modified and, in any case, the unrounded values are retained internally. -This means that even if rounded values are printed, as much precision as -is available is used in further calculations.

-

Note that for centralization in two-mode networks, two values are -given (as a named vector), since normalization typically depends on the -(asymmetric) number of nodes in each mode.

-

What if we want to have a single image/figure with multiple plots? -This can be a little tricky with gg-based plots, but fortunately the -{patchwork} package is here to help.

-
- -
-
-
ison_brandes <- ison_brandes %>%
-  add_node_attribute("degree",
-                              node_is_max(node_degree(ison_brandes))) %>%
-  add_node_attribute("betweenness",
-                              node_is_max(node_betweenness(ison_brandes))) %>%
-  add_node_attribute("closeness",
-                              node_is_max(node_closeness(ison_brandes))) %>%
-  add_node_attribute("eigenvector",
-                              node_is_max(node_eigenvector(ison_brandes)))
-gd <- autographr(ison_brandes, node_color = "degree") + 
-  ggtitle("Degree", subtitle = round(network_degree(ison_brandes), 2))
-gc <- autographr(ison_brandes, node_color = "closeness") + 
-  ggtitle("Closeness", subtitle = round(network_closeness(ison_brandes), 2))
-gb <- autographr(ison_brandes, node_color = "betweenness") + 
-  ggtitle("Betweenness", subtitle = round(network_betweenness(ison_brandes), 2))
-ge <- autographr(ison_brandes, node_color = "eigenvector") + 
-  ggtitle("Eigenvector", subtitle = round(network_eigenvector(ison_brandes), 2))
-(gd | gb) / (gc | ge)
-# ggsave("brandes-centralities.pdf")
-
-
-
-
-
-
- -
-
-
-
-
-
-
- -
-
-
-
-

Tasks

-
    -
  1. Name a plausible research question you could ask of this data for -each of the four main centrality measures (degree, betweenness, -closeness, eigenvector) You may want to add these as titles or subtitles -to each plot. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  2. -
- - - - - - -
- -
- -
-
-
-
- - -
-

Centrality

-

by James Hollway

-
- - -
-
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/inst/tutorials/tutorial4/community.Rmd b/inst/tutorials/tutorial4/community.Rmd deleted file mode 100644 index a7e9003db..000000000 --- a/inst/tutorials/tutorial4/community.Rmd +++ /dev/null @@ -1,769 +0,0 @@ ---- -title: "Community" -author: "by James Hollway" -output: - learnr::tutorial: - theme: journal -runtime: shiny_prerendered ---- - -```{r setup, include=FALSE} -library(learnr) -library(patchwork) -library(manynet) -library(migraph) -knitr::opts_chunk$set(echo = FALSE) - -friends <- to_uniplex(ison_algebra, "friends") -social <- to_uniplex(ison_algebra, "social") -tasks <- to_uniplex(ison_algebra, "tasks") -``` - - -## Setting up - -The data we're going to use here, "ison_algebra", is included in the `{manynet}` package. -Do you remember how to call the data? -Can you find out some more information about it? - -```{r data, exercise = TRUE, purl = FALSE} - -``` - -```{r data-hint-1, purl = FALSE} -# Let's call and load the 'ison_algebra' dataset -data("ison_algebra", package = "manynet") -# Or you can retrieve like this: -ison_algebra <- manynet::ison_algebra -``` - -```{r data-hint-2, purl = FALSE} -# If you want to learn more about the 'ison_algebra' dataset, use the following function (below) -?manynet::ison_algebra -``` - -```{r data-solution} -data("ison_algebra", package = "manynet") -?manynet::ison_algebra -# If you want to see the network object, you can run the name of the object -ison_algebra -# or print the code with brackets at the front and end of the code -(ison_algebra <- manynet::ison_algebra) -``` - -We can see after printing the object that the dataset is multiplex, -meaning that it contains several different types of ties: -friendship (friends), social (social) and task interactions (tasks). - -### Adding names - -The network is also anonymous, but I think it would be nice to add some names, -even if it's just pretend. -Luckily, `{manynet}` has a function for this, `to_named()`. 
-This makes plotting the network just a wee bit more accessible and interpretable. -Let's try adding names and graphing the network now: - -```{r addingnames, exercise=TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r addingnames-hint-1, purl = FALSE} -ison_algebra <- to_named(ison_algebra) -``` - -```{r addingnames-hint-2, purl = FALSE} -autographr(ison_algebra) -``` - -```{r addingnames-solution} -ison_algebra <- to_named(ison_algebra) -autographr(ison_algebra) -``` - -Note that you will likely get a different set of names, -as they are assigned randomly from a pool of (American) first names. - -### Separating multiplex networks - -As a multiplex network, -there are actually three different types of ties (friends, social, and tasks) -in this network. -We can extract them and graph them separately using `to_uniplex()`: - -```{r separatingnets, exercise=TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r separatingnets-hint-1, purl = FALSE} -# to_uniplex extracts ties of a single type, -# focusing on the 'friends' tie attribute here -friends <- to_uniplex(ison_algebra, "friends") -gfriend <- autographr(friends) + ggtitle("Friendship") -``` - -```{r separatingnets-hint-2, purl = FALSE} -# now let's focus on the 'social' tie attribute -social <- to_uniplex(ison_algebra, "social") -gsocial <- autographr(social) + ggtitle("Social") -``` - -```{r separatingnets-hint-3, purl = FALSE} -# and the 'tasks' tie attribute -tasks <- to_uniplex(ison_algebra, "tasks") -gtask <- autographr(tasks) + ggtitle("Task") -``` - -```{r separatingnets-hint-4, purl = FALSE} -# now, let's compare each attribute's graph, side-by-side -gfriend + gsocial + gtask -# if you get an error here, you may need to install and load -# the package 'patchwork'. -# It's highly recommended for assembling multiple plots together. -# Otherwise you can just plot them separately on different lines. 
-``` - -```{r separatingnets-solution} -friends <- to_uniplex(ison_algebra, "friends") -gfriend <- autographr(friends) + ggtitle("Friendship") - -social <- to_uniplex(ison_algebra, "social") -gsocial <- autographr(social) + ggtitle("Social") - -tasks <- to_uniplex(ison_algebra, "tasks") -gtask <- autographr(tasks) + ggtitle("Task") - -# We now have three separate networks depicting each type of tie from the ison_algebra network: -gfriend + gsocial + gtask -``` - -Note also that these are weighted networks. -`autographr()` automatically recognises these different weights and plots them. -Where useful (less dense directed networks), -`autographr()` also bends reciprocated arcs. -What (else) can we say about these three networks? - -## Cohesion - -Let's concentrate on the task network for now and calculate a few basic -measures of cohesion: -density, reciprocity, transitivity, and components. - -### Density - -Because this is a directed network, we can calculate the density as: - -```{r dens-explicit, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r dens-explicit-solution} -# calculating network density manually according to equation -network_ties(tasks)/(network_nodes(tasks)*(network_nodes(tasks)-1)) -``` - -but we can also just use the `{migraph}` function... - -```{r dens, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r dens-solution} -network_density(tasks) -``` - -Note that the various measures in `{migraph}` print results to three decimal points -by default, but the underlying result retains the same recurrence. -So same result... 
- -```{r dens-qa, echo=FALSE, purl = FALSE} -question("Is this network's density high or low?", - answer("High", - message = "The closer the value is to 1, the more dense the network and the more cohesive the network is as a whole."), - answer("Low", - correct = TRUE, - message = "The closer the value is to 0, the sparser the network and the less cohesive the network is as a whole.") -) -``` - -### Closure - -Next let's calculate _reciprocity_ in the task network. -While one could do this by hand, -it's more efficient to do this using the `{migraph}` package. -Can you guess the correct name of the function? - -```{r recip, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r recip-solution} -network_reciprocity(tasks) -# this function calculates the amount of reciprocity in the whole network -``` - -And let's calculate _transitivity_ in the task network. -Again, can you guess the correct name of this function? - -```{r trans, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r trans-solution} -network_transitivity(tasks) -# this function calculates the amount of transitivity in the whole network -``` - -We have collected measures of the task network's reciprocity -and transitivity, but we still need to interpret these measures. -These measures do not speak for themselves. - -```{r trans-interp, echo=FALSE, purl = FALSE} -question("What can we say about task closure in this network? 
Choose all that apply.", - answer("Transitivity for the task network is 0.568", - correct = TRUE), - answer("Transitivity for the task network is -0.568", - message = "Transivitity must be between 0 and 1."), - answer("Transitivity is quite low in this network", - message = "Transitivity is usually around 0.3 in most social networks."), - answer("Transitivity is quite high in this network", - correct = TRUE), - answer("Transitivity is likely higher in the task network than the friendship network", - correct = TRUE), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -### Components - -Now let's look at the friendship network, 'friends'. -We're interested here in how many _components_ there are. -By default, the `network_components()` function will -return the number of _strong_ components for directed networks. -For _weak_ components, you will need to first make the network undirected. -Remember the difference between weak and strong components? - -```{r weak-strong, echo = FALSE, purl = FALSE} -question("Weak components...", - answer("don't care about tie direction when establishing components.", - correct = TRUE), - answer("care about tie direction when establishing components."), - allow_retry = TRUE -) -``` - -```{r comp-no, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r comp-no-hint-1, purl = FALSE} -network_components(friends) -# note that friends is a directed network -# you can see this by calling the object 'friends' -# or by running `manynet::is_directed(friends)` -``` - -```{r comp-no-hint-2, purl = FALSE} -# Now let's look at the number of components for objects connected by an undirected edge -# Note: to_undirected() returns an object with all tie direction removed, -# so any pair of nodes with at least one directed edge -# will be connected by an undirected edge in the new network. 
-network_components(to_undirected(friends)) -``` - -```{r comp-no-solution} -# note that friends is a directed network -network_components(friends) -network_components(to_undirected(friends)) -``` - -```{r comp-interp, echo = FALSE, purl = FALSE} -question("How many components are there?", - answer("2", - message = "There are more than 2 components."), - answer("3", - message = "There are 3 _weak_ components.", - correct = TRUE), - answer("4", - message = "There are 4 _strong_ components.", - correct = TRUE), - answer("5", - message = "There are fewer than 5 components."), - allow_retry = TRUE -) -``` - -So we know how many components there are, -but maybe we're also interested in which nodes are members of which components? -`node_components()` returns a membership vector -that can be used to color nodes in `autographr()`: - -```{r comp-memb, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r comp-memb-hint-1, purl = FALSE} -friends <- friends %>% - mutate(weak_comp = node_components(to_undirected(friends)), - strong_comp = node_components(friends)) -# node_components returns a vector of nodes' memberships to components in the network -# here, we are adding the nodes' membership to components as an attribute in the network -# alternatively, we can also use the function `add_node_attribute()` -# eg. 
`add_node_attribute(friends, "weak_comp", node_components(to_undirected(friends)))` -``` - -```{r comp-memb-hint-2, purl = FALSE} -autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") + -autographr(friends, node_color = "strong_comp") + ggtitle("Strong components") -# by using the 'node_color' argument, we are telling autographr to colour -# the nodes in the graph according to the values of the 'weak_comp' attribute in the network -``` - -```{r comp-memb-solution} -friends <- friends %>% - mutate(weak_comp = node_components(to_undirected(friends)), - strong_comp = node_components(friends)) -autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") + -autographr(friends, node_color = "strong_comp") + ggtitle("Strong components") -``` - -```{r node-comp-interp, echo = FALSE, purl = FALSE} -question("Why is there a difference between the weak and strong components results?", - answer("Because one node has only incoming ties.", - correct = TRUE), - answer("Because three nodes cannot reach any other nodes.", - correct = TRUE), - answer("Because there is an extra isolate."), - answer("Because the tie strength matters."), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -## Community Detection - -Ok, the friendship network has 3-4 components, but how many 'groups' are there? -Just visually, it looks like there are two denser clusters within the main component. - -Today we'll use the 'friends' subgraph for exploring community detection methods. -For clarity and simplicity, -we will concentrate on the main component (the so-called 'giant' component) -and consider friendship undirected. -Can you guess how to make these changes to the 'friends' network? 
- -```{r manip-fri, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r manip-fri-hint-1, purl = FALSE} -# to_giant() returns an object that includes only the main component without any smaller components or isolates -(friends <- to_giant(friends)) -``` - -```{r manip-fri-hint-2, purl = FALSE} -(friends <- to_undirected(friends)) -``` - -```{r manip-fri-hint-3, purl = FALSE} -# now, let's graph the new network -autographr(friends) -``` - -```{r manip-fri-solution} -(friends <- to_giant(friends)) -(friends <- to_undirected(friends)) -autographr(friends) -``` - -Comparing `friends` before and after these operations, -you'll notice the number of ties decreases as reciprocated directed ties -are consolidated into single undirected ties, -and the number of nodes decreases as two isolates are removed. - -There is no one single best community detection algorithm. -Instead there are several, each with their strengths and weaknesses. -Since this is a rather small network, we'll focus on the following methods: -walktrap, edge betweenness, and fast greedy. -(Others are included in `{migraph}`/`{igraph}`) -As you use them, consider how they portray communities and consider which one(s) -afford a sensible view of the social world as cohesively organized. - -### Walktrap - -This algorithm detects communities through a series of short random walks, -with the idea that nodes encountered on any given random walk -are more likely to be within a community than not. -It was proposed by Pons and Latapy (2005). - -The algorithm initially treats all nodes as communities of their own, then -merges them into larger communities, still larger communities, and so on. -In each step a new community is created from two other communities, -and its ID will be one larger than the largest community ID so far. -This means that before the first merge we have n communities -(the number of vertices in the graph) numbered from zero to n-1. 
-The first merge creates community n, the second community n+1, etc. -This merge history is returned by the function: -` # ?igraph::cluster_walktrap` - -Note the "steps=" argument that specifies the length of the random walks. -While `{igraph}` sets this to 4 by default, -which is what is recommended by Pons and Latapy, -Waugh et al (2009) found that for many groups (Congresses), -these lengths did not provide the maximum modularity score. -To be thorough in their attempts to optimize modularity, they ran the walktrap -algorithm 50 times for each group (using random walks of lengths 1–50) and -selected the network partition with the highest modularity value from those 50. -They call this the "maximum modularity partition" and insert the parenthetical -"(though, strictly speaking, this cannot be proven to be the optimum without -computationally-prohibitive exhaustive enumeration (Brandes et al. 2008))." - -So let's try and get a community classification using the walktrap algorithm -with path lengths of the random walks specified to be 50. 
- -```{r walk, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r walk-hint-1, purl = FALSE} -# let's use the node_walktrap()function to create a hierarchical, -# agglomerative algorithm based on random walks, and assign it to -# an object - -friend_wt <- node_walktrap(friends, times=50) -friend_wt # note that it prints pretty, but underlying its just a vector: -``` - -```{r walk-hint-2, purl = FALSE} -c(friend_wt) - -# This says that dividing the graph into 2 communities maximises modularity, -# one with the nodes -which(friend_wt == 1) -# and the other -which(friend_wt == 2) -``` - -```{r walk-hint-3, purl = FALSE} -# resulting in a modularity of -network_modularity(friends, friend_wt) -``` - -```{r walk-solution} -friend_wt <- node_walktrap(friends, times=50) -friend_wt # note that it prints pretty, but underlying it is just a vector: -# c(friend_wt) - -# This says that dividing the graph into 2 communities maximises modularity, -# one with the nodes -which(friend_wt == 1) -# and the other -which(friend_wt == 2) -# resulting in a modularity of -network_modularity(friends, friend_wt) -``` - -We can also visualise the clusters on the original network -How does the following look? Plausible? - -```{r walkplot, exercise=TRUE, exercise.setup = "walk", purl = FALSE} -``` - -```{r walkplot-hint-1, purl = FALSE} -# plot 1: groups by node color - -friends <- friends %>% - mutate(walk_comm = friend_wt) -autographr(friends, node_color = "walk_comm") -``` - -```{r walkplot-hint-2, purl = FALSE} -#plot 2: groups by borders - -# to be fancy, we could even draw the group borders around the nodes using the node_group argument -autographr(friends, node_group = "walk_comm") -``` - -```{r walkplot-hint-3, purl = FALSE} -# plot 3: group and node colors - -# or both! 
-autographr(friends, - node_color = "walk_comm", - node_group = "walk_comm") + - ggtitle("Walktrap", - subtitle = round(network_modularity(friends, friend_wt), 3)) -# the function `round()` rounds the values to a specified number of decimal places -# here, we are telling it to round the network_modularity score to 3 decimal places, -# but the score is exactly 0.27 so only two decimal places are printed. -``` - -```{r walkplot-solution} -friends <- friends %>% - mutate(walk_comm = friend_wt) -autographr(friends, node_color = "walk_comm") -# to be fancy, we could even draw the group borders around the nodes using the node_group argument -autographr(friends, node_group = "walk_comm") -# or both! -autographr(friends, - node_color = "walk_comm", - node_group = "walk_comm") + - ggtitle("Walktrap", - subtitle = round(network_modularity(friends, friend_wt), 3)) -``` - -This can be helpful when polygons overlap to better identify membership -Or you can use node color and size to indicate other attributes... - -### Edge Betweenness - -Edge betweenness is like betweenness centrality but for ties not nodes. -The edge-betweenness score of an edge measures the number of -shortest paths from one vertex to another that go through it. - -The idea of the edge-betweenness based community structure detection is that -it is likely that edges connecting separate clusters have high edge-betweenness, -as all the shortest paths from one cluster to another must traverse through them. -So if we iteratively remove the edge with the highest edge-betweenness score -we will get a hierarchical map (dendrogram) of the communities in the graph. - -The following works similarly to walktrap, but no need to set a step length. - -```{r eb, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} -``` - -```{r eb-solution} -friend_eb <- node_edge_betweenness(friends) -friend_eb -``` - -How does community membership differ here from that found by walktrap? 
- -We can see how the edge betweenness community detection method works -here: http://jfaganuk.github.io/2015/01/24/basic-network-analysis/ - -To visualise the result: - -```{r ebplot, exercise=TRUE, exercise.setup = "eb", purl = FALSE} - -``` - -```{r ebplot-hint-1, purl = FALSE} -# create an object - -friends <- friends %>% - mutate(eb_comm = friend_eb) -``` - -```{r ebplot-hint-2, purl = FALSE} -# create a graph with a title and subtitle returning the modularity score - -autographr(friends, - node_color = "eb_comm", - node_group = "eb_comm") + - ggtitle("Edge-betweenness", - subtitle = round(network_modularity(friends, friend_eb), 3)) -``` - -```{r ebplot-solution} -friends <- friends %>% - mutate(eb_comm = friend_eb) -autographr(friends, - node_color = "eb_comm", - node_group = "eb_comm") + - ggtitle("Edge-betweenness", - subtitle = round(network_modularity(friends, friend_eb), 3)) -``` - -For more on this algorithm, see M Newman and M Girvan: Finding and -evaluating community structure in networks, Physical Review E 69, 026113 -(2004), https://arxiv.org/abs/cond-mat/0308217. - -### Fast Greedy - -This algorithm is the Clauset-Newman-Moore algorithm. -Whereas edge betweenness was divisive (top-down), -the fast greedy algorithm is agglomerative (bottom-up). - -At each step, the algorithm seeks a merge that would most increase modularity. -This is very fast, but has the disadvantage of being a greedy algorithm, -so it might not produce the best overall community partitioning, -although I personally find it both useful and in many cases quite "accurate". - -```{r fg, exercise=TRUE, exercise.setup = "separatingnets", purl = FALSE} - -``` - -```{r fg-hint-1, purl = FALSE} -friend_fg <- node_fast_greedy(friends) -friend_fg # Does this result in a different community partition? 
-network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure -``` - -```{r fg-hint-2, purl = FALSE} -# Again, we can visualise these communities in different ways: -friends <- friends %>% - mutate(fg_comm = friend_fg) -autographr(friends, - node_color = "fg_comm", - node_group = "fg_comm") + - ggtitle("Fast-greedy", - subtitle = round(network_modularity(friends, friend_fg), 3)) -# -``` - -```{r fg-solution} -friend_fg <- node_fast_greedy(friends) -friend_fg # Does this result in a different community partition? -network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure - -# Again, we can visualise these communities in different ways: -friends <- friends %>% - mutate(fg_comm = friend_fg) -autographr(friends, - node_color = "fg_comm", - node_group = "fg_comm") + - ggtitle("Fast-greedy", - subtitle = round(network_modularity(friends, friend_fg), 3)) -``` - -See A Clauset, MEJ Newman, C Moore: -Finding community structure in very large networks, -https://arxiv.org/abs/cond-mat/0408187 - -```{r comm-comp, echo=FALSE, purl = FALSE} -question("What is the difference between communities and components?", - answer("Communities and components are just different terms for the same thing"), - answer("Communities are a stricter form of component"), - answer("Components are about paths whereas communities are about the relationship between within-group and between-group ties", - correct = TRUE), - random_answer_order = TRUE, - allow_retry = TRUE) -``` - -## Two-mode network: Southern women - -The next dataset, 'ison_southern_women', is also available in `{manynet}`. -Let's load and graph the data. 
- -```{r setup-women, exercise=TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r setup-women-hint-1, purl = FALSE} -# let's load the data and analyze it -data("ison_southern_women") -ison_southern_women -``` - -```{r setup-women-hint-2, purl = FALSE} -autographr(ison_southern_women, node_color = "type") -autographr(ison_southern_women, "railway", node_color = "type") -``` - -```{r setup-women-solution} -data("ison_southern_women") -ison_southern_women -autographr(ison_southern_women, node_color = "type") -``` - -### Project two-mode network into two one-mode networks - -Now what if we are only interested in one part of the network? -For that, we can obtain a 'projection' of the two-mode network. -There are two ways of doing this. -The hard way... - -```{r hardway, exercise=TRUE, exercise.setup = "setup-women", purl = FALSE} - -``` - -```{r hardway-solution} -twomode_matrix <- as_matrix(ison_southern_women) -women_matrix <- twomode_matrix %*% t(twomode_matrix) -event_matrix <- t(twomode_matrix) %*% twomode_matrix -``` - -Or the easy way: - -```{r easyway, exercise=TRUE, exercise.setup = "setup-women", purl = FALSE} - -``` - -```{r easyway-hint-1, purl = FALSE} -# women-graph -# to_mode1(): Results in a weighted one-mode object that retains the row nodes from -# a two-mode object, and weights the ties between them on the basis of their joint -# ties to nodes in the second mode (columns) - -women_graph <- to_mode1(ison_southern_women) -autographr(women_graph) - -# note that projection `to_mode1` involves keeping one type of nodes -# this is different from to_uniplex above, which keeps one type of ties in the network -``` - -```{r easyway-hint-2, purl = FALSE} -# event-graph -# to_mode2(): Results in a weighted one-mode object that retains the column nodes from -# a two-mode object, and weights the ties between them on the basis of their joint ties -# to nodes in the first mode (rows) - -event_graph <- to_mode2(ison_southern_women) -autographr(event_graph) 
-``` - -```{r easyway-solution} -women_graph <- to_mode1(ison_southern_women) -autographr(women_graph) -event_graph <- to_mode2(ison_southern_women) -autographr(event_graph) -``` - -`{manynet}` also includes several other options for how to construct the projection. -Please see the help file for more details. - -```{r otherway, exercise=TRUE, exercise.setup = "setup-women", purl = FALSE} - -``` - -```{r otherway-solution} -autographr(to_mode2(ison_southern_women, similarity = "jaccard")) + ggtitle("Jaccard") + -autographr(to_mode2(ison_southern_women, similarity = "rand")) + ggtitle("Rand") + -autographr(to_mode2(ison_southern_women, similarity = "pearson")) + ggtitle("Pearson") + -autographr(to_mode2(ison_southern_women, similarity = "yule")) + ggtitle("Yule's Q") -``` - -Which women/events 'bind' which events/women? -Let's return to the question of cohesion. - -```{r twomode-cohesion, exercise=TRUE, exercise.setup = "setup-women", purl = FALSE} - -``` - -```{r twomode-cohesion-hint-1, purl = FALSE} -# network_equivalency(): Calculate equivalence or reinforcement in a (usually two-mode) network - -network_equivalency(ison_southern_women) -``` - -```{r twomode-cohesion-hint-2, purl = FALSE} -# network_transitivity(): Calculate transitivity in a network - -network_transitivity(women_graph) -network_transitivity(event_graph) -``` - -```{r twomode-cohesion-solution} -network_equivalency(ison_southern_women) -network_transitivity(women_graph) -network_transitivity(event_graph) -``` - -What do we learn from this? - -## Task/Unit Test - -1. Produce a plot comparing 3 community detection procedures used here on a -(women) projection of the 'ison_southern_women' dataset. Identify which you prefer, and explain why. -2. Explain in no more than a paragraph why projection can lead to misleading transitivity measures. -3. Explain in no more than a paragraph how structural balance might lead to group identity. 
diff --git a/inst/tutorials/tutorial4/community.html b/inst/tutorials/tutorial4/community.html deleted file mode 100644 index c4f6f411a..000000000 --- a/inst/tutorials/tutorial4/community.html +++ /dev/null @@ -1,2132 +0,0 @@ - - - - - - - - - - - - - - - - - -Community - - - - - - - - - - - - - - - - - - - - - -Skip to Tutorial Content - - - -
-
- -
- -
-

Setting up

-

The data we’re going to use here, “ison_algebra”, is included in the -{manynet} package. Do you remember how to call the data? -Can you find out some more information about it?

-
- -
-
-
# Let's call and load the 'ison_algebra' dataset
-data("ison_algebra", package = "manynet")
-# Or you can retrieve like this:
-ison_algebra <- manynet::ison_algebra
-
-
-
# If you want to learn more about the 'ison_algebra' dataset, use the following function (below)
-?manynet::ison_algebra
-
-
-
data("ison_algebra", package = "manynet")
-?manynet::ison_algebra
-# If you want to see the network object, you can run the name of the object
-ison_algebra
-# or print the code with brackets at the front and end of the code
-(ison_algebra <- manynet::ison_algebra)
-
-

We can see after printing the object that the dataset is multiplex, -meaning that it contains several different types of ties: friendship -(friends), social (social) and task interactions (tasks).

-
-

Adding names

-

The network is also anonymous, but I think it would be nice to add -some names, even if it’s just pretend. Luckily, {manynet} -has a function for this, to_named(). This makes plotting -the network just a wee bit more accessible and interpretable. Let’s try -adding names and graphing the network now:

-
- -
-
-
ison_algebra <- to_named(ison_algebra)
-
-
-
autographr(ison_algebra)
-
-
-
ison_algebra <- to_named(ison_algebra)
-autographr(ison_algebra)
-
-

Note that you will likely get a different set of names, as they are -assigned randomly from a pool of (American) first names.

-
-
-

Separating multiplex networks

-

As a multiplex network, there are actually three different types of -ties (friends, social, and tasks) in this network. We can extract them -and graph them separately using to_uniplex():

-
- -
-
-
# to_uniplex extracts ties of a single type,
-# focusing on the 'friends' tie attribute here
-friends <- to_uniplex(ison_algebra, "friends")
-gfriend <- autographr(friends) + ggtitle("Friendship")
-
-
-
# now let's focus on the 'social' tie attribute
-social <- to_uniplex(ison_algebra, "social")
-gsocial <- autographr(social) + ggtitle("Social")
-
-
-
# and the 'tasks' tie attribute
-tasks <- to_uniplex(ison_algebra, "tasks")
-gtask <- autographr(tasks) + ggtitle("Task")
-
-
-
# now, let's compare each attribute's graph, side-by-side
-gfriend + gsocial + gtask
-# if you get an error here, you may need to install and load
-# the package 'patchwork'.
-# It's highly recommended for assembling multiple plots together.
-# Otherwise you can just plot them separately on different lines.
-
-
-
friends <- to_uniplex(ison_algebra, "friends")
-gfriend <- autographr(friends) + ggtitle("Friendship")
-
-social <- to_uniplex(ison_algebra, "social")
-gsocial <- autographr(social) + ggtitle("Social")
-
-tasks <- to_uniplex(ison_algebra, "tasks")
-gtask <- autographr(tasks) + ggtitle("Task")
-
-# We now have three separate networks depicting each type of tie from the ison_algebra network:
-gfriend + gsocial + gtask
-
-

Note also that these are weighted networks. autographr() -automatically recognises these different weights and plots them. Where -useful (less dense directed networks), autographr() also -bends reciprocated arcs. What (else) can we say about these three -networks?

-
-
-
-

Cohesion

-

Let’s concentrate on the task network for now and calculate a few -basic measures of cohesion: density, reciprocity, transitivity, and -components.

-
-

Density

-

Because this is a directed network, we can calculate the density -as:

-
- -
-
-
# calculating network density manually according to equation
-network_ties(tasks)/(network_nodes(tasks)*(network_nodes(tasks)-1))
-
-

but we can also just use the {migraph} function…

-
- -
-
-
network_density(tasks)
-
-

Note that the various measures in {migraph} print -results to three decimal points by default, but the underlying result -retains the same recurrence. So same result…

-
-
-
-
-
- -
-
-
-
-

Closure

-

Next let’s calculate reciprocity in the task network. While -one could do this by hand, it’s more efficient to do this using the -{migraph} package. Can you guess the correct name of the -function?

-
- -
-
-
network_reciprocity(tasks)
-# this function calculates the amount of reciprocity in the whole network
-
-

And let’s calculate transitivity in the task network. Again, -can you guess the correct name of this function?

-
- -
-
-
network_transitivity(tasks)
-# this function calculates the amount of transitivity in the whole network
-
-

We have collected measures of the task network’s reciprocity and -transitivity, but we still need to interpret these measures. These -measures do not speak for themselves.

-
-
-
-
-
- -
-
-
-
-

Components

-

Now let’s look at the friendship network, ‘friends’. We’re interested -here in how many components there are. By default, the -network_components() function will return the number of -strong components for directed networks. For weak -components, you will need to first make the network undirected. Remember -the difference between weak and strong components?

-
-
-
-
-
- -
-
-
- -
-
-
network_components(friends)
-# note that friends is a directed network
-# you can see this by calling the object 'friends'
-# or by running `manynet::is_directed(friends)`
-
-
-
# Now let's look at the number of components for objects connected by an undirected edge
-# Note: to_undirected() returns an object with all tie direction removed, 
-# so any pair of nodes with at least one directed edge 
-# will be connected by an undirected edge in the new network.
-network_components(to_undirected(friends))
-
-
-
# note that friends is a directed network
-network_components(friends)
-network_components(to_undirected(friends))
-
-
-
-
-
-
- -
-
-

So we know how many components there are, but maybe we’re also -interested in which nodes are members of which components? -node_components() returns a membership vector that can be -used to color nodes in autographr():

-
- -
-
-
friends <- friends %>% 
-  mutate(weak_comp = node_components(to_undirected(friends)),
-         strong_comp = node_components(friends))
-# node_components returns a vector of nodes' memberships to components in the network
-# here, we are adding the nodes' membership to components as an attribute in the network
-# alternatively, we can also use the function `add_node_attribute()`
-# eg. `add_node_attribute(friends, "weak_comp", node_components(to_undirected(friends)))`
-
-
-
autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") +
-autographr(friends, node_color = "strong_comp") + ggtitle("Strong components")
-# by using the 'node_color' argument, we are telling autographr to colour 
-# the nodes in the graph according to the values of the 'weak_comp' attribute in the network 
-
-
-
friends <- friends %>% 
-  mutate(weak_comp = node_components(to_undirected(friends)),
-         strong_comp = node_components(friends))
-autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") +
-autographr(friends, node_color = "strong_comp") + ggtitle("Strong components")
-
-
-
-
-
-
- -
-
-
-
-
-

Community Detection

-

Ok, the friendship network has 3-4 components, but how many ‘groups’ -are there? Just visually, it looks like there are two denser clusters -within the main component.

-

Today we’ll use the ‘friends’ subgraph for exploring community -detection methods. For clarity and simplicity, we will concentrate on -the main component (the so-called ‘giant’ component) and consider -friendship undirected. Can you guess how to make these changes to the -‘friends’ network?

-
- -
-
-
# to_giant() returns an object that includes only the main component without any smaller components or isolates
-(friends <- to_giant(friends))
-
-
-
(friends <- to_undirected(friends))
-
-
-
# now, let's graph the new network
-autographr(friends)
-
-
-
(friends <- to_giant(friends))
-(friends <- to_undirected(friends))
-autographr(friends)
-
-

Comparing friends before and after these operations, -you’ll notice the number of ties decreases as reciprocated directed ties -are consolidated into single undirected ties, and the number of nodes -decreases as two isolates are removed.

-

There is no one single best community detection algorithm. Instead -there are several, each with their strengths and weaknesses. Since this -is a rather small network, we’ll focus on the following methods: -walktrap, edge betweenness, and fast greedy. (Others are included in -{migraph}/{igraph}.) As you use them, consider -how they portray communities and consider which one(s) afford a sensible -view of the social world as cohesively organized.

-
-

Walktrap

-

This algorithm detects communities through a series of short random -walks, with the idea that nodes encountered on any given random walk are -more likely to be within a community than not. It was proposed by Pons -and Latapy (2005).

-

The algorithm initially treats all nodes as communities of their own, -then merges them into larger communities, still larger communities, and -so on. In each step a new community is created from two other -communities, and its ID will be one larger than the largest community ID -so far. This means that before the first merge we have n communities -(the number of vertices in the graph) numbered from zero to n-1. The -first merge creates community n, the second community n+1, etc. This -merge history is returned by the function: -# ?igraph::cluster_walktrap

-

Note the “steps=” argument that specifies the length of the random -walks. While {igraph} sets this to 4 by default, which is -what is recommended by Pons and Latapy, Waugh et al (2009) found that -for many groups (Congresses), these lengths did not provide the maximum -modularity score. To be thorough in their attempts to optimize -modularity, they ran the walktrap algorithm 50 times for each group -(using random walks of lengths 1–50) and selected the network partition -with the highest modularity value from those 50. They call this the -“maximum modularity partition” and insert the parenthetical “(though, -strictly speaking, this cannot be proven to be the optimum without -computationally-prohibitive exhaustive enumeration (Brandes et -al. 2008)).”

-

So let’s try and get a community classification using the walktrap -algorithm with path lengths of the random walks specified to be 50.

-
- -
-
-
# let's use the node_walktrap() function to create a hierarchical, 
-# agglomerative algorithm based on random walks, and assign it to
-# an object
-
-friend_wt <- node_walktrap(friends, times=50)
-friend_wt # note that it prints pretty, but underlying it is just a vector:
-
-
-
c(friend_wt)
-
-# This says that dividing the graph into 2 communities maximises modularity,
-# one with the nodes 
-which(friend_wt == 1)
-# and the other 
-which(friend_wt == 2)
-
-
-
# resulting in a modularity of 
-network_modularity(friends, friend_wt)
-
-
-
friend_wt <- node_walktrap(friends, times=50)
-friend_wt # note that it prints pretty, but underlying it is just a vector:
-# c(friend_wt)
-
-# This says that dividing the graph into 2 communities maximises modularity,
-# one with the nodes 
-which(friend_wt == 1)
-# and the other 
-which(friend_wt == 2)
-# resulting in a modularity of 
-network_modularity(friends, friend_wt)
-
-

We can also visualise the clusters on the original network. How does -the following look? Plausible?

-
- -
-
-
# plot 1: groups by node color
-
-friends <- friends %>% 
-  mutate(walk_comm = friend_wt)
-autographr(friends, node_color = "walk_comm")
-
-
-
#plot 2: groups by borders
-
-# to be fancy, we could even draw the group borders around the nodes using the node_group argument
-autographr(friends, node_group = "walk_comm")
-
-
-
# plot 3: group and node colors
-
-# or both!
-autographr(friends, 
-           node_color = "walk_comm", 
-           node_group = "walk_comm") +
-  ggtitle("Walktrap",
-    subtitle = round(network_modularity(friends, friend_wt), 3))
-# the function `round()` rounds the values to a specified number of decimal places
-# here, we are telling it to round the network_modularity score to 3 decimal places,
-# but the score is exactly 0.27 so only two decimal places are printed.
-
-
-
friends <- friends %>% 
-  mutate(walk_comm = friend_wt)
-autographr(friends, node_color = "walk_comm")
-# to be fancy, we could even draw the group borders around the nodes using the node_group argument
-autographr(friends, node_group = "walk_comm")
-# or both!
-autographr(friends, 
-           node_color = "walk_comm", 
-           node_group = "walk_comm") +
-  ggtitle("Walktrap",
-    subtitle = round(network_modularity(friends, friend_wt), 3))
-
-

This can be helpful when polygons overlap to better identify -membership. Or you can use node color and size to indicate other -attributes…

-
-
-

Edge Betweenness

-

Edge betweenness is like betweenness centrality but for ties not -nodes. The edge-betweenness score of an edge measures the number of -shortest paths from one vertex to another that go through it.

-

The idea of the edge-betweenness based community structure detection -is that it is likely that edges connecting separate clusters have high -edge-betweenness, as all the shortest paths from one cluster to another -must traverse through them. So if we iteratively remove the edge with -the highest edge-betweenness score we will get a hierarchical map -(dendrogram) of the communities in the graph.

-

The following works similarly to walktrap, but no need to set a step -length.

-
- -
-
-
friend_eb <- node_edge_betweenness(friends)
-friend_eb
-
-

How does community membership differ here from that found by -walktrap?

-

We can see how the edge betweenness community detection method works -here: http://jfaganuk.github.io/2015/01/24/basic-network-analysis/

-

To visualise the result:

-
- -
-
-
# create an object
-
-friends <- friends %>% 
-  mutate(eb_comm = friend_eb)
-
-
-
# create a graph with a title and subtitle returning the modularity score
-
-autographr(friends, 
-           node_color = "eb_comm", 
-           node_group = "eb_comm") +
-  ggtitle("Edge-betweenness",
-    subtitle = round(network_modularity(friends, friend_eb), 3))
-
-
-
friends <- friends %>% 
-  mutate(eb_comm = friend_eb)
-autographr(friends, 
-           node_color = "eb_comm", 
-           node_group = "eb_comm") +
-  ggtitle("Edge-betweenness",
-    subtitle = round(network_modularity(friends, friend_eb), 3))
-
-

For more on this algorithm, see M Newman and M Girvan: Finding and -evaluating community structure in networks, Physical Review E 69, 026113 -(2004), https://arxiv.org/abs/cond-mat/0308217.

-
-
-

Fast Greedy

-

This algorithm is the Clauset-Newman-Moore algorithm. Whereas edge -betweenness was divisive (top-down), the fast greedy algorithm is -agglomerative (bottom-up).

-

At each step, the algorithm seeks a merge that would most increase -modularity. This is very fast, but has the disadvantage of being a -greedy algorithm, so it might not produce the best overall community -partitioning, although I personally find it both useful and in many -cases quite “accurate”.

-
- -
-
-
friend_fg <- node_fast_greedy(friends)
-friend_fg # Does this result in a different community partition?
-network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure
-
-
-
# Again, we can visualise these communities in different ways:
-friends <- friends %>% 
-  mutate(fg_comm = friend_fg)
-autographr(friends, 
-           node_color = "fg_comm", 
-           node_group = "fg_comm") +
-  ggtitle("Fast-greedy",
-    subtitle = round(network_modularity(friends, friend_fg), 3))
-# 
-
-
-
friend_fg <- node_fast_greedy(friends)
-friend_fg # Does this result in a different community partition?
-network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure
-
-# Again, we can visualise these communities in different ways:
-friends <- friends %>% 
-  mutate(fg_comm = friend_fg)
-autographr(friends, 
-           node_color = "fg_comm", 
-           node_group = "fg_comm") +
-  ggtitle("Fast-greedy",
-    subtitle = round(network_modularity(friends, friend_fg), 3))
-
-

See A Clauset, MEJ Newman, C Moore: Finding community structure in -very large networks, https://arxiv.org/abs/cond-mat/0408187

-
-
-
-
-
- -
-
-
-
-
-

Two-mode network: Southern women

-

The next dataset, ‘ison_southern_women’, is also available in -{manynet}. Let’s load and graph the data.

-
- -
-
-
# let's load the data and analyze it
-data("ison_southern_women")
-ison_southern_women
-
-
-
autographr(ison_southern_women, node_color = "type")
-autographr(ison_southern_women, "railway", node_color = "type")
-
-
-
data("ison_southern_women")
-ison_southern_women
-autographr(ison_southern_women, node_color = "type")
-
-
-

Project two-mode network into two one-mode networks

-

Now what if we are only interested in one part of the network? For -that, we can obtain a ‘projection’ of the two-mode network. There are -two ways of doing this. The hard way…

-
- -
-
-
twomode_matrix <- as_matrix(ison_southern_women)
-women_matrix <- twomode_matrix %*% t(twomode_matrix)
-event_matrix <- t(twomode_matrix) %*% twomode_matrix
-
-

Or the easy way:

-
- -
-
-
# women-graph
-# to_mode1(): Results in a weighted one-mode object that retains the row nodes from
-# a two-mode object, and weights the ties between them on the basis of their joint
-# ties to nodes in the second mode (columns)
-
-women_graph <- to_mode1(ison_southern_women)
-autographr(women_graph)
-
-# note that projection `to_mode1` involves keeping one type of nodes
-# this is different from to_uniplex above, which keeps one type of ties in the network
-
-
-
# event-graph
-# to_mode2(): Results in a weighted one-mode object that retains the column nodes from
-# a two-mode object, and weights the ties between them on the basis of their joint ties
-# to nodes in the first mode (rows)
-
-event_graph <- to_mode2(ison_southern_women)
-autographr(event_graph)
-
-
-
women_graph <- to_mode1(ison_southern_women)
-autographr(women_graph)
-event_graph <- to_mode2(ison_southern_women)
-autographr(event_graph)
-
-

{manynet} also includes several other options for how to -construct the projection. Please see the help file for more details.

-
- -
-
-
autographr(to_mode2(ison_southern_women, similarity = "jaccard")) + ggtitle("Jaccard") +
-autographr(to_mode2(ison_southern_women, similarity = "rand")) + ggtitle("Rand") +
-autographr(to_mode2(ison_southern_women, similarity = "pearson")) + ggtitle("Pearson") +
-autographr(to_mode2(ison_southern_women, similarity = "yule")) + ggtitle("Yule's Q")
-
-

Which women/events ‘bind’ which events/women? Let’s return to the -question of cohesion.

-
- -
-
-
# network_equivalency(): Calculate equivalence or reinforcement in a (usually two-mode) network
-
-network_equivalency(ison_southern_women)
-
-
-
# network_transitivity(): Calculate transitivity in a network
-
-network_transitivity(women_graph)
-network_transitivity(event_graph)
-
-
-
network_equivalency(ison_southern_women)
-network_transitivity(women_graph)
-network_transitivity(event_graph)
-
-

What do we learn from this?

-
-
-
-

Task/Unit Test

-
    -
  1. Produce a plot comparing 3 community detection procedures used here -on a (women) projection of the ‘ison_southern_women’ dataset. Identify -which you prefer, and explain why.
  2. -
  3. Explain in no more than a paragraph why projection can lead to -misleading transitivity measures.
  4. -
  5. Explain in no more than a paragraph how structural balance might -lead to group identity. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  6. -
- - - - - - -
- -
- -
-
-
-
- - -
-

Community

-

by James Hollway

-
- - -
-
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/inst/tutorials/tutorial5/position.Rmd b/inst/tutorials/tutorial5/position.Rmd deleted file mode 100644 index 3a5a5fca0..000000000 --- a/inst/tutorials/tutorial5/position.Rmd +++ /dev/null @@ -1,607 +0,0 @@ ---- -title: "Position" -author: "by James Hollway" -output: - learnr::tutorial: - theme: journal -runtime: shiny_prerendered ---- - -```{r setup, include = FALSE} -library(learnr) -library(manynet) -library(migraph) -library(patchwork) -knitr::opts_chunk$set(echo = FALSE) -``` - - -## Setting up - -For this session, we're going to use the "ison_algebra" dataset included in the `{manynet}` package. -Do you remember how to call the data? -Can you find out some more information about it via its help file? - -```{r data, exercise = TRUE, purl = FALSE} - -``` - -```{r data-hint-1, purl = FALSE} -# Let's call and load the 'ison_algebra' dataset -data("ison_algebra", package = "manynet") -# Or you can retrieve like this: -ison_algebra <- manynet::ison_algebra -``` - -```{r data-hint-2, purl = FALSE} -# If you want to learn more about the 'ison_algebra' dataset, use the following function (below) -?manynet::ison_algebra -``` - -```{r data-solution} -data("ison_algebra", package = "manynet") -?manynet::ison_algebra -# If you want to see the network object, you can run the name of the object -# ison_algebra -# or print the code with brackets at the front and end of the code -# (ison_algebra <- manynet::ison_algebra) -``` - -We can see that the dataset is multiplex, -meaning that it contains several different types of ties: -friendship (friends), social (social) and task interactions (tasks). - -### Separating multiplex networks - -As a multiplex network, -there are actually three different types of ties in this network. -We can extract them and investigate them separately using `to_uniplex()`. 
-Within the parentheses, put the multiplex object's name, -and then as a second argument put the name of the tie attribute in quotation marks. -Once you have extracted all three networks, -graph them and add a descriptive title. - -```{r separatingnets, exercise=TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r separatingnets-hint-1, purl = FALSE} -# Here's the basic idea/code syntax you will need to extract each type of network -# You will want to replace -____ <- to_uniplex(ison_algebra, _____) -``` - -```{r separatingnets-hint-4, purl = FALSE} -# Now, let's compare the each attribute's graph, side-by-side by using "+" -# Note: using "/" after each graph will order them vertically; however, it might not be best way -# See for example: -gfriend <- autographr(friends) + ggtitle("Friendship") -gfriend + gsocial + gtask -``` - -```{r separatingnets-solution} -friends <- to_uniplex(ison_algebra, "friends") -gfriend <- autographr(friends) + ggtitle("Friendship") - -social <- to_uniplex(ison_algebra, "social") -gsocial <- autographr(social) + ggtitle("Social") - -tasks <- to_uniplex(ison_algebra, "tasks") -gtask <- autographr(tasks) + ggtitle("Task") - -gfriend + gsocial + gtask -``` - -Note also that these are weighted networks. -`autographr()` automatically recognises these different weights and plots them. 
- -```{r strongties-qa, echo=FALSE, purl = FALSE} -question("If we interpret ties with higher weights as strong ties, and lesser weights as weak ties, then, according to network theory, where would we expect novel information to come from?", - answer("Weak ties", - correct = TRUE, - message = learnr::random_praise()), - answer("Strong ties", - message = learnr::random_encouragement()), - answer("Isolates", - message = learnr::random_encouragement()), - answer("Highest degree nodes", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -## Structural Holes and Constraint - -Our first question for this network, is where innovation and creative ideas -might be expected to appear. - -```{r structinnov-qa, echo=FALSE, purl = FALSE} -question("Which network concepts are associated with innovation?", - answer("Structural holes", - correct = TRUE, - message = learnr::random_praise()), - answer("Structural folds", - correct = TRUE), - answer("Structural balance", - message = learnr::random_encouragement()), - answer("Structural equivalence", - message = learnr::random_encouragement()), - answer("Structuralism", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -### Measuring structural holes - -```{r shmeasures-qa, echo=FALSE, purl = FALSE} -question("There are a number of measures that might be used to approximate the concept of structural holes. Select all that apply.", - answer("Constraint", - correct = TRUE, - message = learnr::random_praise()), - answer("Effective size", - correct = TRUE), - answer("Bridges", - correct = TRUE), - answer("Redundancy", - correct = TRUE), - answer("Efficiency", - correct = TRUE), - answer("Hierarchy", - correct = TRUE), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -Let's take a look at which actors are least _constrained_ -by their position in the *task* network to begin with. 
-`{migraph}` makes this easy enough with the `node_constraint()` function. - -```{r objects-setup, purl=FALSE} -alge <- to_named(ison_algebra) -friends <- to_uniplex(alge, "friends") -social <- to_uniplex(alge, "social") -tasks <- to_uniplex(alge, "tasks") -``` - -```{r constraint, exercise = TRUE, exercise.setup = "objects-setup", purl = FALSE} - -``` - -```{r constraint-hint, purl = FALSE} -node_constraint(____) -# Don't forget we want to look at which actors are least constrained by their position in the 'tasks' network -``` - -```{r constraint-solution} -node_constraint(tasks) -``` - -This function returns a vector of constraint scores that can range between 0 and 1. -Let's graph the network again, sizing the nodes according to this score. -We can also identify the node with the minimum constraint score using `node_is_min()`. - -```{r constraintplot, exercise=TRUE, exercise.setup = "objects-setup", purl = FALSE} - -``` - -```{r constraintplot-hint-1, purl = FALSE} -tasks <- tasks %>% - mutate(constraint = node_constraint(____), - low_constraint = node_is_min(node_constraint(____))) - -# Don't forget, we are still looking at the 'tasks' network -``` - -```{r constraintplot-hint-3, purl = FALSE} -# Now, let's graph the network -# Note 1: we are looking at the 'tasks' network -# Note 2: we are interested in the actors 'least constrained' by their position - -autographr(____, node_color = "____") -``` - -```{r constraintplot-hint-4, purl = FALSE} -autographr(tasks, node_size = "constraint", node_color = "low_constraint") -``` - -```{r constraintplot-solution} -tasks <- tasks %>% - mutate(constraint = node_constraint(tasks), - low_constraint = node_is_min(node_constraint(tasks))) -autographr(tasks, node_size = "constraint", node_color = "low_constraint") -``` - -Why minimum? 
Because constraint measures how well connected each node's partners are, -with the implication that having few partners that are already connected to each other puts a node in an advantageous position to identify and share novel solutions to problems. -So what can we learn from this plot -about where innovation might occur within this network? - -## Structural Equivalence - -Next we might ask ourselves what (other) roles there are in the network? -We want to know who plays what role in this algebra class. -Let us begin with structural equivalence. - -```{r equiv-qa, echo=FALSE, purl = FALSE} -question("Structural equivalence means identifying classes of nodes with...", - answer("same/similar tie partners.", - correct = TRUE, - message = learnr::random_praise()), - answer("same/similar pattern of ties.", - message = "This is the definition for regular equivalence."), - answer("same/similar distance from all others.", - message = "This is the definition for automorphic equivalence.") -) -``` - -We're going to identify structurally equivalent positions -across all the data that we have, including 'task', 'social', and 'friend' ties. -So that is, we are using the multiplex `ison_algebra` dataset again and not -a uniplex subgraph thereof. - -### Finding structurally equivalent classes - -In `{migraph}`, finding how the nodes of a network can be partitioned -into structurally equivalent classes can be as easy as: - -```{r find-se, exercise = TRUE, exercise.setup = "data"} -node_structural_equivalence(ison_algebra) - -ison_algebra %>% - mutate(se = node_structural_equivalence(ison_algebra)) %>% - autographr(node_color = "se") -``` - -But actually, a lot is going on behind the scenes here that we can unpack. -Understanding what is going on behind the scenes is important for understanding -how these classes are identified and how to interpret them. 
- -### Step one: starting with a census - -All equivalence classes are based on nodes' similarity across some profile of motifs. -In `{migraph}`, we call these motif *censuses*. -Any kind of census can be used, and `{migraph}` includes a few options, -but `node_structural_equivalence()` is based off of the census of all the nodes' ties, -both outgoing and incoming ties, to characterise their relationships to tie partners. - -```{r construct-cor, exercise = TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r construct-cor-hint-1, purl = FALSE} -# Let's use the node_tie_census() function -# The function accepts an object such as a dataset -# Hint: Which dataset are we using in this tutorial? -node_tie_census(____) -``` - -```{r construct-cor-hint-2, purl = FALSE} -node_tie_census(ison_algebra) -``` - -```{r construct-cor-hint-3, purl = FALSE} -# Now, let's get the dimensions of an object via the dim() function -dim(node_tie_census(ison_algebra)) -``` - -```{r construct-cor-solution} -node_tie_census(ison_algebra) -dim(node_tie_census(ison_algebra)) -``` - -We can see that the result is a matrix of 16 rows -and 96 columns, -because we want to catalogue or take a census of all the different incoming/outgoing partners -our 16 nodes might have across these three networks. -Note also that the result is a weighted matrix; -what would you do if you wanted it to be binary? - -```{r construct-binary, exercise = TRUE, exercise.setup = "data", purl = FALSE} - -``` - -```{r construct-binary-hint, purl = FALSE} -# we could convert the result using as.matrix, returning the ties -as.matrix((node_tie_census(ison_algebra)>0)+0) - -``` - -```{r construct-binary-solution} -# But it's easier to simplify the network by removing the classification into different types of ties. 
-# Note that this also reduces the total number of possible paths between nodes -ison_algebra %>% - select_ties(-type) %>% - node_tie_census() -``` - -Note that `node_tie_census()` does not need to be passed to `node_structural_equivalence()` --- -this is done automatically! -However, the more generic `node_equivalence()` is available and can be used with whichever tie census is desired. -Feel free to explore using some of the other censuses available in `{migraph}`, -though some common ones are already used in the other equivalence convenience functions, -e.g. `node_triad_census()` in `node_regular_equivalence()` -and `node_path_census()` in `node_automorphic_equivalence()`. - -### Step two: growing a tree of similarity - -The next part takes this census and creates a dendrogram based on distance or dissimilarity among the nodes' census profiles. -This is all done internally within e.g. `node_structural_equivalence()`, -though there are two important parameters that can be set to obtain different results. - -First, users can set the type of distance measure used. -For enthusiasts, this is passed on to `stats::dist()`, -so that help page should be consulted for more details. -By default `"euclidean"` is used. - -Second, we can also set the type of clustering algorithm employed. -By default, `{migraph}`'s equivalence functions use hierarchical clustering, `"hier"`, -but for compatibility and enthusiasts, we also offer `"concor"`, -which implements a CONCOR (CONvergence of CORrelations) algorithm. 
- -We can see the difference from varying the clustering algorithm and/or distance -by plotting the dendrograms (hidden) in the output from `node_structural_equivalence()`: - -```{r varyclust, exercise = TRUE, exercise.setup = "data"} -alge <- to_named(ison_algebra) # fake names to make comparison clearer -plot(node_structural_equivalence(alge, - cluster = "hier", distance = "euclidean")) - -# changing the type of distance used -plot(node_structural_equivalence(alge, - cluster = "hier", distance = "manhattan")) - -# changing the clustering algorithm -plot(node_structural_equivalence(alge, - cluster = "concor", distance = "euclidean")) -``` - -```{r scale-interp, echo = FALSE, purl = FALSE} -question("Do you see any differences?", - answer("Yes", correct = TRUE, message = learnr::random_praise()), - answer("No"), - allow_retry = TRUE) -``` - -So plotting a `membership` vector from `{migraph}` returns a dendrogram -with the names of the nodes on the _y_-axis and the distance between them on the _x_-axis. -Using the census as material, the distances between the nodes -is used to create a dendrogram of (dis)similarity among the nodes. -Basically, as we move to the right, we're allowing for -more and more dissimilarity among those we cluster together. -A fork or branching point indicates the level of dissimilarity -at which those two or more nodes would be said to be equivalent. -Where two nodes' branches join/fork represents the maximum distance among all their leaves, -so more similar nodes' branches fork closer to the tree's canopy, -and less similar (groups of) nodes don't join until they form basically the trunk. - -Note that with the results using the hierarchical clustering algorithm, -the distance directly affects the structure of the tree (and the results). - -The CONCOR dendrogram operates a bit differently to hierarchical clustering though. -Instead it represents how converging correlations repeatedly bifurcate -the nodes into one of two partitions. 
-As such the 'distance' is really just the (inverse) number of steps -of bifurcations until nodes belong to the same class. - -### Step three: identifying the number of clusters - -Another bit of information represented in the dendrogram -is where the tree should be cut (the dashed red line) and -how the nodes are assigned to the branches (clusters) present at that cut-point. - -But where does this red line come from? -Or, more technically, how do we identify the number of clusters -into which to assign nodes? - -`{migraph}` includes several different ways of establishing `k`, -or the number of clusters. -Remember, the further to the right the red line is -(the lower on the tree the cut point is) -the more dissimilar we're allowing nodes in the same cluster to be. -We could set this ourselves by just passing `k` an integer. - -```{r k-discrete, exercise = TRUE, exercise.setup = "varyclust"} -plot(node_structural_equivalence(alge, k = 2)) -``` - -But we're really just guessing. Maybe 2 is not the best `k`? -To establish what the best `k` is for this clustering exercise, -we need to iterate through a number of potential `k` -and consider their fitness by some metric. -There are a couple of options here. - -One is to consider, for each `k`, -how correlated this partition is with the observed network. -When there is one cluster for each vertex in the network, -cell values will be identical to the observed correlation matrix, -and when there is one cluster for the whole network, -the values will all be equal to the average correlation -across the observed matrix. -So the correlations in each by-cluster matrix are correlated with the observed -correlation matrix to see how well each by-cluster matrix fits the data. - -Of course, the perfect partition would then be -where all nodes are in their own cluster, -which is hardly 'clustering' at all. -Also, increasing `k` will always improve the correlation. 
-But if one were to plot these correlations as a line graph, -then we might expect there to be a relatively rapid increase -in correlation as we move from, for example, 3 clusters to 4 clusters, -but a relatively small increase from, for example, 13 clusters to 14 clusters. -By identifying the inflection point in this line graph, -`{migraph}` selects a number of clusters that represents a trade-off -between fit and parsimony. -This is the `k = "elbow"` method. - -The other option is to evaluate a candidate for `k` based -not on correlation but on a metric of -how similar each node in a cluster is to others in its cluster -_and_ how dissimilar each node is to those in a neighbouring cluster. -When averaged over all nodes and all clusters, -this provides a 'silhouette coefficient' for a candidate of `k`. -Choosing the number of clusters that maximizes this coefficient, -which is what `k = "silhouette"` does, -can return a somewhat different result to the elbow method. -See what we have here, with all other arguments held the same: - -```{r elbowsil, exercise = TRUE, exercise.setup = "varyclust"} -plot(node_structural_equivalence(alge, - k = "elbow")) -plot(node_structural_equivalence(alge, - k = "silhouette")) -``` - -Ok, so it looks like the elbow method returns `k == 3` as a good trade-off -between fit and parsimony. -The silhouette method, by contrast, sees `k == 4` as maximising cluster similarity -and dissimilarity. -Either is probably fine here, -and there is much debate around how to select the number of clusters anyway. -However, the silhouette method seems to do a better job of identifying how unique -the 16th node is. -The silhouette method is also the default in `{migraph}`. - -Note that there is a somewhat hidden parameter here, `range`. -Since testing across all possible numbers of clusters can get -computationally expensive (not to mention uninterpretable) for large networks, -`{migraph}` only considers up to 8 clusters by default. 
-This however can be modified to be higher or lower, e.g. `range = 16`. - -Finally, one last option is `k = "strict"`, -which only assigns nodes to the same partition -if there is zero distance between them. -This is a quick and rigorous solution, -however oftentimes this misses the point in finding clusters of nodes that, -despite some variation, can be considered as similar on some dimension. - -```{r strict, exercise = TRUE, exercise.setup = "varyclust"} -plot(node_structural_equivalence(alge, k = "strict")) -``` - -Here for example, no two nodes have precisely the same tie-profile, -otherwise their branches would join/fork at a distance of 0. -As such, `k = "strict"` partitions the network into 16 clusters. -Where networks have a number of nodes with strictly the same profiles, -such a k-selection method might be helpful to recognise those in exactly the same structural position, -but here it essentially just reports nodes' identity. - -## Blockmodelling - -### Summarising profiles - -Ok, so now we have a result from establishing nodes' membership in structurally equivalent classes. -We can graph this of course, as above: - -```{r strplot, exercise = TRUE, exercise.setup = "varyclust"} -alge %>% - mutate(se = node_structural_equivalence(alge)) %>% - autographr(node_color = "se") -``` - -While this plot adds the structurally equivalent classes information to our earlier graph, -it doesn't really help us understand how the classes relate. -That is, we might be less interested in how the individuals in the different classes relate, and more interested in how the different classes relate in aggregate. - -One option that can be useful for characterising what -the profile of ties (partners) is for each position/equivalence class -is to use `summary()`. -It summarises some census result by a partition (equivalence/membership) assignment. -By default it takes the average of ties (values), -but this can be tweaked by assigning some other summary statistic as `FUN = `. 
- -```{r summ, exercise = TRUE, exercise.setup = "strplot", purl = FALSE} - -``` - -```{r summ-hint, purl = FALSE} -# Let's wrap node_tie_census inside the summary() function -# and pass it a membership result -summary(node_tie_census(____), - membership = ____) -``` - -```{r summ-solution} -summary(node_tie_census(alge), - membership = node_structural_equivalence(alge)) -``` - -This node census produces 96 columns, -$16 \text{nodes} * 2 \text{directions} * 3 \text{edge types}$, -it takes a bit to look through what varies between the different classes -as 'blocked'. -But only four rows (the four structurally equivalent classes, according to the default). - -Another way to do this is to plot the blockmodel as a whole. -Passing the `plot()` function an adjacency/incidence matrix -along with a membership vector allows the matrix to be sorted and framed -(without the membership vector, just the adjacency/incidence matrix is plotted): - -```{r block, exercise = TRUE, exercise.setup = "strplot", purl = FALSE} - -``` - -```{r block-hint, purl = FALSE} -# Let's plot the blockmodel using the plot() function we used for the dendrograms -# Instead of node_tie_census() let's us as_matrix() - -plot(as_matrix(____), - membership = ____) -``` - -```{r block-solution} -# plot the blockmodel for the whole network -plot(as_matrix(alge), - membership = node_structural_equivalence(alge)) - -# plot the blockmodel for the friends, tasks, and social networks separately -plot(as_matrix(friends), - membership = node_structural_equivalence(alge)) + -plot(as_matrix(tasks), - membership = node_structural_equivalence(alge)) + -plot(as_matrix(social), - membership = node_structural_equivalence(alge)) -``` - -By passing the membership argument our structural equivalence results, -the matrix is re-sorted to cluster or 'block' nodes from the same class together. -This can help us interpret the general relationships between classes. 
-For example, when we plot the friends, tasks, and social networks using the structural equivalence results, -we might characterise them like so: - -- The first group work together only in reciprocal pairs on tasks, -preferring to approach the nerd but also those of the other two roles. -While they hang out with each other socially quite a bit, friendship from groups 2 and 3 are preferred. -- The second group also work together only in reciprocal pairs, -preferring to work collaboratively with group 1 or also the nerd. -They also tend to count those from group 1 as friends, -and hang out with everyone else but themselves. -- The third group will work with either some in group 1 and 3, or 2, -but again prefer the nerd for task advice. -They are pretty good friends with each other though, -and pretty happy to socialise with everyone. -- The nerd is a loner, no friends, -but everyone hangs out with them for task advice. - -### Reduced graph - -Lastly, we can consider how _classes_ of nodes relate to one another in a blockmodel. -Let's use the 4-cluster solution on the valued network (though binary is possible too) -to create a _reduced graph_. -A reduced graph is a transformation of a network such that -the nodes are no longer the individual nodes but the groups of one or more nodes as a class, -and the ties between these blocked nodes can represent the sum or average tie between these classes. -Of course, this means that there can be self-ties or loops, -because even if the original network was simple (not complex), -any within-class ties will end up becoming loops and thus the network will be complex. 
- -```{r structblock, exercise = TRUE, exercise.setup = "varyclust", warning=FALSE} -(bm <- to_blocks(alge, node_structural_equivalence(alge))) - -bm <- bm %>% as_tidygraph %>% - mutate(name = c("Freaks", "Squares", "Nerds", "Geek")) -autographr(bm) -``` diff --git a/inst/tutorials/tutorial5/position.html b/inst/tutorials/tutorial5/position.html deleted file mode 100644 index 41817c9ba..000000000 --- a/inst/tutorials/tutorial5/position.html +++ /dev/null @@ -1,1670 +0,0 @@ - - - - - - - - - - - - - - - - - -Position - - - - - - - - - - - - - - - - - - - - - -Skip to Tutorial Content - - - -
-
- -
- -
-

Setting up

-

For this session, we’re going to use the “ison_algebra” dataset -included in the {manynet} package. Do you remember how to -call the data? Can you find out some more information about it via its -help file?

-
- -
-
-
# Let's call and load the 'ison_algebra' dataset
-data("ison_algebra", package = "manynet")
-# Or you can retrieve like this:
-ison_algebra <- manynet::ison_algebra
-
-
-
# If you want to learn more about the 'ison_algebra' dataset, use the following function (below)
-?manynet::ison_algebra
-
-
-
data("ison_algebra", package = "manynet")
-?manynet::ison_algebra
-# If you want to see the network object, you can run the name of the object
-# ison_algebra
-# or print the code with brackets at the front and end of the code
-# (ison_algebra <- manynet::ison_algebra)
-
-

We can see that the dataset is multiplex, meaning that it contains -several different types of ties: friendship (friends), social (social) -and task interactions (tasks).

-
-

Separating multiplex networks

-

As a multiplex network, there are actually three different types of -ties in this network. We can extract them and investigate them -separately using to_uniplex(). Within the parentheses, put -the multiplex object’s name, and then as a second argument put the name -of the tie attribute in quotation marks. Once you have extracted all -three networks, graph them and add a descriptive title.

-
- -
-
-
# Here's the basic idea/code syntax you will need to extract each type of network
-# You will want to replace the blanks (____) below
-____ <- to_uniplex(ison_algebra, _____)
-
-
-
# Now, let's compare each attribute's graph side-by-side by using "+"
-# Note: using "/" after each graph will order them vertically; however, it might not be the best way
-# See for example:
-gfriend <- autographr(friends) + ggtitle("Friendship")
-gfriend + gsocial + gtask
-
-
-
friends <- to_uniplex(ison_algebra, "friends")
-gfriend <- autographr(friends) + ggtitle("Friendship")
-
-social <- to_uniplex(ison_algebra, "social")
-gsocial <- autographr(social) + ggtitle("Social")
-
-tasks <- to_uniplex(ison_algebra, "tasks")
-gtask <- autographr(tasks) + ggtitle("Task")
-
-gfriend + gsocial + gtask
-
-

Note also that these are weighted networks. autographr() -automatically recognises these different weights and plots them.

-
-
-
-
-
- -
-
-
-
-
-

Structural Holes and Constraint

-

Our first question for this network is where innovation and creative -ideas might be expected to appear.

-
-
-
-
-
- -
-
-
-

Measuring structural holes

-
-
-
-
-
- -
-
-

Let’s take a look at which actors are least constrained by -their position in the task network to begin with. -{migraph} makes this easy enough with the -node_constraint() function.

-
- -
-
-
node_constraint(____)
-# Don't forget we want to look at which actors are least constrained by their position in the 'tasks' network
-
-
-
node_constraint(tasks)
-
-

This function returns a vector of constraint scores that can range -between 0 and 1. Let’s graph the network again, sizing the nodes -according to this score. We can also identify the node with the minimum -constraint score using node_is_min().

-
- -
-
-
tasks <- tasks %>% 
-  mutate(constraint = node_constraint(____),
-         low_constraint = node_is_min(node_constraint(____)))
-
-# Don't forget, we are still looking at the 'tasks' network
-
-
-
# Now, let's graph the network
-# Note 1: we are looking at the 'tasks' network
-# Note 2: we are interested in the actors 'least constrained' by their position
-
-autographr(____, node_color = "____")
-
-
-
autographr(tasks, node_size = "constraint", node_color = "low_constraint")
-
-
-
tasks <- tasks %>% 
-  mutate(constraint = node_constraint(tasks), 
-         low_constraint = node_is_min(node_constraint(tasks)))
-autographr(tasks, node_size = "constraint", node_color = "low_constraint")
-
-

Why minimum? Because constraint measures how well connected each -node’s partners are, with the implication that having few partners that -are already connected to each other puts a node in an advantageous -position to identify and share novel solutions to problems. So what can -we learn from this plot about where innovation might occur within this -network?

-
-
-
-

Structural Equivalence

-

Next we might ask ourselves what (other) roles there are in the -network? We want to know who plays what role in this algebra class. Let -us begin with structural equivalence.

-
-
-
-
-
- -
-
-

We’re going to identify structurally equivalent positions across all -the data that we have, including ‘task’, ‘social’, and ‘friend’ ties. So -that is, we are using the multiplex ison_algebra dataset -again and not a uniplex subgraph thereof.

-
-

Finding structurally equivalent classes

-

In {migraph}, finding how the nodes of a network can be -partitioned into structurally equivalent classes can be as easy as:

-
-
node_structural_equivalence(ison_algebra)
-
-ison_algebra %>% 
-  mutate(se = node_structural_equivalence(ison_algebra)) %>% 
-  autographr(node_color = "se")
- -
-

But actually, a lot is going on behind the scenes here that we can -unpack. Understanding what is going on behind the scenes is important -for understanding how these classes are identified and how to interpret -them.

-
-
-

Step one: starting with a census

-

All equivalence classes are based on nodes’ similarity across some -profile of motifs. In {migraph}, we call these motif -censuses. Any kind of census can be used, and -{migraph} includes a few options, but -node_structural_equivalence() is based off of the census of -all the nodes’ ties, both outgoing and incoming ties, to characterise -their relationships to tie partners.

-
- -
-
-
# Let's use the node_tie_census() function
-# The function accepts an object such as a dataset
-# Hint: Which dataset are we using in this tutorial?
-node_tie_census(____)
-
-
-
node_tie_census(ison_algebra)
-
-
-
# Now, let's get the dimensions of an object via the dim() function
-dim(node_tie_census(ison_algebra))
-
-
-
node_tie_census(ison_algebra)
-dim(node_tie_census(ison_algebra))
-
-

We can see that the result is a matrix of 16 rows and 96 columns, -because we want to catalogue or take a census of all the different -incoming/outgoing partners our 16 nodes might have across these three -networks. Note also that the result is a weighted matrix; what would you -do if you wanted it to be binary?

-
- -
-
-
# we could convert the result using as.matrix, returning the ties 
-as.matrix((node_tie_census(ison_algebra)>0)+0)
-
-
-
# But it's easier to simplify the network by removing the classification into different types of ties.
-# Note that this also reduces the total number of possible paths between nodes
-ison_algebra %>%
-  select_ties(-c(friends, social, tasks)) %>%
-  node_tie_census()
-
-

Note that node_tie_census() does not need to be passed -to node_structural_equivalence() — this is done -automatically! However, the more generic node_equivalence() -is available and can be used with whichever tie census is desired. Feel -free to explore using some of the other censuses available in -{migraph}, though some common ones are already used in the -other equivalence convenience functions, -e.g. node_triad_census() in -node_regular_equivalence() and -node_path_census() in -node_automorphic_equivalence().

-
-
-

Step two: growing a tree of similarity

-

The next part takes this census and creates a dendrogram based on -distance or dissimilarity among the nodes’ census profiles. This is all -done internally within e.g. node_structural_equivalence(), -though there are two important parameters that can be set to obtain -different results.

-

First, users can set the type of distance measure used. For -enthusiasts, this is passed on to stats::dist(), so that -help page should be consulted for more details. By default -"euclidean" is used.

-

Second, we can also set the type of clustering algorithm employed. By -default, {migraph}’s equivalence functions use hierarchical -clustering, "hier", but for compatibility and enthusiasts, -we also offer "concor", which implements a CONCOR -(CONvergence of CORrelations) algorithm.

-

We can see the difference from varying the clustering algorithm -and/or distance by plotting the dendrograms (hidden) in the output from -node_structural_equivalence():

-
-
alge <- to_named(ison_algebra) # fake names to make comparison clearer
-plot(node_structural_equivalence(alge, 
-                                 cluster = "hier", distance = "euclidean"))
-
-# changing the type of distance used
-plot(node_structural_equivalence(alge, 
-                                 cluster = "hier", distance = "manhattan"))
-
-# changing the clustering algorithm
-plot(node_structural_equivalence(alge, 
-                                 cluster = "concor", distance = "euclidean"))
- -
-
-
-
-
-
- -
-
-

So plotting a membership vector from -{migraph} returns a dendrogram with the names of the nodes -on the y-axis and the distance between them on the -x-axis. Using the census as material, the distances between the -nodes are used to create a dendrogram of (dis)similarity among the nodes. -Basically, as we move to the right, we’re allowing for more and more -dissimilarity among those we cluster together. A fork or branching point -indicates the level of dissimilarity at which those two or more nodes -would be said to be equivalent. Where two nodes’ branches join/fork -represents the maximum distance among all their leaves, so more similar -nodes’ branches fork closer to the tree’s canopy, and less similar -(groups of) nodes don’t join until they form basically the trunk.

-

Note that with the results using the hierarchical clustering -algorithm, the distance directly affects the structure of the tree (and -the results).

-

The CONCOR dendrogram operates a bit differently to hierarchical -clustering though. Instead it represents how converging correlations -repeatedly bifurcate the nodes into one of two partitions. As such the -‘distance’ is really just the (inverse) number of steps of bifurcations -until nodes belong to the same class.

-
-
-

Step three: identifying the number of clusters

-

Another bit of information represented in the dendrogram is where the -tree should be cut (the dashed red line) and how the nodes are assigned -to the branches (clusters) present at that cut-point.

-

But where does this red line come from? Or, more technically, how do -we identify the number of clusters into which to assign nodes?

-

{migraph} includes several different ways of -establishing k, or the number of clusters. Remember, the -further to the right the red line is (the lower on the tree the cut -point is) the more dissimilar we’re allowing nodes in the same cluster -to be. We could set this ourselves by just passing k an -integer.

-
-
plot(node_structural_equivalence(alge, k = 2))
- -
-

But we’re really just guessing. Maybe 2 is not the best -k? To establish what the best k is for this -clustering exercise, we need to iterate through a number of potential -k and consider their fitness by some metric. There are a -couple of options here.

-

One is to consider, for each k, how correlated this -partition is with the observed network. When there is one cluster for -each vertex in the network, cell values will be identical to the -observed correlation matrix, and when there is one cluster for the whole -network, the values will all be equal to the average correlation across -the observed matrix. So the correlations in each by-cluster matrix are -correlated with the observed correlation matrix to see how well each -by-cluster matrix fits the data.

-

Of course, the perfect partition would then be where all nodes are in -their own cluster, which is hardly ‘clustering’ at all. Also, increasing -k will always improve the correlation. But if one were to -plot these correlations as a line graph, then we might expect there to -be a relatively rapid increase in correlation as we move from, for -example, 3 clusters to 4 clusters, but a relatively small increase from, -for example, 13 clusters to 14 clusters. By identifying the inflection -point in this line graph, {migraph} selects a number of -clusters that represents a trade-off between fit and parsimony. This is -the k = "elbow" method.

-

The other option is to evaluate a candidate for k based -not on correlation but on a metric of how similar each node in a cluster -is to others in its cluster and how dissimilar each node is to -those in a neighbouring cluster. When averaged over all nodes and all -clusters, this provides a ‘silhouette coefficient’ for a candidate of -k. Choosing the number of clusters that maximizes this -coefficient, which is what k = "silhouette" does, can -return a somewhat different result to the elbow method. See what we have -here, with all other arguments held the same:

-
-
plot(node_structural_equivalence(alge, 
-                                 k = "elbow"))
-plot(node_structural_equivalence(alge, 
-                                 k = "silhouette"))
- -
-

Ok, so it looks like the elbow method returns k == 3 as -a good trade-off between fit and parsimony. The silhouette method, by -contrast, sees k == 4 as maximising cluster similarity and -dissimilarity. Either is probably fine here, and there is much debate -around how to select the number of clusters anyway. However, the -silhouette method seems to do a better job of identifying how unique the -16th node is. The silhouette method is also the default in -{migraph}.

-

Note that there is a somewhat hidden parameter here, -range. Since testing across all possible numbers of -clusters can get computationally expensive (not to mention -uninterpretable) for large networks, {migraph} only -considers up to 8 clusters by default. This however can be modified to -be higher or lower, e.g. range = 16.

-

Finally, one last option is k = "strict", which only -assigns nodes to the same partition if there is zero distance between -them. This is a quick and rigorous solution, however oftentimes this -misses the point in finding clusters of nodes that, despite some -variation, can be considered as similar on some dimension.

-
-
plot(node_structural_equivalence(alge, k = "strict"))
- -
-

Here for example, no two nodes have precisely the same tie-profile, -otherwise their branches would join/fork at a distance of 0. As such, -k = "strict" partitions the network into 16 clusters. Where -networks have a number of nodes with strictly the same profiles, such a -k-selection method might be helpful to recognise those in exactly the -same structural position, but here it essentially just reports nodes’ -identity.

-
-
-
-

Blockmodelling

-
-

Summarising profiles

-

Ok, so now we have a result from establishing nodes’ membership in -structurally equivalent classes. We can graph this of course, as -above:

-
-
alge %>% 
-  mutate(se = node_structural_equivalence(alge)) %>% 
-  autographr(node_color = "se")
- -
-

While this plot adds the structurally equivalent classes information -to our earlier graph, it doesn’t really help us understand how the -classes relate. That is, we might be less interested in how the -individuals in the different classes relate, and more interested in how -the different classes relate in aggregate.

-

One option that can be useful for characterising what the profile of -ties (partners) is for each position/equivalence class is to use -summary(). It summarises some census result by a partition -(equivalence/membership) assignment. By default it takes the average of -ties (values), but this can be tweaked by assigning some other summary -statistic as FUN =.

-
- -
-
-
# Let's wrap node_tie_census inside the summary() function
-# and pass it a membership result
-summary(node_tie_census(____),
-        membership = ____)
-
-
-
summary(node_tie_census(alge),
-        membership = node_structural_equivalence(alge))
-
-

This node census produces 96 columns, \(16 -\text{ nodes} \times 2 \text{ directions} \times 3 \text{ edge types}\), so it -takes a bit to look through what varies between the different classes as -‘blocked’. But there are only four rows (the four structurally equivalent classes, -according to the default).

-

Another way to do this is to plot the blockmodel as a whole. Passing -the plot() function an adjacency/incidence matrix along -with a membership vector allows the matrix to be sorted and framed -(without the membership vector, just the adjacency/incidence matrix is -plotted):

-
- -
-
-
# Let's plot the blockmodel using the plot() function we used for the dendrograms
-# Instead of node_tie_census() let's use as_matrix()
-
-plot(as_matrix(____),
-     membership = ____)
-
-
-
# plot the blockmodel for the whole network
-plot(as_matrix(alge),
-     membership = node_structural_equivalence(alge))
-
-# plot the blockmodel for the friends, tasks, and social networks separately
-plot(as_matrix(friends),
-     membership = node_structural_equivalence(alge)) +
-plot(as_matrix(tasks),
-     membership = node_structural_equivalence(alge)) +
-plot(as_matrix(social),
-     membership = node_structural_equivalence(alge))
-
-

By passing the membership argument our structural equivalence -results, the matrix is re-sorted to cluster or ‘block’ nodes from the -same class together. This can help us interpret the general -relationships between classes. For example, when we plot the friends, -tasks, and social networks using the structural equivalence results, we -might characterise them like so:

-
    -
  • The first group work together only in reciprocal pairs on tasks, -preferring to approach the nerd but also those of the other two roles. -While they hang out with each other socially quite a bit, friendship -from groups 2 and 3 is preferred.
  • -
  • The second group also work together only in reciprocal pairs, -preferring to work collaboratively with group 1 or also the nerd. They -also tend to count those from group 1 as friends, and hang out with -everyone else but themselves.
  • -
  • The third group will work with either some in group 1 and 3, or 2, -but again prefer the nerd for task advice. They are pretty good friends -with each other though, and pretty happy to socialise with -everyone.
  • -
  • The nerd is a loner, no friends, but everyone hangs out with them -for task advice.
  • -
-
-
-

Reduced graph

-

Lastly, we can consider how classes of nodes relate to one -another in a blockmodel. Let’s use the 4-cluster solution on the valued -network (though binary is possible too) to create a reduced -graph. A reduced graph is a transformation of a network such that -the nodes are no longer the individual nodes but the groups of one or -more nodes as a class, and the ties between these blocked nodes can -represent the sum or average tie between these classes. Of course, this -means that there can be self-ties or loops, because even if the original -network was simple (not complex), any within-class ties will end up -becoming loops and thus the network will be complex.

-
-
(bm <- to_blocks(alge, node_structural_equivalence(alge)))
-
-bm <- bm %>% as_tidygraph %>% 
-  mutate(name = c("Freaks", "Squares", "Nerds", "Geek"))
-autographr(bm)
- -
-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - -
-
- -
- -
-
-
-
- - -
-

Position

-

by James Hollway

-
- - -
-
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/inst/tutorials/tutorial6/topology.Rmd b/inst/tutorials/tutorial6/topology.Rmd deleted file mode 100644 index d92f09b12..000000000 --- a/inst/tutorials/tutorial6/topology.Rmd +++ /dev/null @@ -1,648 +0,0 @@ ---- -title: "Topology" -author: "by James Hollway, Andrea Biswas-Tortajada" -output: - learnr::tutorial: - theme: journal -runtime: shiny_prerendered ---- - -```{r setup, include = FALSE} -library(learnr) -library(manynet) -library(migraph) -library(patchwork) -knitr::opts_chunk$set(echo = FALSE) -learnr::random_phrases_add(language = "fr", - praise = c("C'est génial!", - "Beau travail", - "Excellent travail!", - "Bravo!", - "Super!", - "Bien fait", - "Bien joué", - "Tu l'as fait!", - "Je savais que tu pouvais le faire.", - "Ça a l'air facile!", - "C'était un travail de première classe.", - "C'est ce que j'appelle un bon travail!"), - encouragement = c("Bon effort", - "Vous l'avez presque maîtrisé!", - "Ça avance bien.", - "Continuez comme ça.", - "Continuez à travailler dur!", - "Vous apprenez vite!", - "Vous faites un excellent travail aujourd'hui.")) -learnr::random_phrases_add(language = "en", - praise = c("C'est génial!", - "Beau travail!", - "Bravo!", - "Super!"), - encouragement = c("Bon effort")) -``` - -In this tutorial, we'll explore: - -- how to create or generate different network topologies -- the core-periphery structure of a network -- features of a network related to its resilience - -## Generate networks of different structures - -This tutorial covers a range of different network topologies: -trees, lattices, random, small-world, scale-free, and core-periphery -networks. -These ideal networks exaggerate centrality, cohesion, and randomness features, -and are thus great for theory-building and investigating the relationship between rules and structure. - -In this practical, we're going to create/generate -a number of ideal-typical network topologies and plot them. 
-We'll first look at some deterministic algorithms for _creating_ networks -of different structures, -and then look at how the introduction of some randomness can _generate_ a variety of network structures. - -### Deterministic graphs - -To begin with, let's create a few 'empty' and full/'complete' graphs. -You will want to use some of the `create_*()` group of functions from `{manynet}`, -because they create graphs following some strict rule(s). -The two functions you will want to use here are `create_empty()` and `create_filled()`. -`create_empty()` creates an empty graph with the given number of nodes, -in this case 50 nodes. -For `create_filled()` we're creating a full graph, -where all of the nodes are connected to all of the other nodes. - -Let's say that we want to explore networks of fifty nodes in this script. -Graph one empty and one complete network with 50 nodes each, -give them an informative title, and plot the graphs together. -What would a complete network with half the nodes look like? -Add that too. - -```{r empty, exercise=TRUE, purl = FALSE} - -``` - -```{r empty-solution} -(autographr(create_empty(50), "circle") + ggtitle("Empty graph")) -(autographr(create_filled(50)) + ggtitle("Complete graph")) -(autographr(create_filled(50/2)) + ggtitle("Complete graph (smaller)")) -``` - -#### Stars - -In a star network, there is one node to which all other nodes are connected. -There is no transitivity. -The maximum path length is two. -And centrality is maximised! -This network maximises all centrality measures as one node acts -as the sole bridge connecting one part of the network to the other. 
- -Use the `create_star()` function to graph three star networks: - -- an undirected star network -- a out-directed star network -- and an in-directed star network - -```{r star, exercise = TRUE, purl = FALSE} - -``` - -```{r star-solution} -(autographr(create_star(50)) + ggtitle("Star graph")) -(autographr(create_star(50, directed = TRUE)) + ggtitle("Star out")) -(autographr(to_redirected(create_star(50, directed = TRUE))) + ggtitle("Star in")) -``` - -#### Trees - -Trees, or regular trees, are networks with branching nodes. -They can be directed or undirected, and tend to indicate strong hierarchy. -Again graph three networks: - -- one undirected with 2 branches per node -- a directed network with 2 branches per node -- the same as above, but graphed using the "tree" layout - -```{r tree, exercise = TRUE, purl = FALSE} - -``` - -```{r tree-solution} -# width argument specifies the breadth of the branches -(autographr(create_tree(50, width = 2)) + ggtitle("Tree graph")) -(autographr(create_tree(50, width = 2, directed = TRUE)) + ggtitle("Tree out")) -(autographr(create_tree(50, width = 2, directed = TRUE), "tree") + ggtitle("Tree layout")) -``` - -Try varying the `width` argument to see the result. - -#### Lattices - -Lattices reflect highly clustered networks -where there is a high likelihood that interaction partners also interact. -They are used to show how clustering facilitates or limits diffusion -or makes pockets of behaviour stable. 
- -```{r lat-qa, echo=FALSE, purl = FALSE} -question("Why are lattices considered highly clustered?", - answer("Because neighbours are likely also neighbours of each other", - message = learnr::random_praise(), - correct = TRUE), - answer("Because all nodes are directly connected to each other", - message = learnr::random_encouragement()), - answer("Because there is a single component", - message = learnr::random_encouragement()), - answer("Because there is a single community", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE) -``` - -Note that `create_lattice()` in `{manynet}` works a little differently -to how it works in `{igraph}`. -In `{igraph}` the number or vector passed to the function indicates -the length of each dimension. -So `c(50)` would be a one-dimensional lattice, -essentially a chain of 50 nodes connected to their neighbours. -`c(50,50)` would be a two-dimensional lattice, -of 50 nodes long and 50 nodes wide. -`c(50,50,50)` would be a three-dimensional lattice, -of 50 nodes long, 50 nodes wide, and 50 nodes deep, etc. - -_But_ this doesn't help us when we want to see what a lattice representation -with the same order (number of nodes) as a given network would be. -For example, perhaps we just want to know what a lattice with 50 nodes -would look like. -So `{manynet}` instead tries to find the most even or balanced -two-dimensional representation with a given number of nodes. - -Graph two lattices, one with 50 nodes, -and another with half the number of nodes. - -```{r lattices, exercise = TRUE, purl = FALSE} - -``` - -```{r lattices-solution} -(autographr(create_lattice(50)) + ggtitle("One-mode lattice graph")) -(autographr(create_lattice(50/2)) + ggtitle("Smaller lattice graph")) -``` - -#### Rings - -This creates a graph where each node has two separate neighbours -which creates a ring graph. 
-Graph three ring networks: - -- one with 50 nodes -- one with 50 nodes where they are connected to neighbours two steps away, - on a "circle" layout -- the same as above, but on a "stress" layout - -```{r rings, exercise = TRUE, purl = FALSE} - -``` - -```{r rings-solution} -(autographr(create_ring(50)) + ggtitle("Ring graph", subtitle = "Starring Naomi Watts")) -# width argument specifies the width of the ring -(autographr(create_ring(50, width = 2), "circle") + ggtitle("The Ring Two", subtitle = "No different?")) -(autographr(create_ring(50, width = 2), "stress") + ggtitle("The Ring Two v2.0")) -``` - -### Probabilistic graphs - -Next we are going to take a look at some probabilistic graphs. -These involve some random element, perhaps in addition to specific rules, -to stochastically 'generate' networks of certain types of topologies. -As such, we'll be using the `generate_*()` group of functions from `{manynet}`. - -#### Random graphs - -An Erdös-Renyi graph is simply a random graph. -You will need to specify the probability of a tie -in addition to the number of nodes. -An Erdos-Renyi graph on the vertex set $V$ is a random graph -which connects each pair of nodes ${i,j}$ with probability $p$, independent. -Note that for a “sparse” ER graphs, $p$ must decrease as $N$ goes up. -Generate three random networks of 50 nodes and a density of 0.08: - -```{r random, exercise = TRUE, purl = FALSE} - -``` - -```{r random-solution} -(autographr(generate_random(50, 0.08)) + ggtitle("Random 1 graph")) -(autographr(generate_random(50, 0.08)) + ggtitle("Random 2 graph")) -(autographr(generate_random(50, 0.08)) + ggtitle("Random 3 graph")) -``` - -Keep going if you like... it will be a little different every time. -Note that you can also pass the second argument an integer, -in which case the function will interpret that as the number of ties/edges rather than the probability that a tie is present. 
-Try generating a random graph with 200 edges/ties now: - -```{r randomno, exercise = TRUE, purl = FALSE} - -``` - -```{r randomno-solution} -(erdren4 <- autographr(generate_random(50, 200)) + ggtitle("Random 1 graph")) -``` - -#### Small-world graphs - -Remember the ring graph from above? -What if we rewire (change) some of the edges at a certain probability? -This is how small-world networks are generated. -Graph three small-world networks, all with 50 nodes and a rewiring probability of 0.025. - -```{r smallw, exercise = TRUE, purl = FALSE} - -``` - -```{r smallw-solution} -(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 1 graph")) -(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 2 graph")) -(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 3 graph")) -``` - -With on average 2.5 ties randomly rewired, does the structure look different? -This is a small-world network, where clustering/transitivity remains high -but path lengths are much lower than they would otherwise be. -Remember that in a small-world network, the shortest-path distance between nodes -increases sufficiently slowly as a function of the number of nodes in the network. -You can also call these networks a Watts–Strogatz toy network. -If you want to review this, go back to the reading by Watts (2004). - -There is also such a thing as a network's small-world coefficient. -See the help page for more details, -but with the default equation ('omega'), -the coefficient typically ranges between 0 and 1, -where 1 is as close to a small-world as possible. -Try it now on a small-world generated network, -but with a rewiring probability of 0.25: - -```{r smallwtest, exercise = TRUE, purl = FALSE} - -``` - -```{r smallwtest-solution} -network_smallworld(generate_smallworld(50, 0.25)) -``` - -#### Scale-free graphs - -There is another famous model in network science: the scale-free model. 
-Remember: -"In many real-world networks, the distribution of the number of network neighbours -the degree distribution is typically right-skewed with a "heavy tail". -A majority of the nodes have less-than-average degree and -a small fraction of hubs are many times better connected than average (2004, p. 250). - -The following generates a scale-free graph according to the Barabasi-Albert (BA) model -that rests upon the mechanism of preferential attachment. -More on this in the Watts paper (2005, p.51) and Merton (1968). -The BA model rests on two mechanisms: -population growth and preferential attachment. -Population growth: real networks grow in time as new members join the population. -Preferential/cumulative attachment means that newly arriving nodes will tend to -connect to already well-connected nodes rather than poorly connected ones. - -Generate and graph three scale-free networks, -with alpha parameters of 0.5, 1, and 1.5. - -```{r scalef, exercise = TRUE, purl = FALSE} - -``` - -```{r scalef-solution} -(autographr(generate_scalefree(50, 0.5)) + - ggtitle("Scalefree 1 graph", subtitle = "Power = .5")) -(autographr(generate_scalefree(50, 1)) + - ggtitle("Scalefree 2 graph", subtitle = "Power = 1")) -(autographr(generate_scalefree(50, 1.5)) + - ggtitle("Scalefree 3 graph", subtitle = "Power = 1.5")) -``` - -You can also test whether a network has a degree distribution that fits -the scale-free model. -When a Kolmogorov-Smirnov test p-value less than 0.05 is implied, -a message is given that you should reject the hypothesis -that a power law fits here. -With an alpha/power-law exponent between 2 and 3, -one generally cannot reject the hypothesis that the observed data -comes from a power-law distribution. - -```{r scaleftest, exercise = TRUE, purl = FALSE} - -``` - -```{r scaleftest-solution} -network_scalefree(generate_scalefree(50, 2)) -``` - -## Core-Periphery - -### Core-periphery graphs - -Lastly, we'll take a look at some core-periphery graphs. 
-The most common definition of a core-periphery network -is one in which the network can be partitioned into two groups -such that one group of nodes (the core) has -dense interactions among themselves, -moderately dense interactions with the second group, -and the second group (the periphery) has -sparse interactions among themselves. - -```{r corevcomm-qa, echo=FALSE, purl = FALSE} -question("Can a single network have both a community structure and a core-periphery structure?", - answer("No", message = learnr::random_encouragement()), - answer("Yes", - message = learnr::random_praise(), - correct = TRUE), - random_answer_order = TRUE, - allow_retry = TRUE) -``` - -We can visualise extreme versions of such a network -using the `create_core()` function. -Graph a core-periphery network of 50 nodes -(which, unless a core-periphery membership assignment is given, -will be split evenly between core and periphery partitions). - -```{r core, exercise=TRUE, purl = FALSE} - -``` - -```{r core-solution} -(autographr(create_core(50)) + ggtitle("Core")) -``` - -### Core-periphery assignment - -Let's consider identifying the core and peripheral nodes in a network. -Let's use the `ison_lawfirm` dataset from `{manynet}`. -This dataset involves relations between partners in a corporate law firm in New England. -First of all, graph the data and see whether you can guess which nodes -might be part of the core and which are part of the periphery. -Color the nodes by Gender, Office, Practice, and School. -Any you might think correlate with core status? - -```{r gnet, exercise=TRUE, purl = FALSE} - -``` - -```{r gnet-solution} -autographr(ison_lawfirm, node_color = "school") -``` - -Next, let's assign nodes to the core and periphery blocks -using the `node_core()` function from `{migraph}`. -It works pretty straightforwardly. 
-By default it runs down the rank order of nodes by their degree, -at each step working out whether including the next highest degree node -in the core will maximise the core-periphery structure of the network. - -```{r nodecore, exercise=TRUE, purl = FALSE} - -``` - -```{r nodecore-solution} -ison_lawfirm %>% - mutate(nc = node_core(ison_lawfirm)) %>% - autographr(node_color = "nc") -``` - -This graph suggests that there might even be two cores here, -one on the left and one on the right. - -But is it really all that much of a core-periphery structure? -We can establish how correlated our network is compared to -a core-periphery model of the same dimension using `network_core()`. - -```{r netcore, exercise=TRUE, purl = FALSE} - -``` - -```{r netcore-solution} -network_core(ison_lawfirm, node_core(ison_lawfirm)) -``` - -```{r corecorr-qa, echo=FALSE, purl = FALSE} -question("What can we say about this correlation.", - answer("It is a perfect positive relationship", - message = learnr::random_encouragement()), - answer("It is fairly strong", - message = learnr::random_encouragement()), - answer("It is negative", - message = learnr::random_encouragement()), - answer("There is absolutely no correlation", - message = learnr::random_encouragement()), - answer("None of the above", correct = TRUE, - message = learnr::random_praise()), - allow_retry = TRUE -) -``` - -Note that `node_core()` also includes a method that descends through -the rank order of nodes' eigenvector centralities instead of degree centralities. -Why might that not be such a good choice here? - -Now let's see whether our core-periphery membership vector correlates with any of the three -categorical attributes we looked at before. -Since we're doing this on categorical variables, we'll use the Chi-squared test in base R. -Take a look and see whether there is a statistically significant association -between gender and core (or periphery) status. 
- -```{r chisq, exercise=TRUE, purl=FALSE} - -``` - -```{r chisq-solution} -chisq.test(node_core(ison_lawfirm), node_attribute(ison_lawfirm, "Gender")) -``` - -```{r chisq-qa, echo=FALSE, purl = FALSE} -question("There a statistically significant association between the core assignment and...", - answer("gender.", - message = learnr::random_encouragement()), - answer("office.", - message = learnr::random_encouragement()), - answer("school.", - message = learnr::random_encouragement()), - answer("practice.", - message = learnr::random_encouragement()), - answer("none of the above variables.", correct = TRUE, - message = learnr::random_praise()), - allow_retry = TRUE -) -``` - -### Coreness values - -An alternative route is to identify 'core' nodes -depending on their _k_-coreness. -In `{migraph}`, we can return nodes _k_-coreness -with `node_coreness()` instead of -the `node_core()` used for core-periphery. - -```{r nodecoren, exercise=TRUE, purl = FALSE} - -``` - -```{r nodecoren-solution} -ison_lawfirm %>% - mutate(ncn = node_coreness(ison_lawfirm)) %>% - autographr(node_color = "ncn") + scale_colour_sdgs() -``` - -```{r dich-qa, echo=FALSE, purl = FALSE} -question("Which has more than two classes/groups.", - answer("node_coreness()", correct = TRUE, - message = learnr::random_praise()), - answer("node_core()", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -```{r ness-qa, echo=FALSE, purl = FALSE} -question("Select the correct definitions:", - answer("The k-core of a network is a maximal subgraph in which each vertex has at least degree k.", correct = TRUE, - message = learnr::random_praise()), - answer("The coreness of a node is k if it belongs to the k-core but not to the (k+1)-core.", correct = TRUE), - answer("The coreness of a node is equal to its degree.", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -## Network Resilience - -### How 
cohesive is the network? - -When investigating a network's resilience, -we might think of whether the network will remain connected despite some nodes or ties dropping out. -Let's explore how resilient a (core) network of adolescents (`ison_adolescents`) might be. -First, we might be interested in whether the network is connected at all. - -```{r connected, exercise=TRUE, purl = FALSE} - -``` - -```{r connected-solution} -network_connectedness(ison_adolescents) -``` - -This measure gets at the proportion of dyads that can reach each other in the network. -Another way to get at this would be to see how many components there are in the network. - -```{r connect-qa, echo=FALSE, purl=FALSE} -question("But counting the number of components instead of connectedness can overemphasise:", - answer("Isolates", correct = TRUE, - message = learnr::random_praise()), - answer("Small components", correct = TRUE, - message = learnr::random_encouragement()), - answer("Density", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -A dropped tie can have severe consequences to the topology of a network -if it is a bridge, say. -But a dropped node can be even more consequential, as it will take any ties it has with it. -Find out how many dropped nodes it would take to (further) fragment the network. 
- -```{r cohesion, exercise=TRUE, purl = FALSE} - -``` - -```{r cohesion-solution} -network_cohesion(ison_adolescents) -``` - -```{r cohesion-qa, echo=FALSE, purl = FALSE} -question("The result of this function represents...", - answer("the minimum number of nodes necessary to remove from the network to increase the number of components.", correct = TRUE, - message = learnr::random_praise()), - answer("the number of strong components in the network.", - message = learnr::random_encouragement()), - answer("the minimum number of ties necessary to remove from the network to increase the number of components.", - message = "This is actually the definition of `node_adhesion()`."), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -```{r res-qa, echo=FALSE, purl = FALSE} -question("The higher the minimum number of nodes to remove...", - answer("the more resilient is the network.", correct = TRUE, - message = learnr::random_praise()), - answer("the less resilient the network.", - message = learnr::random_encouragement()), - random_answer_order = TRUE, - allow_retry = TRUE -) -``` - -### Identifying cutpoints - -But which are these nodes? Is there more than one? -Nodes that endanger fragmentation of the network are called cutpoints. -Find and use a function to identify which, if any, of the nodes in the `ison_adolescents` -network are cutpoints. - -```{r idcuts, exercise = TRUE, purl=FALSE} - -``` - -```{r idcuts-solution} -node_is_cutpoint(ison_adolescents) -``` - -Ok, so this results in a vector identifying which nodes are cutpoints (TRUE) or not (FALSE). -Somewhat more useful though would be to highlight these nodes on the network. -Can you add a node attribute that highlights which nodes are cutpoints? 
- -```{r closerlook, exercise = TRUE, purl=FALSE} - -``` - -```{r closerlook-solution} -ison_adolescents |> mutate(cut = node_is_cutpoint(ison_adolescents)) |> - autographr(node_color = "cut") -``` - -### Identifying bridges - -Let's do something similar now, but with respect to ties rather than nodes. - -```{r tieside, exercise = TRUE, purl=FALSE} - -``` - -```{r tieside-solution} -network_adhesion(ison_adolescents) -ison_adolescents |> mutate_ties(cut = tie_is_bridge(ison_adolescents)) |> - autographr(edge_color = "cut") -``` - -We could also investigate the opposite of a bridge, -the degree to which ties are deeply embedded in triangles. -This is called (rather confusingly) tie cohesion. - -```{r tiecoh, exercise = TRUE, purl=FALSE} - -``` - -```{r tiecoh-solution} -ison_adolescents |> mutate_ties(coh = tie_cohesion(ison_adolescents)) |> - autographr(edge_color = "coh") -``` - -Where would you target your efforts if you wanted to fragment this network? diff --git a/inst/tutorials/tutorial6/topology.html b/inst/tutorials/tutorial6/topology.html deleted file mode 100644 index 25b922658..000000000 --- a/inst/tutorials/tutorial6/topology.html +++ /dev/null @@ -1,2466 +0,0 @@ - - - - - - - - - - - - - - - - - -Topology - - - - - - - - - - - - - - - - - - - - - -Skip to Tutorial Content - - - -
-
- -
- -

In this tutorial, we’ll explore:

-
    -
  • how to create or generate different network topologies
  • -
  • the core-periphery structure of a network
  • -
  • features of a network related to its resilience
  • -
-
-

Generate networks of different structures

-

This tutorial covers a range of different network topologies: trees, -lattices, random, small-world, scale-free, and core-periphery networks. -These ideal networks exaggerate centrality, cohesion, and randomness -features, and are thus great for theory-building and investigating the -relationship between rules and structure.

-

In this practical, we’re going to create/generate a number of -ideal-typical network topologies and plot them. We’ll first look at some -deterministic algorithms for creating networks of different -structures, and then look at how the introduction of some randomness can -generate a variety of network structures.

-
-

Deterministic graphs

-

To begin with, let’s create a few ‘empty’ and full/‘complete’ graphs. -You will want to use some of the create_*() group of -functions from {manynet}, because they create graphs -following some strict rule(s). The two functions you will want to use -here are create_empty() and create_filled(). -create_empty() creates an empty graph with the given number -of nodes, in this case 50 nodes. For create_filled() we’re -creating a full graph, where all of the nodes are connected to all of -the other nodes.

-

Let’s say that we want to explore networks of fifty nodes in this -script. Graph one empty and one complete network with 50 nodes each, -give them an informative title, and plot the graphs together. What would -a complete network with half the nodes look like? Add that too.

-
- -
-
-
(autographr(create_empty(50), "circle") + ggtitle("Empty graph"))
-(autographr(create_filled(50)) + ggtitle("Complete graph"))
-(autographr(create_filled(50/2)) + ggtitle("Complete graph (smaller)"))
-
-
-

Stars

-

In a star network, there is one node to which all other nodes are -connected. There is no transitivity. The maximum path length is two. And -centrality is maximised! This network maximises all centrality measures -as one node acts as the sole bridge connecting one part of the network -to the other.

-

Use the create_star() function to graph three star -networks:

-
    -
  • an undirected star network
  • -
  • a out-directed star network
  • -
  • and an in-directed star network
  • -
-
- -
-
-
(autographr(create_star(50)) + ggtitle("Star graph"))
-(autographr(create_star(50, directed = TRUE)) + ggtitle("Star out"))
-(autographr(to_redirected(create_star(50, directed = TRUE))) + ggtitle("Star in"))
-
-
-
-

Trees

-

Trees, or regular trees, are networks with branching nodes. They can -be directed or undirected, and tend to indicate strong hierarchy. Again -graph three networks:

-
    -
  • one undirected with 2 branches per node
  • -
  • a directed network with 2 branches per node
  • -
  • the same as above, but graphed using the “tree” layout
  • -
-
- -
-
-
# width argument specifies the breadth of the branches
-(autographr(create_tree(50, width = 2)) + ggtitle("Tree graph"))
-(autographr(create_tree(50, width = 2, directed = TRUE)) + ggtitle("Tree out"))
-(autographr(create_tree(50, width = 2, directed = TRUE), "tree") + ggtitle("Tree layout"))
-
-

Try varying the width argument to see the result.

-
-
-

Lattices

-

Lattices reflect highly clustered networks where there is a high -likelihood that interaction partners also interact. They are used to -show how clustering facilitates or limits diffusion or makes pockets of -behaviour stable.

-
-
-
-
-
- -
-
-

Note that create_lattice() in {manynet} -works a little differently to how it works in {igraph}. In -{igraph} the number or vector passed to the function -indicates the length of each dimension. So c(50) would be a -one-dimensional lattice, essentially a chain of 50 nodes connected to -their neighbours. c(50,50) would be a two-dimensional -lattice, of 50 nodes long and 50 nodes wide. c(50,50,50) -would be a three-dimensional lattice, of 50 nodes long, 50 nodes wide, -and 50 nodes deep, etc.

-

But this doesn’t help us when we want to see what a lattice -representation with the same order (number of nodes) as a given network -would be. For example, perhaps we just want to know what a lattice with -50 nodes would look like. So {manynet} instead tries to -find the most even or balanced two-dimensional representation with a -given number of nodes.

-

Graph two lattices, one with 50 nodes, and another with half the -number of nodes.

-
- -
-
-
(autographr(create_lattice(50)) + ggtitle("One-mode lattice graph"))
-(autographr(create_lattice(50/2)) + ggtitle("Smaller lattice graph"))
-
-
-
-

Rings

-

This creates a graph where each node has two separate neighbours -which creates a ring graph. Graph three ring networks:

-
    -
  • one with 50 nodes
  • -
  • one with 50 nodes where they are connected to neighbours two steps -away, on a “circle” layout
  • -
  • the same as above, but on a “stress” layout
  • -
-
- -
-
-
(autographr(create_ring(50)) + ggtitle("Ring graph", subtitle = "Starring Naomi Watts"))
-# width argument specifies the width of the ring
-(autographr(create_ring(50, width = 2), "circle") + ggtitle("The Ring Two", subtitle = "No different?"))
-(autographr(create_ring(50, width = 2), "stress") + ggtitle("The Ring Two v2.0"))
-
-
-
-
-

Probabilistic graphs

-

Next we are going to take a look at some probabilistic graphs. These -involve some random element, perhaps in addition to specific rules, to -stochastically ‘generate’ networks of certain types of topologies. As -such, we’ll be using the generate_*() group of functions -from {manynet}.

-
-

Random graphs

-

An Erdös-Renyi graph is simply a random graph. You will need to -specify the probability of a tie in addition to the number of nodes. An -Erdos-Renyi graph on the vertex set \(V\) is a random graph which connects each -pair of nodes \({i,j}\) with -probability \(p\), independent. Note -that for a “sparse” ER graphs, \(p\) -must decrease as \(N\) goes up. -Generate three random networks of 50 nodes and a density of 0.08:

-
- -
-
-
(autographr(generate_random(50, 0.08)) + ggtitle("Random 1 graph"))
-(autographr(generate_random(50, 0.08)) + ggtitle("Random 2 graph"))
-(autographr(generate_random(50, 0.08)) + ggtitle("Random 3 graph"))
-
-

Keep going if you like… it will be a little different every time. -Note that you can also pass the second argument an integer, in which -case the function will interpret that as the number of ties/edges rather -than the probability that a tie is present. Try generating a random -graph with 200 edges/ties now:

-
- -
-
-
(erdren4 <- autographr(generate_random(50, 200)) + ggtitle("Random 1 graph"))
-
-
-
-

Small-world graphs

-

Remember the ring graph from above? What if we rewire (change) some -of the edges at a certain probability? This is how small-world networks -are generated. Graph three small-world networks, all with 50 nodes and a -rewiring probability of 0.025.

-
- -
-
-
(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 1 graph"))
-(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 2 graph"))
-(autographr(generate_smallworld(50, 0.025)) + ggtitle("Smallworld 3 graph"))
-
-

With on average 2.5 ties randomly rewired, does the structure look -different? This is a small-world network, where clustering/transitivity -remains high but path lengths are much lower than they would otherwise -be. Remember that in a small-world network, the shortest-path distance -between nodes increases sufficiently slowly as a function of the number -of nodes in the network. You can also call these networks a -Watts–Strogatz toy network. If you want to review this, go back to the -reading by Watts (2004).

-

There is also such a thing as a network’s small-world coefficient. -See the help page for more details, but with the default equation -(‘omega’), the coefficient typically ranges between 0 and 1, where 1 is -as close to a small-world as possible. Try it now on a small-world -generated network, but with a rewiring probability of 0.25:

-
- -
-
-
network_smallworld(generate_smallworld(50, 0.25))
-
-
-
-

Scale-free graphs

-

There is another famous model in network science: the scale-free -model. Remember: “In many real-world networks, the distribution of the -number of network neighbours the degree distribution is typically -right-skewed with a”heavy tail”. A majority of the nodes have -less-than-average degree and a small fraction of hubs are many times -better connected than average (2004, p. 250).

-

The following generates a scale-free graph according to the -Barabasi-Albert (BA) model that rests upon the mechanism of preferential -attachment. More on this in the Watts paper (2005, p.51) and Merton -(1968). The BA model rests on two mechanisms: population growth and -preferential attachment. Population growth: real networks grow in time -as new members join the population. Preferential/cumulative attachment -means that newly arriving nodes will tend to connect to already -well-connected nodes rather than poorly connected ones.

-

Generate and graph three scale-free networks, with alpha parameters -of 0.5, 1, and 1.5.

-
- -
-
-
(autographr(generate_scalefree(50, 0.5)) +
-    ggtitle("Scalefree 1 graph", subtitle = "Power = .5"))
-(autographr(generate_scalefree(50, 1)) +
-    ggtitle("Scalefree 2 graph", subtitle = "Power = 1"))
-(autographr(generate_scalefree(50, 1.5)) +
-    ggtitle("Scalefree 3 graph", subtitle = "Power = 1.5"))
-
-

You can also test whether a network has a degree distribution that -fits the scale-free model. When a Kolmogorov-Smirnov test p-value less -than 0.05 is implied, a message is given that you should reject the -hypothesis that a power law fits here. With an alpha/power-law exponent -between 2 and 3, one generally cannot reject the hypothesis that the -observed data comes from a power-law distribution.

-
- -
-
-
network_scalefree(generate_scalefree(50, 2))
-
-
-
-
-
-

Core-Periphery

-
-

Core-periphery graphs

-

Lastly, we’ll take a look at some core-periphery graphs. The most -common definition of a core-periphery network is one in which the -network can be partitioned into two groups such that one group of nodes -(the core) has dense interactions among themselves, moderately dense -interactions with the second group, and the second group (the periphery) -has sparse interactions among themselves.

-
-
-
-
-
- -
-
-

We can visualise extreme versions of such a network using the -create_core() function. Graph a core-periphery network of -50 nodes (which, unless a core-periphery membership assignment is given, -will be split evenly between core and periphery partitions).

-
- -
-
-
(autographr(create_core(50)) + ggtitle("Core"))
-
-
-
-

Core-periphery assignment

-

Let’s consider identifying the core and peripheral nodes in a -network. Let’s use the ison_lawfirm dataset from -{manynet}. This dataset involves relations between partners -in a corporate law firm in New England. First of all, graph the data and -see whether you can guess which nodes might be part of the core and -which are part of the periphery. Color the nodes by Gender, Office, -Practice, and School. Any you might think correlate with core -status?

-
- -
-
-
autographr(ison_lawfirm, node_color = "School")
-
-

Next, let’s assign nodes to the core and periphery blocks using the -node_core() function from {migraph}. It works -pretty straightforwardly. By default it runs down the rank order of -nodes by their degree, at each step working out whether including the -next highest degree node in the core will maximise the core-periphery -structure of the network.

-
- -
-
-
ison_lawfirm %>% 
-  mutate(nc = node_core(ison_lawfirm)) %>% 
-  autographr(node_color = "nc")
-
-

This graph suggests that there might even be two cores here, one on -the left and one on the right.

-

But is it really all that much of a core-periphery structure? We can -establish how correlated our network is compared to a core-periphery -model of the same dimension using network_core().

-
- -
-
-
network_core(ison_lawfirm, node_core(ison_lawfirm))
-
-
-
-
-
-
- -
-
-

Note that node_core() also includes a method that -descends through the rank order of nodes’ eigenvector centralities -instead of degree centralities. Why might that not be such a good choice -here?

-

Now let’s see whether our core-periphery membership vector correlates -with any of the three categorical attributes we looked at before. Since -we’re doing this on categorical variables, we’ll use the Chi-squared -test in base R. Take a look and see whether there is a statistically -significant association between gender and core (or periphery) -status.

-
- -
-
-
chisq.test(node_core(ison_lawfirm), node_attribute(ison_lawfirm, "Gender"))
-
-
-
-
-
-
- -
-
-
-
-

Coreness values

-

An alternative route is to identify ‘core’ nodes depending on their -k-coreness. In {migraph}, we can return nodes -k-coreness with node_coreness() instead of the -node_core() used for core-periphery.

-
- -
-
-
ison_lawfirm %>% 
-  mutate(ncn = node_coreness(ison_lawfirm)) %>% 
-  autographr(node_color = "ncn")
-
-
-
-
-
-
- -
-
-
-
-
-
-
- -
-
-
-
-
-

Network Resilience

-
-

How cohesive is the network?

-

When investigating a network’s resilience, we might think of whether -the network will remain connected despite some nodes or ties dropping -out. Let’s explore how resilient a (core) network of adolescents -(ison_adolescents) might be. First, we might be interested -in whether the network is connected at all.

-
- -
-
-
network_connectedness(ison_adolescents)
-
-

This measure gets at the proportion of dyads that can reach each -other in the network. Another way to get at this would be to see how -many components there are in the network.

-
-
-
-
-
- -
-
-

A dropped tie can have severe consequences to the topology of a -network if it is a bridge, say. But a dropped node can be even more -consequential, as it will take any ties it has with it. Find out how -many dropped nodes it would take to (further) fragment the network.

-
- -
-
-
network_cohesion(ison_adolescents)
-
-
-
-
-
-
- -
-
-
-
-
-
-
- -
-
-
-
-

Identifying cutpoints

-

But which are these nodes? Is there more than one? Nodes that -endanger fragmentation of the network are called cutpoints. Find and use -a function to identify which, if any, of the nodes in the -ison_adolescents network are cutpoints.

-
- -
-
-
node_is_cutpoint(ison_adolescents)
-
-

Ok, so this results in a vector identifying which nodes are cutpoints -(TRUE) or not (FALSE). Somewhat more useful though would be to highlight -these nodes on the network. Can you add a node attribute that highlights -which nodes are cutpoints?

-
- -
-
-
ison_adolescents |> mutate(cut = node_is_cutpoint(ison_adolescents)) |> 
-  autographr(node_color = "cut")
-
-
-
-

Identifying bridges

-

Let’s do something similar now, but with respect to ties rather than -nodes.

-
- -
-
-
network_adhesion(ison_adolescents)
-ison_adolescents |> mutate_ties(cut = tie_is_bridge(ison_adolescents)) |> 
-  autographr(edge_color = "cut")
-
-

We could also investigate the opposite of a bridge, the degree to -which ties are deeply embedded in triangles. This is called (rather -confusingly) tie cohesion.

-
- -
-
-
ison_adolescents |> mutate_ties(coh = tie_cohesion(ison_adolescents)) |> 
-  autographr(edge_color = "coh")
-
-

Where would you target your efforts if you wanted to fragment this -network? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - -
-
- -
- -
-
-
-
- - -
-

Topology

-

by James Hollway, Andrea -Biswas-Tortajada

-
- - -
-
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/inst/tutorials/tutorial8/regression.Rmd b/inst/tutorials/tutorial8/diversity.Rmd similarity index 63% rename from inst/tutorials/tutorial8/regression.Rmd rename to inst/tutorials/tutorial8/diversity.Rmd index c92ecdc45..0e1d55156 100644 --- a/inst/tutorials/tutorial8/regression.Rmd +++ b/inst/tutorials/tutorial8/diversity.Rmd @@ -1,10 +1,14 @@ --- -title: "Regression" +title: "Diversity and Regression" author: "by James Hollway" output: learnr::tutorial: theme: journal runtime: shiny_prerendered +description: > + This tutorial aims to teach you how to measure and test network diversity, + moving from univariate to multivariate tests, including + network linear models (multiple regression quadratic assignment procedures). --- ```{r setup, include=FALSE} @@ -19,10 +23,9 @@ marvel_friends <- to_giant(marvel_friends) marvel_friends <- marvel_friends %>% to_subgraph(Appearances >= mean(Appearances)) ``` -## Setting up +## Initial visualisation For this session, we'll explore a couple of different datasets. - First, let's examine homogeneity/heterogeneity in the Marvel relationships dataset from `{manynet}`, `ison_marvel_relationships`. The dataset is quite complicated, @@ -74,10 +77,10 @@ marvel_friends <- marvel_friends %>% to_subgraph(Appearances >= mean(Appearances marvel_friends ``` -This gives us a dataset of nearly twenty characters and a little more than 100 edges. +This gives us an undirected network of nearly twenty characters and a little more than 100 edges. Recall that this data has several nodal attributes. -Explore a couple of these attributes, "Gender" and "PowerOrigin" visually -using `autographr()`. +Let's explore a couple of these attributes, "Gender" and "PowerOrigin", visually +using `graphr()`. ```{r plotfriends, exercise=TRUE, purl = FALSE} @@ -88,30 +91,78 @@ using `autographr()`. # you will need to use two different aesthetic dimensions to # represent them together. 
# Which will you present as shapes and which as colors? -autographr(____, - node_shape = ____, - node_color = ____) +graphr(____, + node_shape = ____, + node_color = ____) ``` ```{r plotfriends-solution} -autographr(marvel_friends, +graphr(marvel_friends, node_shape = "Gender", node_color = "PowerOrigin") ``` These variables seem to be distributed unevenly across the network. -There seems to be some homophily, or like choosing like, -operating here, but it is difficult to tell because there are -clearly more male than female superheros shown here, +There seems to be some homophily -- or like choosing like -- +operating here, but it is difficult to say conclusively because there are +clearly more male than female superheros, as well as clearly more superheros of mutant origin than others. -To get started then, we need to establish how diverse this network really is. +So what might seem like homophily could just be a result of there being +many more opportunities for ties between nodes of some categories. +We therefore need to establish how diverse this network really is. + +## Measuring richness + +We can begin by measuring the number of different categories there are. +Here we might assume that the more different categories there are, +the more diverse the network is. +The measure of 'richness' is inherited from the study of biodiversity, +and calculates the number of different categories that are present in a +dataset for a given variable.
+ +```{r rich, exercise=TRUE, purl = FALSE} + +``` + +```{r rich-hint, purl = FALSE} +net_richness(____, ____) +``` + +```{r rich-solution} +net_richness(marvel_friends, "Gender") +net_richness(marvel_friends, "PowerOrigin") +net_richness(marvel_friends, "Attractive") +net_richness(marvel_friends, "Rich") +net_richness(marvel_friends, "Intellect") +``` + +```{r richness-question, echo=FALSE, purl = FALSE} +question("Which variable is the most 'diverse' according to this richness measure?", + answer("Gender"), + answer("PowerOrigin", + correct = TRUE, + message = "There are four categories available in this data for power origin, while the other variables include only two categories each."), + answer("Attractive"), + answer("Rich"), + answer("Intellect"), + random_answer_order = TRUE, + allow_retry = TRUE +) +``` -## Calculating diversity +Note though that 'richness' as a network measure does not include +any sense of how distributed these categories are around the network. +There is a measure of nodal richness available, +which counts the number of different categories to which each node is tied, +but this does not offer a summary of how evenly distributed the appearance +of categories is. +For that we would need to move on to something like the Blau index. -### Calculating Blau index +## Measuring diversity -We can begin by calculating the diversity in the network for each attribute. -Recall that the Blau index for any given diversity variable is: +Another measure that reflects the diversity in the network for each attribute +is the Blau index. +Recall that the Blau index for any given variable is: $$1 - \sum p_i^2$$ @@ -120,21 +171,22 @@ and $i$ indexes each of the given categories. A perfectly homogeneous group would receive a score of 0, while a perfectly heterogeneous group (with members spread evenly over the maximum categories) would receive a score of 1. +Obtain the network diversity scores for our five attributes.
```{r blau, exercise=TRUE, purl = FALSE} ``` ```{r blau-hint, purl = FALSE} -network_diversity(____, ____) +net_diversity(____, ____) ``` ```{r blau-solution} -network_diversity(marvel_friends, "Gender") -network_diversity(marvel_friends, "PowerOrigin") -network_diversity(marvel_friends, "Attractive") -network_diversity(marvel_friends, "Rich") -network_diversity(marvel_friends, "Intellect") +net_diversity(marvel_friends, "Gender") +net_diversity(marvel_friends, "PowerOrigin") +net_diversity(marvel_friends, "Attractive") +net_diversity(marvel_friends, "Rich") +net_diversity(marvel_friends, "Intellect") ``` Looks like there is more diversity in terms of where these characters got @@ -151,30 +203,32 @@ or equally intellectually diverse across gender.^[Note that this works for calcu ``` ```{r crossref-hint, purl = FALSE} -network_diversity(____, ____, ____) +net_diversity(____, ____, ____) ``` ```{r crossref-solution} -network_diversity(marvel_friends, "Gender", "PowerOrigin") +net_diversity(marvel_friends, "Gender", "PowerOrigin") as.factor(node_attribute(marvel_friends, "PowerOrigin")) # view categories in PowerOrigin -network_diversity(marvel_friends, "Intellect", "Gender") +net_diversity(marvel_friends, "Intellect", "Gender") ``` Note that the length of the vector returned as a result is the number of categories in the second category listed. -It looks like some origin stories are much more diverse than others. +It looks like some origin stories are more gender diverse than others. Gods (just Thor here) and humans are all men, whereas those with mutant or radiation origin stories are more gender diverse. -There doesn't seem to be any significant difference in intellect +There doesn't appear to be much difference in intellect across gender categories however. Ok, this tells us about how (un)even the distribution of these variables is in this network, -but it doesn't necessarily tell us whether within this network there is homophily/heterophily. 
+but it doesn't necessarily tell us whether ties are appearing more frequently +between nodes of similar (or different) categories. +For that we need to look at homophily/heterophily. -### Calculating EI index +## Measuring heterophily -A step in this direction is to calculate the EI (or E-I) index. -Calculating the EI index follows the same syntax. +The EI (or E-I) index offers a way to measure the degree to which ties +appear between rather than within groups of nodes of the same category. Recall that the EI index is calculated as: $$\frac{E-I}{E+I}$$ @@ -182,7 +236,7 @@ $$\frac{E-I}{E+I}$$ where $E$ is the number of ties present between a variable's categories (i.e. external), and $I$ is the number of ties present within a variable's categories (i.e. internal). As such, an EI index of -1 suggests perfect homophily, whereas an EI index of +1 suggests perfect heterophily. -(This is why the function is called `network_heterophily()`). +(This is why the function is called `net_heterophily()`). Check how homophilic three variables in the network are, "Gender", "PowerOrigin", and "Attractive". @@ -192,13 +246,13 @@ Check how homophilic three variables in the network are, ``` ```{r ei-hint, purl = FALSE} -network_heterophily(____, ____) +net_heterophily(____, ____) ``` ```{r ei-solution} -(obs.gender <- network_heterophily(marvel_friends, "Gender")) -(obs.powers <- network_heterophily(marvel_friends, "PowerOrigin")) -(obs.attract <- network_heterophily(marvel_friends, "Attractive")) +(obs.gender <- net_heterophily(marvel_friends, "Gender")) +(obs.powers <- net_heterophily(marvel_friends, "PowerOrigin")) +(obs.attract <- net_heterophily(marvel_friends, "Attractive")) ``` ```{r homophily-present, echo=FALSE, purl = FALSE} @@ -220,21 +274,21 @@ and doesn't tell us whether this is any more or less than what we might expect the score to be by chance for a network of this size and density and distribution of that attribute. 
-## Testing scores +## CUG tests -### Conditional uniform graph tests - -To see whether we should be surprised by a score this high/low, -we will simulate a series of random graphs +To see whether we should be surprised by scores this high/low, +we compare these scores with those from a series of random graphs (Erdös-Renyi/Bernoulli) of the same dimensions and -distribution of the attribute to find out whether there is +distribution of the attribute. +This can help us establish whether there is more homophily or heterophily than expected by chance. -This is called a conditional uniform graph test, -but `{migraph}` chooses more descriptive function names, +This is often called a conditional uniform graph or CUG test, +but `{migraph}` uses more descriptive function names, such as `test_random()`. Plot the results of running this function with respect to the EI index -on each of the three variables used above one thousand times. +on each of the three variables. +You can specify that one thousand simulations should be used using `times = 1000`. 
```{r rando, exercise=TRUE, purl = FALSE} @@ -250,14 +304,14 @@ plot(rand.____) ```{r rando-hint-3, purl = FALSE} rand.gender <- test_random(marvel_friends, - network_heterophily, attribute = "Gender", + net_heterophily, attribute = "Gender", times = 1000) rand.power <- test_random(marvel_friends, - network_heterophily, attribute = "PowerOrigin", - times = 1000) + net_heterophily, attribute = "PowerOrigin", + times = 1000) rand.attract <- test_random(marvel_friends, - network_heterophily, attribute = "Attractive", - times = 1000) + net_heterophily, attribute = "Attractive", + times = 1000) plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") plot(rand.attract) + ggtitle("CUG test results for 'Attractive' attribute") @@ -265,14 +319,14 @@ plot(rand.attract) + ggtitle("CUG test results for 'Attractive' attribute") ```{r rando-solution} rand.gender <- test_random(marvel_friends, - network_heterophily, attribute = "Gender", + net_heterophily, attribute = "Gender", times = 1000) rand.power <- test_random(marvel_friends, - network_heterophily, attribute = "PowerOrigin", - times = 1000) + net_heterophily, attribute = "PowerOrigin", + times = 1000) rand.attract <- test_random(marvel_friends, - network_heterophily, attribute = "Attractive", - times = 1000) + net_heterophily, attribute = "Attractive", + times = 1000) plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") plot(rand.attract) + ggtitle("CUG test results for 'Attractive' attribute") @@ -296,15 +350,19 @@ While the coefficient itself is close to 0 (neither strong homophily nor heterop all the random networks generated returned larger EI scores, between .1 and .4. That is, there is significantly less heterophily here than expected. 
-### Quadratic assignment procedure tests +## QAP tests -Ah, but perhaps the random graph is not the best reference group +Ah, but perhaps random graphs are not the best reference group for establishing whether there is a significant homophily effect here. -After all, social networks are not completely random; they are structured. - -Another approach is to use permutations of the network. -Permuting the network retains the structure of the network, -but reassigns any labels (variables) randomly. +After all, social networks are not completely random; +they are _structured_ in particular ways, such as some nodes having higher degrees +than others, or there being a core and periphery or community topology. + +Another approach to establishing a baseline for whether we should be surprised +by a given score or not is to use permutations of the underlying network +instead of random graphs. +Permuting the network retains the structure of the network +because the ties are kept and only the labels (variables) are reassigned randomly. Let's first plot the observed data and some permuted data next to each other. ```{r perm, exercise=TRUE, purl = FALSE} @@ -312,18 +370,18 @@ Let's first plot the observed data and some permuted data next to each other. 
``` ```{r perm-hint, purl = FALSE} -autographr(generate_permutation(____, with_attr = TRUE), ____) +graphr(generate_permutation(____, with_attr = TRUE), ____) ``` ```{r perm-solution} -old <- autographr(marvel_friends, - labels = FALSE, node_size = 6, - node_color = "PowerOrigin", - node_shape = "Gender") + ggtitle("Original network") -new <- autographr(generate_permutation(marvel_friends, with_attr = TRUE), - labels = FALSE, node_size = 6, - node_color = "PowerOrigin", - node_shape = "Gender") + ggtitle("Permuted network") +old <- graphr(marvel_friends, + labels = FALSE, node_size = 6, + node_color = "PowerOrigin", + node_shape = "Gender") + ggtitle("Original network") +new <- graphr(generate_permutation(marvel_friends, with_attr = TRUE), + labels = FALSE, node_size = 6, + node_color = "PowerOrigin", + node_shape = "Gender") + ggtitle("Permuted network") old + new ``` @@ -334,7 +392,7 @@ question("Which of the following is true?", answer("Permuted networks retain the structure of the original network.", correct = TRUE, message = learnr::random_praise()), - answer("Both random and permuted networks retain the distribution of attributes.", + answer("Both random and permuted networks retain the proportion of attributes from the original network.", correct = TRUE, message = learnr::random_praise()), answer("Permuted networks retain the ties among the same nodes from the original network.", @@ -344,8 +402,8 @@ question("Which of the following is true?", ) ``` -This single permutation suggests there might otherwise be some more even mixing of these -attributes, but it is just a single permutation. +This single permutation suggests a more even mixing of these +attributes is possible, but it is just a single permutation. Let's try a test that runs this over a succession of permutations, just as we did with random graphs. Plot the results for gender and power according to the random and permutation baselines. 
@@ -361,14 +419,20 @@ test_permutation(____, FUN = ____, attribute = ____, ```{r testperm-solution, purl = FALSE} (perm.gender <- test_permutation(marvel_friends, - network_heterophily, attribute = "Gender", + net_heterophily, attribute = "Gender", times = 1000)) (perm.power <- test_permutation(marvel_friends, - network_heterophily, attribute = "PowerOrigin", + net_heterophily, attribute = "PowerOrigin", times = 1000)) -(plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") + theme(plot.title = element_text(size=8)) | plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") + theme(plot.title = element_text(size=8))) / -(plot(perm.gender) + ggtitle("QAP test results for 'Gender' attribute") + theme(plot.title = element_text(size=8)) | plot(perm.power) + ggtitle("QAP test results for 'PowerOrigin' attribute") + theme(plot.title = element_text(size=8))) +(plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") + + theme(plot.title = element_text(size=8)) | + plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") + + theme(plot.title = element_text(size=8))) / +(plot(perm.gender) + ggtitle("QAP test results for 'Gender' attribute") + + theme(plot.title = element_text(size=8)) | + plot(perm.power) + ggtitle("QAP test results for 'PowerOrigin' attribute") + + theme(plot.title = element_text(size=8))) ``` Again, we see that there is perhaps nothing so surprising that we got the homophily score @@ -376,7 +440,7 @@ for gender that we did, but the lack of power origin heterophily is surprising. Note how the distributions are generally wider when permuting the observed network than creating a random distribution (be mindful of the scale of the _x_-axis). That is, taking into account the structure of the network leads us to -expect a larger spread in the EI index than when the variable is distributed around a random network. 
+expect a larger spread in the EI index than when the variable is distributed across a random network. ```{r cupqap-results, echo=FALSE, purl = FALSE} question("What can we say from these results?", @@ -394,7 +458,7 @@ question("What can we say from these results?", ) ``` -## Network linear models +## Network regression Next let us examine homophily in another network. The data were collected as part of an early experiment on communication between social network researchers who were using an Electronic Information Exchange System (EIES). @@ -404,18 +468,20 @@ Nodal attributes collected include the primary discipline and number of citation ```{r introeies, exercise=TRUE, purl = FALSE} ison_networkers -autographr(ison_networkers, - node_color = "Discipline") +graphr(ison_networkers, node_color = "Discipline") ``` Let's use both the continuous `Citations` and the categorical `Discipline` variables and come up with a couple of key hypotheses: -- $H_1$: whether these researchers send more emails to those who are cited more -- $H_2$: whether there is disciplinary homophily +- $H_1$: researchers send more emails to those who are cited more (popularity hypothesis) +- $H_2$: researchers send more emails to those of the same discipline (disciplinary homophily hypothesis) Let's start with a pretty maximally specified model -(note that it doesn't make sense to include both ego and alter effects because these are undirected). +(note that it doesn't make sense to include both ego and alter effects because these networks are undirected). +We are using `times = 200` here because of time-out limitations in the `learnr` tutorial system, +but for publication quality results you would want to base your conclusions on 2000 +simulations or more. 
```{r qapmax, exercise=TRUE, exercise.timelimit = 3600, purl = FALSE} @@ -423,6 +489,7 @@ Let's start with a pretty maximally specified model ```{r qapmax-hint-1, purl = FALSE} network_reg(____, ison_networkers, times = 200) +# If the model runs into a timeout error, please reduce the number of 'times' in the function above. ``` ```{r qapmax-hint-2, purl = FALSE} @@ -430,20 +497,13 @@ weight ~ alter(Citations) + sim(Citations) + alter(Discipline) + same(Discipline) ``` -```{r qapmax-hint-3, purl = FALSE} -model1 <- network_reg(weight ~ alter(Citations) + sim(Citations) + - alter(Discipline) + same(Discipline), - ison_networkers, times = 200) -# If the model runs into a timeout error, please reduce the number of 'times' in the function above. -``` - ```{r qapmax-solution} -model1 <- network_reg(weight ~ alter(Citations) + sim(Citations) + - alter(Discipline) + same(Discipline), +model1 <- network_reg(weight ~ ego(Citations) + alter(Citations) + sim(Citations) + + ego(Discipline) + alter(Discipline) + same(Discipline), ison_networkers, times = 200) ``` -We can use tidy methods to get the salient information from this model, +We can use tidy methods (e.g. `tidy()`, `glance()`) to get the salient information from this model, and `{migraph}` includes also a plot method for these results to facilitate the quick interpretation of these results. @@ -458,12 +518,12 @@ plot(model1) ``` This violin plot presents the distribution of coefficients from permutations of the network, -with the fitted coefficient from the data as a red dot. -Subtle lines are used to indicate 95%, -but here the distributions are rendered so wide that they are often not seen. +with the coefficient fitted from the data as a red dot. +Lines are used to indicate 95% thresholds, +but here the distributions are rendered so wide that they are often not visible. 
```{r qap-interp, echo=FALSE, purl = FALSE} -question("What can we say from these results?", +question("What can we say from the results from model 1?", answer("Researchers send more messages to those who are cited more", message = "Looks like alter Citations is not significant."), answer("Researchers send more messages to those who similarly cited", @@ -478,3 +538,9 @@ question("What can we say from these results?", allow_retry = TRUE ) ``` + +While these are the conclusions from this 'play' data, +you may have more and more interesting data at hand. +How would you go about specifying such a model? +Why is such an approach more appropriate for network data than linear +or logistic regression? diff --git a/inst/tutorials/tutorial8/regression.html b/inst/tutorials/tutorial8/diversity.html similarity index 80% rename from inst/tutorials/tutorial8/regression.html rename to inst/tutorials/tutorial8/diversity.html index 7373dc9b4..770a6bb5a 100644 --- a/inst/tutorials/tutorial8/regression.html +++ b/inst/tutorials/tutorial8/diversity.html @@ -15,7 +15,7 @@ -Regression +Diversity and Regression @@ -110,10 +110,10 @@
-
-

Setting up

-

For this session, we’ll explore a couple of different datasets.

-

First, let’s examine homogeneity/heterogeneity in the Marvel +

+

Initial visualisation

+

For this session, we’ll explore a couple of different datasets. +First, let’s examine homogeneity/heterogeneity in the Marvel relationships dataset from {manynet}, ison_marvel_relationships. The dataset is quite complicated, so to make this simpler, let’s concentrate on:

@@ -173,10 +173,10 @@

Setting up

marvel_friends <- marvel_friends %>% to_subgraph(Appearances >= mean(Appearances)) marvel_friends
-

This gives us a dataset of nearly twenty characters and a little more -than 100 edges. Recall that this data has several nodal attributes. -Explore a couple of these attributes, “Gender” and “PowerOrigin” -visually using autographr().

+

This gives us an undirected network of nearly twenty characters and a +little more than 100 edges. Recall that this data has several nodal +attributes. Let’s explore a couple of these attributes, “Gender” and +“PowerOrigin”, visually using graphr().

@@ -189,38 +189,80 @@

Setting up

# you will need to use two different aesthetic dimensions to # represent them together. # Which will you present as shapes and which as colors? -autographr(____, - node_shape = ____, - node_color = ____) +graphr(____, + node_shape = ____, + node_color = ____)
-
autographr(marvel_friends, 
+
graphr(marvel_friends, 
            node_shape = "Gender",
            node_color = "PowerOrigin")

These variables seem to be distributed unevenly across the network. -There seems to be some homophily, or like choosing like, operating here, -but it is difficult to tell because there are clearly more male than -female superheros shown here, as well as clearly more superheros of -mutant origin than others. To get started then, we need to establish how -diverse this network really is.

+There seems to be some homophily – or like choosing like – operating +here, but it is difficult to say conclusively because there are clearly +more male than female superheros, as well as clearly more superheros of +mutant origin than others. So what might seem like homophily could just +be a result of there being many more opportunities for ties between +nodes of some categories. We therefore need to establish how diverse +this network really is.

-
-

Calculating diversity

-
-

Calculating Blau index

-

We can begin by calculating the diversity in the network for each -attribute. Recall that the Blau index for any given diversity variable -is:

+
+

Measuring richness

+

We can begin by measuring the number of different categories there +are. Here we might assume that the more different categories there are, +the more diverse the network is. The measure of ‘richness’ is inherited +from the study of biodiversity, and calculates the number of different +categories that are present in a dataset for a given variable.

+
+ +
+
+
net_richness(____, ____)
+
+
+
net_richness(marvel_friends, "Gender")
+net_richness(marvel_friends, "PowerOrigin")
+net_richness(marvel_friends, "Attractive")
+net_richness(marvel_friends, "Rich")
+net_richness(marvel_friends, "Intellect")
+
+
+
+
+
+
+ +
+
+

Note though that ‘richness’ as a network measure does not include any +sense of how distributed these categories are around the network. There +is a measure of nodal richness available, which counts the number of +different categories to which each node is tied, but this does not offer +a summary of how evenly distributed the appearance of categories is. +For that we would need to move on to something like the Blau index.

+
+
+

Measuring diversity

+

Another measure that reflects the diversity in the network for each +attribute is the Blau index. Recall that the Blau index for any given +variable is:

\[1 - \sum p_i^2\]

where \(p\) represents the proportion belonging to any given category, and \(i\) indexes each of the given categories. A perfectly homogeneous group would receive a score of 0, while a perfectly heterogeneous group (with members spread evenly over the -maximum categories) would receive a score of 1.

+maximum categories) would receive a score of 1. Obtain the network +diversity scores for our five attributes.

@@ -229,16 +271,16 @@

Calculating Blau index

-
network_diversity(____, ____)
+
net_diversity(____, ____)
-
network_diversity(marvel_friends, "Gender")
-network_diversity(marvel_friends, "PowerOrigin")
-network_diversity(marvel_friends, "Attractive")
-network_diversity(marvel_friends, "Rich")
-network_diversity(marvel_friends, "Intellect")
+
net_diversity(marvel_friends, "Gender")
+net_diversity(marvel_friends, "PowerOrigin")
+net_diversity(marvel_friends, "Attractive")
+net_diversity(marvel_friends, "Rich")
+net_diversity(marvel_friends, "Intellect")

Looks like there is more diversity in terms of where these characters got their powers, whether they have significant intellectual powers, and @@ -256,26 +298,26 @@

Calculating Blau index

-
network_diversity(____, ____, ____)
+
net_diversity(____, ____, ____)
-
network_diversity(marvel_friends, "Gender", "PowerOrigin")
+
net_diversity(marvel_friends, "Gender", "PowerOrigin")
 as.factor(node_attribute(marvel_friends, "PowerOrigin")) # view categories in PowerOrigin
-network_diversity(marvel_friends, "Intellect", "Gender")
+net_diversity(marvel_friends, "Intellect", "Gender")

Note that the length of the vector returned as a result is the number of categories in the second category listed. It looks like some origin -stories are much more diverse than others. Gods (just Thor here) and +stories are more gender diverse than others. Gods (just Thor here) and humans are all men, whereas those with mutant or radiation origin -stories are more gender diverse. There doesn’t seem to be any -significant difference in intellect across gender categories -however.

+stories are more gender diverse. There doesn’t appear to be much +difference in intellect across gender categories however.

Ok, this tells us about how (un)even the distribution of these variables is in this network, but it doesn’t necessarily tell us whether -within this network there is homophily/heterophily.

-
+ties are appearing more frequently between nodes of similar (or +different) categories. For that we need to look at +homophily/heterophily.


    @@ -285,11 +327,12 @@

    Calculating Blau index

    class="footnote-back">↩︎

-
-

Calculating EI index

-

A step in this direction is to calculate the EI (or E-I) index. -Calculating the EI index follows the same syntax. Recall that the EI -index is calculated as:

+
+
+

Measuring heterophily

+

The EI (or E-I) index offers a way to measure the degree to which +ties appear between rather than within groups of nodes of the same +category. Recall that the EI index is calculated as:

\[\frac{E-I}{E+I}\]

where \(E\) is the number of ties present between a variable’s categories (i.e. external), and Calculating EI index variable’s categories (i.e. internal). As such, an EI index of -1 suggests perfect homophily, whereas an EI index of +1 suggests perfect heterophily. (This is why the function is called -network_heterophily()).

+net_heterophily()).

Check how homophilic three variables in the network are, “Gender”, “PowerOrigin”, and “Attractive”.

Calculating EI index
-
network_heterophily(____, ____)
+
net_heterophily(____, ____)
-
(obs.gender <- network_heterophily(marvel_friends, "Gender"))
-(obs.powers <- network_heterophily(marvel_friends, "PowerOrigin")) 
-(obs.attract <- network_heterophily(marvel_friends, "Attractive")) 
+
(obs.gender <- net_heterophily(marvel_friends, "Gender"))
+(obs.powers <- net_heterophily(marvel_friends, "PowerOrigin")) 
+(obs.attract <- net_heterophily(marvel_friends, "Attractive")) 
@@ -330,21 +373,19 @@

Calculating EI index

chance for a network of this size and density and distribution of that attribute.

-
-
-

Testing scores

-
-

Conditional uniform graph tests

-

To see whether we should be surprised by a score this high/low, we -will simulate a series of random graphs (Erdös-Renyi/Bernoulli) of the -same dimensions and distribution of the attribute to find out whether -there is more homophily or heterophily than expected by chance.

-

This is called a conditional uniform graph test, but -{migraph} chooses more descriptive function names, such as +

+

CUG tests

+

To see whether we should be surprised by scores this high/low, we +compare these scores with those from a series of random graphs +(Erdös-Renyi/Bernoulli) of the same dimensions and distribution of the +attribute. This can help us establish whether there is more homophily or +heterophily than expected by chance.

+

This is often called a conditional uniform graph or CUG test, but +{migraph} uses more descriptive function names, such as test_random(). Plot the results of running this function -with respect to the EI index on each of the three variables used above -one thousand times.

+with respect to the EI index on each of the three variables. You can +specify that one thousand simulations should be used using +times = 1000.

@@ -364,14 +405,14 @@

Conditional uniform graph tests

data-completion="1" data-diagnostics="1" data-startover="1" data-lines="0" data-pipe="|>">
rand.gender <- test_random(marvel_friends, 
-                            network_heterophily, attribute = "Gender", 
+                           net_heterophily, attribute = "Gender", 
                            times = 1000)
 rand.power <- test_random(marvel_friends, 
-                           network_heterophily, attribute = "PowerOrigin", 
-                           times = 1000)
+                          net_heterophily, attribute = "PowerOrigin", 
+                          times = 1000)
 rand.attract <- test_random(marvel_friends, 
-                             network_heterophily, attribute = "Attractive", 
-                           times = 1000)
+                            net_heterophily, attribute = "Attractive", 
+                            times = 1000)
 plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute")
 plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute")
 plot(rand.attract) + ggtitle("CUG test results for 'Attractive' attribute")
@@ -398,16 +439,20 @@

Conditional uniform graph tests

returned larger EI scores, between .1 and .4. That is, there is significantly less heterophily here than expected.

-
-

Quadratic assignment procedure tests

-

Ah, but perhaps the random graph is not the best reference group for +

+

QAP tests

+

Ah, but perhaps random graphs are not the best reference group for establishing whether there is a significant homophily effect here. After -all, social networks are not completely random; they are structured.

-

Another approach is to use permutations of the network. Permuting the -network retains the structure of the network, but reassigns any labels -(variables) randomly. Let’s first plot the observed data and some -permuted data next to each other.

+all, social networks are not completely random; they are +structured in particular ways, such as some nodes having higher +degrees than others, or there being a core and periphery or community +topology.

+

Another approach to establishing a baseline for whether we should be +surprised by a given score or not is to use permutations of the +underlying network instead of random graphs. Permuting the network +retains the structure of the network because the ties are kept and only +the labels (variables) are reassigned randomly. Let’s first plot the +observed data and some permuted data next to each other.

@@ -416,19 +461,19 @@

Quadratic assignment procedure tests

-
autographr(generate_permutation(____, with_attr = TRUE), ____)
+
graphr(generate_permutation(____, with_attr = TRUE), ____)
-
old <- autographr(marvel_friends, 
-                  labels = FALSE, node_size = 6, 
-                  node_color = "PowerOrigin", 
-                  node_shape = "Gender") + ggtitle("Original network")
-new <- autographr(generate_permutation(marvel_friends, with_attr = TRUE),
-                   labels = FALSE, node_size = 6,
-                  node_color = "PowerOrigin",
-                  node_shape = "Gender") + ggtitle("Permuted network")
+
old <- graphr(marvel_friends,
+              labels = FALSE, node_size = 6,
+              node_color = "PowerOrigin",
+              node_shape = "Gender") + ggtitle("Original network")
+new <- graphr(generate_permutation(marvel_friends, with_attr = TRUE),
+              labels = FALSE, node_size = 6,
+              node_color = "PowerOrigin",
+              node_shape = "Gender") + ggtitle("Permuted network")
 old + new
@@ -439,11 +484,11 @@

Quadratic assignment procedure tests

-

This single permutation suggests there might otherwise be some more -even mixing of these attributes, but it is just a single permutation. -Let’s try a test that runs this over a succession of permutations, just -as we did with random graphs. Plot the results for gender and power -according to the random and permutation baselines.

+

This single permutation suggests a more even mixing of these +attributes is possible, but it is just a single permutation. Let’s try a +test that runs this over a succession of permutations, just as we did +with random graphs. Plot the results for gender and power according to +the random and permutation baselines.

@@ -459,14 +504,20 @@

Quadratic assignment procedure tests

data-completion="1" data-diagnostics="1" data-startover="1" data-lines="0" data-pipe="|>">
(perm.gender <- test_permutation(marvel_friends, 
-                                network_heterophily, attribute = "Gender",
+                                net_heterophily, attribute = "Gender",
                                 times = 1000))
 (perm.power <- test_permutation(marvel_friends, 
-                               network_heterophily, attribute = "PowerOrigin",
+                                net_heterophily, attribute = "PowerOrigin",
                                 times = 1000))
 
-(plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") + theme(plot.title = element_text(size=8)) | plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") + theme(plot.title = element_text(size=8))) /
-(plot(perm.gender) + ggtitle("QAP test results for 'Gender' attribute") + theme(plot.title = element_text(size=8)) | plot(perm.power) + ggtitle("QAP test results for 'PowerOrigin' attribute") + theme(plot.title = element_text(size=8)))
+(plot(rand.gender) + ggtitle("CUG test results for 'Gender' attribute") + + theme(plot.title = element_text(size=8)) | + plot(rand.power) + ggtitle("CUG test results for 'PowerOrigin' attribute") + + theme(plot.title = element_text(size=8))) / +(plot(perm.gender) + ggtitle("QAP test results for 'Gender' attribute") + + theme(plot.title = element_text(size=8)) | + plot(perm.power) + ggtitle("QAP test results for 'PowerOrigin' attribute") + + theme(plot.title = element_text(size=8)))

Again, we see that there is perhaps nothing so surprising that we got the homophily score for gender that we did, but the lack of power origin @@ -475,7 +526,7 @@

Quadratic assignment procedure tests

distribution (be mindful of the scale of the x-axis). That is, taking into account the structure of the network leads us to expect a larger spread in the EI index than when the variable is distributed -around a random network.

+across a random network.

@@ -485,9 +536,8 @@

Quadratic assignment procedure tests

-
-
-

Network linear models

+
+

Network regression

Next let us examine homophily in another network. The data were collected as part of an early experiment on communication between social network researchers who were using an Electronic Information Exchange @@ -502,22 +552,25 @@

Network linear models

data-completion="1" data-diagnostics="1" data-startover="1" data-lines="0" data-pipe="|>">
ison_networkers
-autographr(ison_networkers,
-           node_color = "Discipline")
+graphr(ison_networkers, node_color = "Discipline")

Let’s use both the continuous Citations and the categorical Discipline variables and come up with a couple of key hypotheses:

    -
  • \(H_1\): whether these researchers -send more emails to those who are cited more
  • -
  • \(H_2\): whether there is -disciplinary homophily
  • +
  • \(H_1\): researchers send more +emails to those who are cited more (popularity hypothesis)
  • +
  • \(H_2\): researchers send more +emails to those of the same discipline (disciplinary homophily +hypothesis)

Let’s start with a pretty maximally specified model (note that it doesn’t make sense to include both ego and alter effects because these -are undirected).

+networks are undirected). We are using times = 200 here +because of timeout limitations in the learnr tutorial +system, but for publication-quality results you would want to base your +conclusions on 2000 simulations or more.

@@ -526,7 +579,8 @@

Network linear models

-
network_reg(____, ison_networkers, times = 200)
+
network_reg(____, ison_networkers, times = 200)
+# If the model runs into a timeout error, please reduce the number of 'times' in the function above.
Network linear models
weight ~ alter(Citations) + sim(Citations) + 
                       alter(Discipline) + same(Discipline)
-
-
model1 <- network_reg(weight ~ alter(Citations) + sim(Citations) + 
-                      alter(Discipline) + same(Discipline), 
-                      ison_networkers, times = 200)
-# If the model runs into a timeout error, please reduce the number of 'times' in the function above.
-
-

We can use tidy methods to get the salient information from this -model, and {migraph} includes also a plot method for these -results to facilitate the quick interpretation of these results.

+

We can use tidy methods (e.g. tidy(), +glance()) to get the salient information from this model, +and {migraph} also includes a plot method +to facilitate the quick interpretation of these results.

@@ -563,9 +610,9 @@

Network linear models

plot(model1)

This violin plot presents the distribution of coefficients from -permutations of the network, with the fitted coefficient from the data -as a red dot. Subtle lines are used to indicate 95%, but here the -distributions are rendered so wide that they are often not seen.

+permutations of the network, with the coefficient fitted from the data +as a red dot. Lines are used to indicate 95% thresholds, but here the +distributions are rendered so wide that they are often not visible.

@@ -574,7 +621,11 @@

Network linear models

-

+

While these are the conclusions from this ‘play’ data, you may have +more and more interesting data at hand. How would you go about +specifying such a model? Why is such an approach more appropriate for +network data than linear or logistic regression? + + + + + + + + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - + - + + - - +

@@ -1273,7 +1401,8 @@

Network linear models

-

Regression

+

Diversity and +Regression

by James Hollway

diff --git a/man/between_centrality.Rd b/man/between_centrality.Rd deleted file mode 100644 index 660b189dd..000000000 --- a/man/between_centrality.Rd +++ /dev/null @@ -1,111 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_centrality.R -\name{between_centrality} -\alias{between_centrality} -\alias{node_betweenness} -\alias{node_induced} -\alias{node_flow} -\alias{tie_betweenness} -\alias{network_betweenness} -\title{Measures of betweenness-like centrality and centralisation} -\usage{ -node_betweenness(.data, normalized = TRUE, cutoff = NULL) - -node_induced(.data, normalized = TRUE, cutoff = NULL) - -node_flow(.data, normalized = TRUE) - -tie_betweenness(.data, normalized = TRUE) - -network_betweenness( - .data, - normalized = TRUE, - direction = c("all", "out", "in") -) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{normalized}{Logical scalar, whether the centrality scores are normalized. -Different denominators are used depending on whether the object is one-mode or two-mode, -the type of centrality, and other arguments.} - -\item{cutoff}{The maximum path length to consider when calculating betweenness. -If negative or NULL (the default), there's no limit to the path lengths considered.} - -\item{direction}{Character string, “out” bases the measure on outgoing ties, -“in” on incoming ties, and "all" on either/the sum of the two. 
-For two-mode networks, "all" uses as numerator the sum of differences -between the maximum centrality score for the mode -against all other centrality scores in the network, -whereas "in" uses as numerator the sum of differences -between the maximum centrality score for the mode -against only the centrality scores of the other nodes in that mode.} -} -\value{ -A numeric vector giving the betweenness centrality measure of each node. -} -\description{ -These functions calculate common betweenness-related centrality measures for one- and two-mode networks: -\itemize{ -\item \code{node_betweenness()} measures the betweenness centralities of nodes in a network. -\item \code{node_induced()} measures the induced betweenness centralities of nodes in a network. -\item \code{node_flow()} measures the flow betweenness centralities of nodes in a network, -which uses an electrical current model for information spreading -in contrast to the shortest paths model used by normal betweenness centrality. -\item \code{tie_betweenness()} measures the number of shortest paths going through a tie. -\item \code{network_betweenness()} measures the betweenness centralization for a network. -} - -All measures attempt to use as much information as they are offered, -including whether the networks are directed, weighted, or multimodal. -If this would produce unintended results, -first transform the salient properties using e.g. \code{\link[=to_undirected]{to_undirected()}} functions. -All centrality and centralization measures return normalized measures by default, -including for two-mode networks. -} -\examples{ -node_betweenness(mpn_elite_mex) -node_betweenness(ison_southern_women) -node_induced(mpn_elite_mex) -(tb <- tie_betweenness(ison_adolescents)) -plot(tb) -#ison_adolescents \%>\% mutate_ties(weight = tb) \%>\% -# autographr() -network_betweenness(ison_southern_women, direction = "in") -} -\references{ -Everett, Martin and Steve Borgatti. 2010. 
-"Induced, endogenous and exogenous centrality" -\emph{Social Networks}, 32: 339-344. -\doi{10.1016/j.socnet.2010.06.004} -} -\seealso{ -Other centrality: -\code{\link{close_centrality}}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}} - -Other measures: -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{centrality} -\concept{measures} diff --git a/man/brokerage_census.Rd b/man/brokerage_census.Rd deleted file mode 100644 index 75daea708..000000000 --- a/man/brokerage_census.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/motif_census.R -\name{brokerage_census} -\alias{brokerage_census} -\alias{node_brokerage_census} -\alias{network_brokerage_census} -\alias{node_brokering_activity} -\alias{node_brokering_exclusivity} -\alias{node_brokering} -\title{Censuses of brokerage motifs} -\usage{ -node_brokerage_census(.data, membership, standardized = FALSE) - -network_brokerage_census(.data, membership, standardized = FALSE) - -node_brokering_activity(.data, membership) - -node_brokering_exclusivity(.data, membership) - -node_brokering(.data, membership) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{membership}{A vector of partition membership as integers.} - -\item{standardized}{Whether the score should be standardized -into a \emph{z}-score 
indicating how many standard deviations above -or below the average the score lies.} -} -\description{ -These functions include ways to take a census of the brokerage positions of nodes -in a network: -\itemize{ -\item \code{node_brokerage_census()} returns the Gould-Fernandez brokerage -roles played by nodes in a network. -\item \code{network_brokerage_census()} returns the Gould-Fernandez brokerage -roles in a network. -} -} -\examples{ -node_brokerage_census(manynet::ison_networkers, "Discipline") -network_brokerage_census(manynet::ison_networkers, "Discipline") -node_brokering_exclusivity(ison_networkers, "Discipline") -} -\references{ -Gould, R.V. and Fernandez, R.M. 1989. -“Structures of Mediation: A Formal Approach to Brokerage in Transaction Networks.” -\emph{Sociological Methodology}, 19: 89-126. - -Jasny, Lorien, and Mark Lubell. 2015. -“Two-Mode Brokerage in Policy Networks.” -\emph{Social Networks} 41:36–47. -\doi{10.1016/j.socnet.2014.11.005}. - -Hamilton, Matthew, Jacob Hileman, and Orjan Bodin. 2020. -"Evaluating heterogeneous brokerage: New conceptual and methodological approaches -and their application to multi-level environmental governance networks" -\emph{Social Networks} 61: 1-10. 
-\doi{10.1016/j.socnet.2019.08.002} -} -\seealso{ -Other motifs: -\code{\link{network_census}}, -\code{\link{node_census}} -} -\concept{motifs} diff --git a/man/cliques.Rd b/man/cliques.Rd deleted file mode 100644 index cf1021c25..000000000 --- a/man/cliques.Rd +++ /dev/null @@ -1,84 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/member_cliques.R -\name{cliques} -\alias{cliques} -\alias{node_roulette} -\title{Clique partitioning algorithms} -\usage{ -node_roulette(.data, num_groups, group_size, times = NULL) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{num_groups}{An integer indicating the number of groups desired.} - -\item{group_size}{An integer indicating the desired size of most of the groups. -Note that if the number of nodes is not divisible into groups of equal size, -there may be some larger or smaller groups.} - -\item{times}{An integer of the number of search iterations the algorithm should complete. -By default this is the number of nodes in the network multiplied by the number of groups. -This heuristic may be insufficient for small networks and numbers of groups, -and burdensome for large networks and numbers of groups, but can be overwritten. -At every 10th iteration, a stronger perturbation of a number of successive changes, -approximately the number of nodes divided by the number of groups, -will take place irrespective of whether it improves the objective function.} -} -\description{ -These functions create a vector of nodes' memberships in -cliques: -\itemize{ -\item \code{node_roulette()} assigns nodes to maximally diverse groups. 
-} -} -\section{Maximally diverse grouping problem}{ - -This well known computational problem is a NP-hard problem -with a number of relevant applications, -including the formation of groups of students that have encountered -each other least or least recently. -Essentially, the aim is to return a membership of nodes in cliques -that minimises the sum of their previous (weighted) ties: - -\deqn{\sum_{g=1}^{m} \sum_{i=1}^{n-1} \sum_{j=i+1}^{n} x_{ij} y_{ig} y_{jg}} - -where \eqn{y_{ig} = 1} if node \eqn{i} is in group \eqn{g}, and 0 otherwise. - -\eqn{x_{ij}} is the existing network data. -If this is an empty network, the function will just return cliques. -To run this repeatedly, one can join a clique network of the membership result -with the original network, using this as the network data for the next round. - -A form of the Lai and Hao (2016) iterated maxima search (IMS) is used here. -This performs well for small and moderately sized networks. -It includes both weak and strong perturbations to an initial solution -to ensure that a robust solution from the broader state space is identified. -The user is referred to Lai and Hao (2016) and Lai et al (2021) for more details. -} - -\references{ -Lai, Xiangjing, and Jin-Kao Hao. 2016. -“Iterated Maxima Search for the Maximally Diverse Grouping Problem.” -\emph{European Journal of Operational Research} 254(3):780–800. -\doi{10.1016/j.ejor.2016.05.018}. - -Lai, Xiangjing, Jin-Kao Hao, Zhang-Hua Fu, and Dong Yue. 2021. -“Neighborhood Decomposition Based Variable Neighborhood Search and Tabu Search for Maximally Diverse Grouping.” -\emph{European Journal of Operational Research} 289(3):1067–86. -\doi{10.1016/j.ejor.2020.07.048}. 
-} -\seealso{ -Other memberships: -\code{\link{community}}, -\code{\link{components}()}, -\code{\link{core}}, -\code{\link{equivalence}} -} -\concept{memberships} diff --git a/man/close_centrality.Rd b/man/close_centrality.Rd deleted file mode 100644 index f943f41f6..000000000 --- a/man/close_centrality.Rd +++ /dev/null @@ -1,122 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_centrality.R -\name{close_centrality} -\alias{close_centrality} -\alias{node_closeness} -\alias{node_reach} -\alias{node_harmonic} -\alias{node_information} -\alias{tie_closeness} -\alias{network_closeness} -\alias{network_reach} -\alias{network_harmonic} -\title{Measures of closeness-like centrality and centralisation} -\usage{ -node_closeness(.data, normalized = TRUE, direction = "out", cutoff = NULL) - -node_reach(.data, normalized = TRUE, k = 2) - -node_harmonic(.data, normalized = TRUE, k = -1) - -node_information(.data, normalized = TRUE) - -tie_closeness(.data, normalized = TRUE) - -network_closeness(.data, normalized = TRUE, direction = c("all", "out", "in")) - -network_reach(.data, normalized = TRUE, k = 2) - -network_harmonic(.data, normalized = TRUE, k = 2) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{normalized}{Logical scalar, whether the centrality scores are normalized. -Different denominators are used depending on whether the object is one-mode or two-mode, -the type of centrality, and other arguments.} - -\item{direction}{Character string, “out” bases the measure on outgoing ties, -“in” on incoming ties, and "all" on either/the sum of the two. 
-For two-mode networks, "all" uses as numerator the sum of differences -between the maximum centrality score for the mode -against all other centrality scores in the network, -whereas "in" uses as numerator the sum of differences -between the maximum centrality score for the mode -against only the centrality scores of the other nodes in that mode.} - -\item{cutoff}{Maximum path length to use during calculations.} - -\item{k}{Integer of steps out to calculate reach} -} -\description{ -These functions calculate common closeness-related centrality measures for one- and two-mode networks: -\itemize{ -\item \code{node_closeness()} measures the closeness centrality of nodes in a network. -\item \code{node_reach()} measures nodes' reach centrality, -or how many nodes they can reach within \emph{k} steps. -\item \code{node_harmonic()} measures nodes' harmonic centrality or valued centrality, -which is thought to behave better than reach centrality for disconnected networks. -\item \code{node_information()} measures nodes' information centrality or -current-flow closeness centrality. -\item \code{tie_closeness()} measures the closeness of each tie to other ties in the network. -\item \code{network_closeness()} measures a network's closeness centralization. -\item \code{network_reach()} measures a network's reach centralization. -\item \code{network_harmonic()} measures a network's harmonic centralization. -} - -All measures attempt to use as much information as they are offered, -including whether the networks are directed, weighted, or multimodal. -If this would produce unintended results, -first transform the salient properties using e.g. \code{\link[=to_undirected]{to_undirected()}} functions. -All centrality and centralization measures return normalized measures by default, -including for two-mode networks. 
-} -\examples{ -node_closeness(mpn_elite_mex) -node_closeness(ison_southern_women) -node_reach(ison_adolescents) -(ec <- tie_closeness(ison_adolescents)) -plot(ec) -#ison_adolescents \%>\% -# activate(edges) \%>\% mutate(weight = ec) \%>\% -# autographr() -network_closeness(ison_southern_women, direction = "in") -} -\references{ -Marchiori, M, and V Latora. 2000. -"Harmony in the small-world". -\emph{Physica A} 285: 539-546. - -Dekker, Anthony. 2005. -"Conceptual distance in social network analysis". -\emph{Journal of Social Structure} 6(3). -} -\seealso{ -Other centrality: -\code{\link{between_centrality}}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}} - -Other measures: -\code{\link{between_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{centrality} -\concept{measures} diff --git a/man/closure.Rd b/man/closure.Rd deleted file mode 100644 index 18502e58d..000000000 --- a/man/closure.Rd +++ /dev/null @@ -1,104 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_closure.R -\name{closure} -\alias{closure} -\alias{network_reciprocity} -\alias{node_reciprocity} -\alias{network_transitivity} -\alias{node_transitivity} -\alias{network_equivalency} -\alias{network_congruency} -\title{Measures of network closure} -\usage{ -network_reciprocity(.data, method = "default") - -node_reciprocity(.data) - -network_transitivity(.data) - -node_transitivity(.data) - -network_equivalency(.data) - -network_congruency(.data, object2) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble 
from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{method}{For reciprocity, either \code{default} or \code{ratio}. -See \code{?igraph::reciprocity}} - -\item{object2}{Optionally, a second (two-mode) matrix, igraph, or tidygraph} -} -\description{ -These functions offer methods for summarising the closure in configurations -in one-, two-, and three-mode networks: -\itemize{ -\item \code{network_reciprocity()} measures reciprocity in a (usually directed) network. -\item \code{node_reciprocity()} measures nodes' reciprocity. -\item \code{network_transitivity()} measures transitivity in a network. -\item \code{node_transitivity()} measures nodes' transitivity. -\item \code{network_equivalency()} measures equivalence or reinforcement -in a (usually two-mode) network. -\item \code{network_congruency()} measures congruency across two two-mode networks. -} -} -\details{ -For one-mode networks, shallow wrappers of igraph versions exist via -\code{network_reciprocity} and \code{network_transitivity}. - -For two-mode networks, \code{network_equivalency} calculates the proportion of three-paths in the network -that are closed by a fourth tie to establish a "shared four-cycle" structure. - -For three-mode networks, \code{network_congruency} calculates the proportion of three-paths -spanning two two-mode networks that are closed by a fourth tie to establish a -"congruent four-cycle" structure. -} -\section{Equivalency}{ - -The \code{network_equivalency()} function calculates the Robins and Alexander (2004) -clustering coefficient for two-mode networks. -Note that for weighted two-mode networks, the result is divided by the average tie weight.
-} - -\examples{ -network_reciprocity(ison_southern_women) -node_reciprocity(to_unweighted(ison_networkers)) -network_transitivity(ison_adolescents) -node_transitivity(ison_adolescents) -network_equivalency(ison_southern_women) -} -\references{ -Robins, Garry L, and Malcolm Alexander. 2004. -Small worlds among interlocking directors: Network structure and distance in bipartite graphs. -\emph{Computational & Mathematical Organization Theory} 10(1): 69–94. -\doi{10.1023/B:CMOT.0000032580.12184.c0}. - -Knoke, David, Mario Diani, James Hollway, and Dimitris C Christopoulos. 2021. -\emph{Multimodal Political Networks}. -Cambridge: Cambridge University Press. -\doi{10.1017/9781108985000} -} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/cluster.Rd b/man/cluster.Rd deleted file mode 100644 index 076ee2f02..000000000 --- a/man/cluster.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/model_cluster.R -\name{cluster} -\alias{cluster} -\alias{cluster_hierarchical} -\alias{cluster_concor} -\title{Methods for equivalence clustering} -\usage{ -cluster_hierarchical(census, distance) - -cluster_concor(.data, census) -} -\arguments{ -\item{census}{A matrix returned by a \verb{node_*_census()} function.} - -\item{distance}{Character string indicating which distance metric -to pass on to \code{stats::dist}. -By default \code{"euclidean"}, but other options include -\code{"maximum"}, \code{"manhattan"}, \code{"canberra"}, \code{"binary"}, and \code{"minkowski"}. -Fewer, identifiable letters, e.g.
\code{"e"} for Euclidean, are sufficient.} - -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These functions are used to cluster some census object: -\itemize{ -\item \code{cluster_hierarchical()} returns a hierarchical clustering object -created by \code{stats::hclust()}. -\item \code{cluster_concor()} returns a hierarchical clustering object -created from a convergence of correlations procedure (CONCOR). -} - -These functions are not intended to be called directly, -but are called within \code{node_equivalence()} and related functions. -They are exported and listed here to provide more detailed documentation. -} -\section{CONCOR}{ - - -First a matrix of Pearson correlation coefficients between each pair of nodes' -profiles in the given census is created. -Then, again, we find the correlations of this square, symmetric matrix, -and continue to do this iteratively until each entry is either \code{1} or \code{-1}. -These values are used to split the data into two partitions, -with members either holding the values \code{1} or \code{-1}. -This procedure from census to convergence is then repeated within each block, -allowing further partitions to be found. -Unlike UCINET, partitions are continued until there are single members in -each partition. -Then a distance matrix is constructed from records of the partition phase in which -nodes were separated, -and this is given to \code{stats::hclust()} so that dendrograms etc can be returned. -} - -\references{ -Breiger, Ronald L., Scott A. Boorman, and Phipps Arabie. 1975.
-"An Algorithm for Clustering Relational Data with Applications to -Social Network Analysis and Comparison with Multidimensional Scaling". -\emph{Journal of Mathematical Psychology}, 12: 328-83. -\doi{10.1016/0022-2496(75)90028-0}. -} diff --git a/man/cohesion.Rd b/man/cohesion.Rd deleted file mode 100644 index 44828a626..000000000 --- a/man/cohesion.Rd +++ /dev/null @@ -1,91 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_cohesion.R -\name{cohesion} -\alias{cohesion} -\alias{network_density} -\alias{network_components} -\alias{network_cohesion} -\alias{network_adhesion} -\alias{network_diameter} -\alias{network_length} -\title{Measures of network cohesion or connectedness} -\usage{ -network_density(.data) - -network_components(.data) - -network_cohesion(.data) - -network_adhesion(.data) - -network_diameter(.data) - -network_length(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These functions return values or vectors relating to how connected a network is -and the number of nodes or edges to remove that would increase fragmentation. -\itemize{ -\item \code{network_density()} measures the ratio of ties to the number -of possible ties. -\item \code{network_components()} measures the number of (strong) components -in the network. -\item \code{network_cohesion()} measures the minimum number of nodes to remove -from the network needed to increase the number of components. -\item \code{network_adhesion()} measures the minimum number of ties to remove -from the network needed to increase the number of components. 
-\item \code{network_diameter()} measures the maximum path length in the network. -\item \code{network_length()} measures the average path length in the network. -} -} -\section{Cohesion}{ - -To get the 'weak' components of a directed graph, -please use \code{manynet::to_undirected()} first. -} - -\examples{ -network_density(mpn_elite_mex) -network_density(mpn_elite_usa_advice) - network_components(mpn_ryanair) - network_components(manynet::to_undirected(mpn_ryanair)) -network_cohesion(manynet::ison_marvel_relationships) -network_cohesion(manynet::to_giant(manynet::ison_marvel_relationships)) -network_adhesion(manynet::ison_marvel_relationships) -network_adhesion(manynet::to_giant(manynet::ison_marvel_relationships)) -network_diameter(manynet::ison_marvel_relationships) -network_diameter(manynet::to_giant(manynet::ison_marvel_relationships)) -network_length(manynet::ison_marvel_relationships) -network_length(manynet::to_giant(manynet::ison_marvel_relationships)) -} -\references{ -White, Douglas R and Frank Harary. 2001. -"The Cohesiveness of Blocks In Social Networks: Node Connectivity and Conditional Density." -\emph{Sociological Methodology} 31(1): 305-59. 
-} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/community.Rd b/man/community.Rd deleted file mode 100644 index 74c2d8416..000000000 --- a/man/community.Rd +++ /dev/null @@ -1,273 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/member_community.R -\name{community} -\alias{community} -\alias{node_optimal} -\alias{node_kernighanlin} -\alias{node_edge_betweenness} -\alias{node_fast_greedy} -\alias{node_leading_eigen} -\alias{node_walktrap} -\alias{node_infomap} -\alias{node_spinglass} -\alias{node_fluid} -\alias{node_louvain} -\alias{node_leiden} -\title{Community partitioning algorithms} -\usage{ -node_optimal(.data) - -node_kernighanlin(.data) - -node_edge_betweenness(.data) - -node_fast_greedy(.data) - -node_leading_eigen(.data) - -node_walktrap(.data, times = 50) - -node_infomap(.data, times = 50) - -node_spinglass(.data, max_k = 200, resolution = 1) - -node_fluid(.data) - -node_louvain(.data, resolution = 1) - -node_leiden(.data, resolution = 1) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{times}{Integer indicating number of simulations/walks used. -By default, \code{times=50}.} - -\item{max_k}{Integer constant, the number of spins to use as an upper limit -of communities to be found. 
Some sets can be empty at the end.} - -\item{resolution}{The Reichardt-Bornholdt “gamma” resolution parameter for modularity. -By default 1, making existing and non-existing ties equally important. -Smaller values make existing ties more important, -and larger values make missing ties more important.} -} -\description{ -These functions offer different algorithms useful for partitioning -networks into sets of communities: -\itemize{ -\item \code{node_optimal()} is a problem-solving algorithm that seeks to maximise -modularity over all possible partitions. -\item \code{node_kernighanlin()} is a greedy, iterative, deterministic -partitioning algorithm that results in two equally-sized communities. -\item \code{node_edge_betweenness()} is a hierarchical, decomposition algorithm -where edges are removed in decreasing order of the number of -shortest paths passing through the edge. -\item \code{node_fast_greedy()} is a hierarchical, agglomerative algorithm, -that tries to optimize modularity in a greedy manner. -\item \code{node_leading_eigen()} is a top-down, hierarchical algorithm. -\item \code{node_walktrap()} is a hierarchical, agglomerative algorithm based on random walks. -\item \code{node_infomap()} is a hierarchical algorithm based on the information in random walks. -\item \code{node_spinglass()} is a greedy, iterative, probabilistic algorithm, -based on an analogy to a model from statistical physics. -\item \code{node_fluid()} is a propagation-based partitioning algorithm, -based on an analogy to a model from fluid dynamics. -\item \code{node_louvain()} is an agglomerative multilevel algorithm that seeks to maximise -modularity over all possible partitions. -\item \code{node_leiden()} is an agglomerative multilevel algorithm that seeks to maximise -the Constant Potts Model over all possible partitions.
-} - -The different algorithms offer various advantages in terms of computation time, -availability on different types of networks, ability to maximise modularity, -and their logic or domain of inspiration. -} -\section{Optimal}{ - -The general idea is to calculate the modularity of all possible partitions, -and choose the community structure that maximises this modularity measure. -Note that this is an NP-complete problem with exponential time complexity. -The guidance in the igraph package is that networks of <50-200 nodes are probably fine. -} - -\section{Edge-betweenness}{ - -This is motivated by the idea that edges connecting different groups -are more likely to lie on multiple shortest paths when they are the -only option to go from one group to another. -This method yields good results but is very slow because of -the computational complexity of edge-betweenness calculations and -because the betweenness scores have to be re-calculated after every edge removal. -Networks of ~700 nodes and ~3500 ties are around the upper size limit -that is feasible with this approach. -} - -\section{Fast-greedy}{ - -Initially, each node is assigned a separate community. -Communities are then merged iteratively such that each merge -yields the largest increase in the current value of modularity, -until no further increases to the modularity are possible. -The method is fast and recommended as a first approximation -because it has no parameters to tune. -However, it is known to suffer from a resolution limit. -} - -\section{Leading eigenvector}{ - -In each step, the network is bifurcated such that modularity increases most. -The splits are determined according to the leading eigenvector of the modularity matrix. -A stopping condition prevents tightly connected groups from being split further.
-Note that due to the eigenvector calculations involved, -this algorithm will perform poorly on degenerate networks, -but will likely obtain a higher modularity than fast-greedy (at some cost of speed). -} - -\section{Walktrap}{ - -The general idea is that random walks on a network are more likely to stay -within the same community because few edges lead outside a community. -By repeating random walks of 4 steps many times, -information about the hierarchical merging of communities is collected. -} - -\section{Infomap}{ - -Motivated by information theoretic principles, this algorithm tries to build -a grouping that provides the shortest description length for a random walk, -where the description length is measured by the expected number of bits per node required to encode the path. -} - -\section{Spin-glass}{ - -This is motivated by analogy to the Potts model in statistical physics. -Each node can be in one of \emph{k} "spin states", -and ties (particle interactions) provide information about which pairs of nodes -want similar or different spin states. -The final community definitions are represented by the nodes' spin states -after a number of updates. -A different implementation than the default is used in the case of signed networks, -such that nodes connected by negative ties will be more likely found in separate communities. -} - -\section{Fluid}{ - -The general idea is to observe how a discrete number of fluids interact, expand and contract, -in a non-homogenous environment, i.e. the network structure. -Unlike the \code{{igraph}} implementation that this function wraps, -this function iterates over all possible numbers of communities and returns the membership -associated with the highest modularity. -} - -\section{Louvain}{ - -The general idea is to take a hierarchical approach to optimising the modularity criterion. 
-Nodes begin in their own communities and are re-assigned in a local, greedy way: -each node is moved to the community where it achieves the highest contribution to modularity. -When no further modularity-increasing reassignments are possible, -the resulting communities are considered nodes (like a reduced graph), -and the process continues. -} - -\section{Leiden}{ - -The general idea is to optimise the Constant Potts Model, -which does not suffer from the resolution limit, instead of modularity. -As outlined in the \code{{igraph}} package, -the Constant Potts Model objective function is: - -\deqn{\frac{1}{2m} \sum_{ij}(A_{ij}-\gamma n_i n_j)\delta(\sigma_i, \sigma_j)} - -where \emph{m} is the total tie weight, -\eqn{A_{ij}} is the tie weight between \emph{i} and \emph{j}, -\eqn{\gamma} is the so-called resolution parameter, -\eqn{n_i} is the node weight of node \emph{i}, -and \eqn{\delta(\sigma_i, \sigma_j) = 1} if and only if -\emph{i} and \emph{j} are in the same community and 0 otherwise. -} - -\examples{ -node_optimal(ison_adolescents) -node_kernighanlin(ison_adolescents) -node_kernighanlin(ison_southern_women) -node_edge_betweenness(ison_adolescents) -node_fast_greedy(ison_adolescents) -node_leading_eigen(ison_adolescents) -node_walktrap(ison_adolescents) -node_infomap(ison_adolescents) -node_spinglass(ison_adolescents) -node_fluid(ison_adolescents) -node_louvain(ison_adolescents) -node_leiden(ison_adolescents) -} -\references{ -Brandes, Ulrik, Daniel Delling, Marco Gaertler, Robert Gorke, Martin Hoefer, Zoran Nikoloski, Dorothea Wagner. 2008. -"On Modularity Clustering", -\emph{IEEE Transactions on Knowledge and Data Engineering} 20(2):172-188. - -Kernighan, Brian W., and Shen Lin. 1970. -"An efficient heuristic procedure for partitioning graphs." -\emph{The Bell System Technical Journal} 49(2): 291-307. -\doi{10.1002/j.1538-7305.1970.tb01770.x} - -Newman, M, and M Girvan. 2004. -"Finding and evaluating community structure in networks."
-\emph{Physical Review E} 69: 026113. - -Clauset, A, MEJ Newman, and C Moore. -"Finding community structure in very large networks." - -Newman, MEJ. 2006. -"Finding community structure using the eigenvectors of matrices" -\emph{Physical Review E} 74:036104. - -Pons, Pascal, and Matthieu Latapy -"Computing communities in large networks using random walks". - -Rosvall, M, and C. T. Bergstrom. 2008. -"Maps of information flow reveal community structure in complex networks", -\emph{PNAS} 105:1118. -\doi{10.1073/pnas.0706851105} - -Rosvall, M., D. Axelsson, and C. T. Bergstrom. 2009. -"The map equation", -\emph{Eur. Phys. J. Special Topics} 178: 13. -\doi{10.1140/epjst/e2010-01179-1} - -Reichardt, Jorg, and Stefan Bornholdt. 2006. -"Statistical Mechanics of Community Detection" -\emph{Physical Review E}, 74(1): 016110–14. -\doi{10.1103/PhysRevE.74.016110} - -Traag, VA, and Jeroen Bruggeman. 2008. -"Community detection in networks with positive and negative links". - -Parés F, Gasulla DG, et al. 2018. -"Fluid Communities: A Competitive, Scalable and Diverse Community Detection Algorithm". -In: \emph{Complex Networks & Their Applications VI} -Springer, 689: 229. -\doi{10.1007/978-3-319-72150-7_19} - -Blondel, Vincent, Jean-Loup Guillaume, Renaud Lambiotte, Etienne Lefebvre. 2008. -"Fast unfolding of communities in large networks", -\emph{J. Stat. Mech.} P10008. - -Traag, V. A., L Waltman, and NJ van Eck. 2019. -"From Louvain to Leiden: guaranteeing well-connected communities", -\emph{Scientific Reports}, 9(1):5233.
-\doi{10.1038/s41598-019-41695-z} -} -\seealso{ -Other memberships: -\code{\link{cliques}}, -\code{\link{components}()}, -\code{\link{core}}, -\code{\link{equivalence}} -} -\concept{memberships} diff --git a/man/components.Rd b/man/components.Rd deleted file mode 100644 index 3d604ad63..000000000 --- a/man/components.Rd +++ /dev/null @@ -1,66 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/member_components.R -\name{components} -\alias{components} -\alias{node_components} -\alias{node_weak_components} -\alias{node_strong_components} -\title{Component partitioning algorithms} -\usage{ -node_components(.data) - -node_weak_components(.data) - -node_strong_components(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These functions create a vector of nodes' memberships in -components or degrees of coreness: -\itemize{ -\item \code{node_components()} assigns nodes' component membership -using edge direction where available. -\item \code{node_weak_components()} assigns nodes' component membership -ignoring edge direction. -\item \code{node_strong_components()} assigns nodes' component membership -based on edge direction. -\item \code{node_roulette()} -} - -In graph theory, components, sometimes called connected components, -are induced subgraphs from partitioning the nodes into disjoint sets. -All nodes that are members of the same partition as \emph{i} are reachable -from \emph{i}. - -For directed networks, -strongly connected components consist of subgraphs where there are paths -in each direction between member nodes. 
-Weakly connected components consist of subgraphs where there is a path -in either direction between member nodes. - -Coreness captures the maximal subgraphs in which each vertex has at least -degree \emph{k}, where \emph{k} is also the order of the subgraph. -As described in \code{igraph::coreness}, -a node's coreness is \emph{k} if it belongs to the \emph{k}-core -but not to the (\emph{k}+1)-core. -} -\examples{ -node_components(mpn_bristol) -} -\seealso{ -Other memberships: -\code{\link{cliques}}, -\code{\link{community}}, -\code{\link{core}}, -\code{\link{equivalence}} -} -\concept{memberships} diff --git a/man/core.Rd b/man/core.Rd deleted file mode 100644 index b19c09216..000000000 --- a/man/core.Rd +++ /dev/null @@ -1,75 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/member_core.R -\name{core} -\alias{core} -\alias{node_core} -\alias{node_coreness} -\title{Core-periphery clustering algorithms} -\usage{ -node_core(.data, method = c("degree", "eigenvector")) - -node_coreness(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{method}{Which method to use to identify cores and periphery. -By default this is "degree", -which relies on the heuristic that high degree nodes are more likely to be in the core. -An alternative is "eigenvector", which instead begins with high eigenvector nodes. 
-Other methods, such as a genetic algorithm, CONCOR, and Rombach-Porter, -can be added if there is interest.} -} -\description{ -These functions identify nodes belonging to (some level of) the core of a network: -\itemize{ -\item \code{node_core()} assigns nodes to either the core or periphery. -\item \code{node_coreness()} assigns nodes to their level of k-coreness. -} -} -\section{Core-periphery}{ - -This function is used to identify which nodes should belong to the core, -and which to the periphery. -It seeks to minimize the following quantity: -\deqn{Z(S_1) = \sum_{(i<j)\in S_1} \textbf{I}_{\{A_{ij}=0\}} + \sum_{(i<j)\notin S_1} \textbf{I}_{\{A_{ij}=1\}}} -where nodes \eqn{\{i,j,...,n\}} are ordered in descending degree, -\eqn{A} is the adjacency matrix, -and the indicator function is 1 if the predicate is true or 0 otherwise. -Note that minimising this quantity maximises density in the core block -and minimises density in the periphery block; -it ignores ties between these blocks. -} - -\examples{ -#mpn_elite_usa_advice \%>\% as_tidygraph \%>\% -# mutate(corep = node_core(mpn_elite_usa_advice)) \%>\% -# autographr(node_color = "corep") -network_core(mpn_elite_usa_advice) -node_coreness(ison_adolescents) -} -\references{ -Borgatti, Stephen P., & Everett, Martin G. 1999. -Models of core /periphery structures. -\emph{Social Networks}, 21, 375–395. -\doi{10.1016/S0378-8733(99)00019-2} - -Lip, Sean Z. W. 2011. -“A Fast Algorithm for the Discrete Core/Periphery Bipartitioning Problem.” -\doi{10.48550/arXiv.1102.5511} -} -\seealso{ -Other memberships: -\code{\link{cliques}}, -\code{\link{community}}, -\code{\link{components}()}, -\code{\link{equivalence}} -} -\concept{memberships} diff --git a/man/defunct.Rd b/man/defunct.Rd index c68e09f4e..359fd9396 100644 --- a/man/defunct.Rd +++ b/man/defunct.Rd @@ -2,133 +2,10 @@ % Please edit documentation in R/migraph-defunct.R \name{defunct} \alias{defunct} -\alias{edge_betweenness} -\alias{edge_closeness} -\alias{edge_degree} -\alias{edge_eigenvector} -\alias{edge_loop} -\alias{edge_multiple} -\alias{edge_bridges} -\alias{edge_reciprocal} -\alias{node_cuts} -\alias{graph_blau_index} -\alias{graph_diversity} -\alias{graph_ei_index} -\alias{graph_homophily} -\alias{plot.graph_test} -\alias{print.graph_test} -\alias{print.graph_measure} -\alias{print.graph_motif} -\alias{graph_adhesion} -\alias{graph_cohesion} -\alias{graph_assortativity} -\alias{graph_balance}
-\alias{graph_betweenness} -\alias{graph_closeness} -\alias{graph_degree} -\alias{graph_eigenvector} -\alias{graph_components} -\alias{graph_factions} -\alias{graph_congruency} -\alias{graph_equivalency} -\alias{graph_core} -\alias{graph_density} -\alias{graph_reciprocity} -\alias{graph_transitivity} -\alias{graph_diameter} -\alias{graph_length} -\alias{graph_dyad_census} -\alias{graph_triad_census} -\alias{graph_mixed_census} -\alias{graph_modularity} -\alias{graph_smallworld} -\alias{network_homophily} -\alias{node_homophily} +\alias{test_gof} \title{Functions that have been renamed, superseded, or are no longer working} \usage{ -edge_betweenness(object, normalized = TRUE) - -edge_closeness(object, normalized = TRUE) - -edge_degree(object, normalized = TRUE) - -edge_eigenvector(object, normalized = TRUE) - -edge_loop(object) - -edge_multiple(object) - -edge_bridges(object) - -edge_reciprocal(object) - -node_cuts(object) - -graph_blau_index(object, attribute, clusters = NULL) - -graph_diversity(object, attribute, clusters = NULL) - -graph_ei_index(object, attribute) - -graph_homophily(object, attribute) - -\method{plot}{graph_test}(x, ..., threshold = 0.95, tails = c("two", "one")) - -\method{print}{graph_test}(x, ..., max.length = 6, digits = 3) - -\method{print}{graph_measure}(x, ..., digits = 3) - -\method{print}{graph_motif}(x, ...) 
- -graph_adhesion(object) - -graph_cohesion(object) - -graph_assortativity(object) - -graph_balance(object) - -graph_betweenness(object, normalized = TRUE, direction = c("all", "out", "in")) - -graph_closeness(object, normalized = TRUE, direction = c("all", "out", "in")) - -graph_degree(object, normalized = TRUE, direction = c("all", "out", "in")) - -graph_eigenvector(object, normalized = TRUE) - -graph_components(object) - -graph_factions(object, membership = NULL) - -graph_congruency(object, object2) - -graph_equivalency(object) - -graph_core(object, membership = NULL) - -graph_density(object) - -graph_reciprocity(object, method = "default") - -graph_transitivity(object) - -graph_diameter(object) - -graph_length(object) - -graph_dyad_census(object) - -graph_triad_census(object) - -graph_mixed_census(object, object2) - -graph_modularity(object, membership = NULL, resolution = 1) - -graph_smallworld(object, times = 100) - -network_homophily(object, attribute) - -node_homophily(object, attribute) +test_gof(diff_model, diff_models) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} @@ -141,89 +18,7 @@ wherever possible and update your scripts accordingly. } \section{Functions}{ \itemize{ -\item \code{edge_betweenness()}: Deprecated on 2022-06-28. - -\item \code{edge_closeness()}: Deprecated on 2022-06-28. - -\item \code{edge_degree()}: Deprecated on 2022-06-28. - -\item \code{edge_eigenvector()}: Deprecated on 2022-06-28. - -\item \code{edge_loop()}: Deprecated on 2022-06-28. - -\item \code{edge_multiple()}: Deprecated on 2022-06-28. - -\item \code{edge_bridges()}: Deprecated on 2022-06-28. - -\item \code{edge_reciprocal()}: Deprecated on 2022-06-28. - -\item \code{node_cuts()}: Deprecated on 2022-06-30. - -\item \code{graph_blau_index()}: Deprecated on 2022-09-10. - -\item \code{graph_diversity()}: Deprecated on 2022-09-25. 
- -\item \code{graph_ei_index()}: Deprecated on 2022-09-10. - -\item \code{graph_homophily()}: Deprecated on 2022-09-25. - -\item \code{plot(graph_test)}: Deprecated on 2022-09-25. - -\item \code{print(graph_test)}: Deprecated on 2022-09-25. - -\item \code{print(graph_measure)}: Deprecated on 2022-09-25. - -\item \code{print(graph_motif)}: Deprecated on 2022-09-25. - -\item \code{graph_adhesion()}: Deprecated on 2022-09-25. - -\item \code{graph_cohesion()}: Deprecated on 2022-09-25. - -\item \code{graph_assortativity()}: Deprecated on 2022-09-25. - -\item \code{graph_balance()}: Deprecated on 2022-09-25. - -\item \code{graph_betweenness()}: Deprecated on 2022-09-25. - -\item \code{graph_closeness()}: Deprecated on 2022-09-25. - -\item \code{graph_degree()}: Deprecated on 2022-09-25. - -\item \code{graph_eigenvector()}: Deprecated on 2022-09-25. - -\item \code{graph_components()}: Deprecated on 2022-09-25. - -\item \code{graph_factions()}: Deprecated on 2022-09-25. - -\item \code{graph_congruency()}: Deprecated on 2022-09-25. - -\item \code{graph_equivalency()}: Deprecated on 2022-09-25. - -\item \code{graph_core()}: Deprecated on 2022-09-25. - -\item \code{graph_density()}: Deprecated on 2022-09-25. - -\item \code{graph_reciprocity()}: Deprecated on 2022-09-25. - -\item \code{graph_transitivity()}: Deprecated on 2022-09-25. - -\item \code{graph_diameter()}: Deprecated on 2022-09-25. - -\item \code{graph_length()}: Deprecated on 2022-09-25. - -\item \code{graph_dyad_census()}: Deprecated on 2022-09-25. - -\item \code{graph_triad_census()}: Deprecated on 2022-09-25. - -\item \code{graph_mixed_census()}: Deprecated on 2022-09-25. - -\item \code{graph_modularity()}: Deprecated on 2022-09-25. - -\item \code{graph_smallworld()}: Deprecated on 2022-09-25. - -\item \code{network_homophily()}: Deprecated on 2022-09-25. - -\item \code{node_homophily()}: Deprecated on 2022-09-25. +\item \code{test_gof()}: Deprecated on 2024-06-16. 
}} \keyword{internal} diff --git a/man/degree_centrality.Rd b/man/degree_centrality.Rd deleted file mode 100644 index 043ea40dc..000000000 --- a/man/degree_centrality.Rd +++ /dev/null @@ -1,176 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_centrality.R -\name{degree_centrality} -\alias{degree_centrality} -\alias{node_degree} -\alias{node_deg} -\alias{node_outdegree} -\alias{node_indegree} -\alias{node_multidegree} -\alias{node_posneg} -\alias{tie_degree} -\alias{network_degree} -\alias{network_outdegree} -\alias{network_indegree} -\title{Measures of degree-like centrality and centralisation} -\usage{ -node_degree( - .data, - normalized = TRUE, - alpha = 1, - direction = c("all", "out", "in") -) - -node_deg(.data, alpha = 0, direction = c("all", "out", "in")) - -node_outdegree(.data, normalized = TRUE, alpha = 0) - -node_indegree(.data, normalized = TRUE, alpha = 0) - -node_multidegree(.data, tie1, tie2) - -node_posneg(.data) - -tie_degree(.data, normalized = TRUE) - -network_degree(.data, normalized = TRUE, direction = c("all", "out", "in")) - -network_outdegree(.data, normalized = TRUE) - -network_indegree(.data, normalized = TRUE) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{normalized}{Logical scalar, whether the centrality scores are normalized. -Different denominators are used depending on whether the object is one-mode or two-mode, -the type of centrality, and other arguments.} - -\item{alpha}{Numeric scalar, the positive tuning parameter introduced in -Opsahl et al (2010) for trading off between degree and strength centrality measures. 
-By default, \code{alpha = 0}, which ignores tie weights and the measure is solely based -upon degree (the number of ties). -\code{alpha = 1} ignores the number of ties and provides the sum of the tie weights -as strength centrality. -Values between 0 and 1 reflect different trade-offs in the relative contributions of -degree and strength to the final outcome, with 0.5 as the middle ground. -Values above 1 penalise for the number of ties. -Of two nodes with the same sum of tie weights, the node with fewer ties will obtain -the higher score. -This argument is ignored except in the case of a weighted network.} - -\item{direction}{Character string, “out” bases the measure on outgoing ties, -“in” on incoming ties, and "all" on either/the sum of the two. -For two-mode networks, "all" uses as numerator the sum of differences -between the maximum centrality score for the mode -against all other centrality scores in the network, -whereas "in" uses as numerator the sum of differences -between the maximum centrality score for the mode -against only the centrality scores of the other nodes in that mode.} - -\item{tie1}{Character string indicating the first uniplex network.} - -\item{tie2}{Character string indicating the second uniplex network.} -} -\value{ -A single centralization score if the object was one-mode, -and two centralization scores if the object was two-mode. - -Depending on how and what kind of an object is passed to the function, -the function will return a \code{tidygraph} object where the nodes have been updated -} -\description{ -These functions calculate common degree-related centrality measures for one- and two-mode networks: -\itemize{ -\item \code{node_degree()} measures the degree centrality of nodes in an unweighted network, -or weighted degree/strength of nodes in a weighted network; -there are several related shortcut functions: -\itemize{ -\item \code{node_deg()} returns the unnormalised results. 
-\item \code{node_indegree()} returns the \code{direction = 'in'} results. -\item \code{node_outdegree()} returns the \code{direction = 'out'} results. -} -\item \code{node_multidegree()} measures the ratio between types of ties in a multiplex network. -\item \code{node_posneg()} measures the PN (positive-negative) centrality of a signed network. -\item \code{tie_degree()} measures the degree centrality of ties in a network. -\item \code{network_degree()} measures a network's degree centralization; -there are several related shortcut functions: -\itemize{ -\item \code{network_indegree()} returns the \code{direction = 'in'} results. -\item \code{network_outdegree()} returns the \code{direction = 'out'} results. -} -} - -All measures attempt to use as much information as they are offered, -including whether the networks are directed, weighted, or multimodal. -If this would produce unintended results, -first transform the salient properties using e.g. \code{\link[=to_undirected]{to_undirected()}} functions. -All centrality and centralization measures return normalized measures by default, -including for two-mode networks. -} -\examples{ -node_degree(mpn_elite_mex) -node_degree(ison_southern_women) -tie_degree(ison_adolescents) -network_degree(ison_southern_women, direction = "in") -} -\references{ -Faust, Katherine. 1997. -"Centrality in affiliation networks." -\emph{Social Networks} 19(2): 157-191. -\doi{10.1016/S0378-8733(96)00300-0}. - -Borgatti, Stephen P., and Martin G. Everett. 1997. -"Network analysis of 2-mode data." -\emph{Social Networks} 19(3): 243-270. -\doi{10.1016/S0378-8733(96)00301-2}. - -Borgatti, Stephen P., and Daniel S. Halgin. 2011. -"Analyzing affiliation networks." -In \emph{The SAGE Handbook of Social Network Analysis}, -edited by John Scott and Peter J. Carrington, 417–33. -London, UK: Sage. -\doi{10.4135/9781446294413.n28}. - -Opsahl, Tore, Filip Agneessens, and John Skvoretz. 2010. 
-"Node centrality in weighted networks: Generalizing degree and shortest paths." -\emph{Social Networks} 32, 245-251. -\doi{10.1016/j.socnet.2010.03.006} - -Everett, Martin G., and Stephen P. Borgatti. 2014. -“Networks Containing Negative Ties.” -\emph{Social Networks} 38:111–20. -\doi{10.1016/j.socnet.2014.03.005}. -} -\seealso{ -\code{\link[=to_undirected]{to_undirected()}} for removing edge directions -and \code{\link[=to_unweighted]{to_unweighted()}} for removing weights from a graph. - -Other centrality: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{eigenv_centrality}} - -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{centrality} -\concept{measures} diff --git a/man/eigenv_centrality.Rd b/man/eigenv_centrality.Rd deleted file mode 100644 index 3716e10cf..000000000 --- a/man/eigenv_centrality.Rd +++ /dev/null @@ -1,166 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_centrality.R -\name{eigenv_centrality} -\alias{eigenv_centrality} -\alias{node_eigenvector} -\alias{node_power} -\alias{node_alpha} -\alias{node_pagerank} -\alias{tie_eigenvector} -\alias{network_eigenvector} -\title{Measures of eigenvector-like centrality and centralisation} -\usage{ -node_eigenvector(.data, normalized = TRUE, scale = FALSE) - -node_power(.data, normalized = TRUE, scale = FALSE, exponent = 1) - -node_alpha(.data, alpha = 0.85) - -node_pagerank(.data) - -tie_eigenvector(.data, normalized = TRUE) - -network_eigenvector(.data, normalized = TRUE) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from 
\code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{normalized}{Logical scalar, whether the centrality scores are normalized. -Different denominators are used depending on whether the object is one-mode or two-mode, -the type of centrality, and other arguments.} - -\item{scale}{Logical scalar, whether to rescale the vector so the maximum score is 1.} - -\item{exponent}{Decay rate for the Bonacich power centrality score.} - -\item{alpha}{A constant that trades off the importance of external influence against the importance of connection. -When \eqn{\alpha = 0}, only the external influence matters. -As \eqn{\alpha} gets larger, only the connectivity matters and we reduce to eigenvector centrality. -By default \eqn{\alpha = 0.85}.} -} -\value{ -A numeric vector giving the eigenvector centrality measure of each node. - -A numeric vector giving each node's power centrality measure. -} -\description{ -These functions calculate common eigenvector-related centrality measures for one- and two-mode networks: -\itemize{ -\item \code{node_eigenvector()} measures the eigenvector centrality of nodes in a network. -\item \code{node_power()} measures the Bonacich, beta, or power centrality of nodes in a network. -\item \code{node_alpha()} measures the alpha or Katz centrality of nodes in a network. -\item \code{node_pagerank()} measures the pagerank centrality of nodes in a network. -\item \code{tie_eigenvector()} measures the eigenvector centrality of ties in a network. -\item \code{network_eigenvector()} measures the eigenvector centralization for a network. -} - -All measures attempt to use as much information as they are offered, -including whether the networks are directed, weighted, or multimodal. 
-If this would produce unintended results, -first transform the salient properties using e.g. \code{\link[=to_undirected]{to_undirected()}} functions. -All centrality and centralization measures return normalized measures by default, -including for two-mode networks. -} -\details{ -We use \code{{igraph}} routines behind the scenes here for consistency and because they are often faster. -For example, \code{igraph::eigencentrality()} is approximately 25\% faster than \code{sna::evcent()}. -} -\section{Eigenvector centrality}{ - -Eigenvector centrality operates as a measure of a node's influence in a network. -The idea is that being connected to well-connected others results in a higher score. -Each node's eigenvector centrality can be defined as: -\deqn{x_i = \frac{1}{\lambda} \sum_{j \in N} a_{i,j} x_j} -where \eqn{a_{i,j} = 1} if \eqn{i} is linked to \eqn{j} and 0 otherwise, -and \eqn{\lambda} is a constant representing the principal eigenvalue. -Rather than performing this iteration, -most routines solve the eigenvector equation \eqn{Ax = \lambda x}. -} - -\section{Power centrality}{ - -Power or beta (or Bonacich) centrality -} - -\section{Alpha centrality}{ - -Alpha or Katz (or Katz-Bonacich) centrality operates better than eigenvector centrality -for directed networks. -Eigenvector centrality will return 0s for all nodes not in the main strongly-connected component. -Each node's alpha centrality can be defined as: -\deqn{x_i = \frac{1}{\lambda} \sum_{j \in N} a_{i,j} x_j + e_i} -where \eqn{a_{i,j} = 1} if \eqn{i} is linked to \eqn{j} and 0 otherwise, -\eqn{\lambda} is a constant representing the principal eigenvalue, -and \eqn{e_i} is some external influence used to ensure that even nodes beyond the main -strongly connected component begin with some basic influence. -Note that many equations replace \eqn{\frac{1}{\lambda}} with \eqn{\alpha}, -hence the name. 
- -For example, if \eqn{\alpha = 0.5}, then each direct connection (or alter) would be worth \eqn{(0.5)^1 = 0.5}, -each secondary connection (or tertius) would be worth \eqn{(0.5)^2 = 0.25}, -each tertiary connection would be worth \eqn{(0.5)^3 = 0.125}, and so on. - -Rather than performing this iteration though, -most routines solve the equation \eqn{x = (I - \frac{1}{\lambda} A^T)^{-1} e}. -} - -\examples{ -node_eigenvector(mpn_elite_mex) -node_eigenvector(ison_southern_women) -node_power(ison_southern_women, exponent = 0.5) -tie_eigenvector(ison_adolescents) -network_eigenvector(mpn_elite_mex) -network_eigenvector(ison_southern_women) -} -\references{ -Bonacich, Phillip. 1991. -“Simultaneous Group and Individual Centralities.” -\emph{Social Networks} 13(2):155–68. -\doi{10.1016/0378-8733(91)90018-O}. - -Bonacich, Phillip. 1987. -“Power and Centrality: A Family of Measures.” -\emph{The American Journal of Sociology}, 92(5): 1170–82. -\doi{10.1086/228631}. - -Katz, Leo 1953. -"A new status index derived from sociometric analysis". -\emph{Psychometrika}. 18(1): 39–43. - -Bonacich, P. and Lloyd, P. 2001. -“Eigenvector-like measures of centrality for asymmetric relations” -\emph{Social Networks}. 23(3):191-201. - -Brin, Sergey and Page, Larry. 1998. -"The anatomy of a large-scale hypertextual web search engine". -\emph{Proceedings of the 7th World-Wide Web Conference}. Brisbane, Australia. 
-} -\seealso{ -Other centrality: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{degree_centrality}} - -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{centrality} -\concept{measures} diff --git a/man/equivalence.Rd b/man/equivalence.Rd deleted file mode 100644 index 44949237c..000000000 --- a/man/equivalence.Rd +++ /dev/null @@ -1,129 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/member_equivalence.R -\name{equivalence} -\alias{equivalence} -\alias{node_equivalence} -\alias{node_structural_equivalence} -\alias{node_regular_equivalence} -\alias{node_automorphic_equivalence} -\title{Equivalence clustering algorithms} -\source{ -\url{https://github.com/aslez/concoR} -} -\usage{ -node_equivalence( - .data, - census, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"), - range = 8L -) - -node_structural_equivalence( - .data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"), - range = 8L -) - -node_regular_equivalence( - .data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"), - range = 8L -) - -node_automorphic_equivalence( - .data, - k = c("silhouette", "elbow", "strict"), - cluster = c("hierarchical", "concor"), - distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"), - range = 8L -) -} 
-\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{census}{A matrix returned by a \verb{node_*_census()} function.} - -\item{k}{Typically a character string indicating which method -should be used to select the number of clusters to return. -By default \code{"silhouette"}, other options include \code{"elbow"} and \code{"strict"}. -\code{"strict"} returns classes with members only when strictly equivalent. -\code{"silhouette"} and \code{"elbow"} select classes based on the distance between -clusters or between nodes within a cluster. -Fewer, identifiable letters, e.g. \code{"e"} for elbow, is sufficient. -Alternatively, if \code{k} is passed an integer, e.g. \code{k = 3}, -then all selection routines are skipped in favour of this number of clusters.} - -\item{cluster}{Character string indicating whether clusters should be -clustered hierarchically (\code{"hierarchical"}) or -through convergence of correlations (\code{"concor"}). -Fewer, identifiable letters, e.g. \code{"c"} for CONCOR, is sufficient.} - -\item{distance}{Character string indicating which distance metric -to pass on to \code{stats::dist}. -By default \code{"euclidean"}, but other options include -\code{"maximum"}, \code{"manhattan"}, \code{"canberra"}, \code{"binary"}, and \code{"minkowski"}. -Fewer, identifiable letters, e.g. \code{"e"} for Euclidean, is sufficient.} - -\item{range}{Integer indicating the maximum number of (k) clusters -to evaluate. 
-Ignored when \code{k = "strict"} or a discrete number is given for \code{k}.} -} -\description{ -These functions combine an appropriate \verb{_census()} function -together with methods for calculating the hierarchical clusters -provided by a certain distance calculation. -\itemize{ -\item \code{node_equivalence()} assigns nodes membership based on their equivalence -with respect to some census/class. -The following functions call this function, together with an appropriate census. -\itemize{ -\item \code{node_structural_equivalence()} assigns nodes membership based on their -having equivalent ties to the same other nodes. -\item \code{node_regular_equivalence()} assigns nodes membership based on their -having equivalent patterns of ties. -\item \code{node_automorphic_equivalence()} assigns nodes membership based on their -having equivalent distances to other nodes. -} -} - -A \code{plot()} method exists for investigating the dendrogram -of the hierarchical cluster and showing the returned cluster -assignment. 
-} -\examples{ -\donttest{ -(nse <- node_structural_equivalence(mpn_elite_usa_advice)) -plot(nse) -} -\donttest{ -(nre <- node_regular_equivalence(mpn_elite_usa_advice, - cluster = "concor")) -plot(nre) -} -\donttest{ -(nae <- node_automorphic_equivalence(mpn_elite_usa_advice, - k = "elbow")) -plot(nae) -} -} -\seealso{ -Other memberships: -\code{\link{cliques}}, -\code{\link{community}}, -\code{\link{components}()}, -\code{\link{core}} -} -\concept{memberships} diff --git a/man/features.Rd b/man/features.Rd deleted file mode 100644 index f7657c76e..000000000 --- a/man/features.Rd +++ /dev/null @@ -1,181 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_features.R -\name{features} -\alias{features} -\alias{network_core} -\alias{network_richclub} -\alias{network_factions} -\alias{network_modularity} -\alias{network_smallworld} -\alias{network_scalefree} -\alias{network_balance} -\title{Measures of network topological features} -\source{ -\code{{signnet}} by David Schoch -} -\usage{ -network_core(.data, membership = NULL) - -network_richclub(.data) - -network_factions(.data, membership = NULL) - -network_modularity(.data, membership = NULL, resolution = 1) - -network_smallworld(.data, method = c("omega", "sigma", "SWI"), times = 100) - -network_scalefree(.data) - -network_balance(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{membership}{A vector of partition membership.} - -\item{resolution}{A proportion indicating the resolution scale. 
-By default 1.} - -\item{method}{There are three small-world measures implemented: -\itemize{ -\item "sigma" is the original equation from Watts and Strogatz (1998), -\deqn{\frac{\frac{C}{C_r}}{\frac{L}{L_r}}}, -where \eqn{C} and \eqn{L} are the observed -clustering coefficient and path length, respectively, -and \eqn{C_r} and \eqn{L_r} are the averages obtained from -random networks of the same dimensions and density. -A \eqn{\sigma > 1} is considered to be small-world, -but this measure is highly sensitive to network size. -\item "omega" (the default) is an update from Telesford et al. (2011), -\deqn{\frac{L_r}{L} - \frac{C}{C_l}}, -where \eqn{C_l} is the clustering coefficient for a lattice graph -with the same dimensions. -\eqn{\omega} ranges between 0 and 1, -where 1 is as close to a small-world as possible. -\item "SWI" is an alternative proposed by Neal (2017), -\deqn{\frac{L - L_l}{L_r - L_l} \times \frac{C - C_r}{C_l - C_r}}, -where \eqn{L_l} is the average path length for a lattice graph -with the same dimensions. -\eqn{SWI} also ranges between 0 and 1 with the same interpretation, -but where there may not be a network for which \eqn{SWI = 1}. -}} - -\item{times}{Integer of number of simulations.} -} -\description{ -These functions measure certain topological features of networks: -\itemize{ -\item \code{network_core()} measures the correlation between a network -and a core-periphery model with the same dimensions. -\item \code{network_richclub()} measures the rich-club coefficient of a network. -\item \code{network_factions()} measures the correlation between a network -and a component model with the same dimensions. -If no 'membership' vector is given for the data, -\code{node_kernighanlin()} is used to partition nodes into two groups. -\item \code{network_modularity()} measures the modularity of a network -based on nodes' membership in defined clusters. 
-\item \code{network_smallworld()} measures the small-world coefficient for one- or -two-mode networks. Small-world networks can be highly clustered and yet -have short path lengths. -\item \code{network_scalefree()} measures the exponent of a fitted -power-law distribution. An exponent between 2 and 3 usually indicates -a power-law distribution. -\item \code{network_balance()} measures the structural balance index on -the proportion of balanced triangles, -ranging between \code{0} if all triangles are imbalanced and -\code{1} if all triangles are balanced. -\item \code{network_change()} measures the Hamming distance between two or more networks. -\item \code{network_stability()} measures the Jaccard index of stability between two or more networks. -} - -These \verb{network_*()} functions return a single numeric scalar or value. -} -\section{Modularity}{ - -Modularity measures the difference between the number of ties within each community -from the number of ties expected within each community in a random graph -with the same degrees, and ranges between -1 and +1. -Modularity scores of +1 mean that ties only appear within communities, -while -1 would mean that ties only appear between communities. -A score of 0 would mean that ties are half within and half between communities, -as one would expect in a random graph. - -Modularity faces a difficult problem known as the resolution limit -(Fortunato and Barthélemy 2007). -This problem appears when optimising modularity, -particularly with large networks or depending on the degree of interconnectedness, -can miss small clusters that 'hide' inside larger clusters. -In the extreme case, this can be where they are only connected -to the rest of the network through a single tie. 
-} - -\examples{ -network_core(ison_adolescents) -network_core(ison_southern_women) -network_richclub(ison_adolescents) - network_factions(mpn_elite_mex) - network_factions(ison_southern_women) -network_modularity(ison_adolescents, - node_kernighanlin(ison_adolescents)) -network_modularity(ison_southern_women, - node_kernighanlin(ison_southern_women)) -network_smallworld(ison_brandes) -network_smallworld(ison_southern_women) -network_scalefree(ison_adolescents) -network_scalefree(generate_scalefree(50, 1.5)) -network_scalefree(create_lattice(100)) -network_balance(ison_marvel_relationships) -} -\references{ -Borgatti, Stephen P., and Martin G. Everett. 2000. -“Models of Core/Periphery Structures.” -\emph{Social Networks} 21(4):375–95. -\doi{10.1016/S0378-8733(99)00019-2} - -Murata, Tsuyoshi. 2010. Modularity for Bipartite Networks. -In: Memon, N., Xu, J., Hicks, D., Chen, H. (eds) -\emph{Data Mining for Social Network Data. Annals of Information Systems}, Vol 12. -Springer, Boston, MA. -\doi{10.1007/978-1-4419-6287-4_7} - -Watts, Duncan J., and Steven H. Strogatz. 1998. -“Collective Dynamics of ‘Small-World’ Networks.” -\emph{Nature} 393(6684):440–42. -\doi{10.1038/30918}. - -Telesford QK, Joyce KE, Hayasaka S, Burdette JH, Laurienti PJ. 2011. -"The ubiquity of small-world networks". -\emph{Brain Connectivity} 1(5): 367–75. -\doi{10.1089/brain.2011.0038}. - -Neal Zachary P. 2017. -"How small is it? Comparing indices of small worldliness". -\emph{Network Science}. 5 (1): 30–44. -\doi{10.1017/nws.2017.5}. 
-} -\seealso{ -\code{\link[=network_transitivity]{network_transitivity()}} and \code{\link[=network_equivalency]{network_equivalency()}} -for how clustering is calculated - -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/figures/logo.png b/man/figures/logo.png index a21b608a5..13092a832 100644 Binary files a/man/figures/logo.png and b/man/figures/logo.png differ diff --git a/man/figures/logo_old.png b/man/figures/logo_old.png new file mode 100644 index 000000000..a21b608a5 Binary files /dev/null and b/man/figures/logo_old.png differ diff --git a/man/heterogeneity.Rd b/man/heterogeneity.Rd deleted file mode 100644 index 69d0c891c..000000000 --- a/man/heterogeneity.Rd +++ /dev/null @@ -1,141 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_heterogeneity.R -\name{heterogeneity} -\alias{heterogeneity} -\alias{network_richness} -\alias{node_richness} -\alias{network_diversity} -\alias{node_diversity} -\alias{network_heterophily} -\alias{node_heterophily} -\alias{network_assortativity} -\alias{network_spatial} -\title{Measures of network diversity} -\usage{ -network_richness(.data, attribute) - -node_richness(.data, attribute) - -network_diversity(.data, attribute, clusters = NULL) - -node_diversity(.data, attribute) - -network_heterophily(.data, attribute) - -node_heterophily(.data, attribute) - -network_assortativity(.data) - -network_spatial(.data, attribute) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble 
from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{attribute}{Name of a nodal attribute or membership vector -to use as categories for the diversity measure.} - -\item{clusters}{A nodal cluster membership vector or name of a vertex attribute.} -} -\description{ -These functions offer ways to summarise the heterogeneity of an attribute -across a network, within groups of a network, or the distribution of ties -across this attribute: -\itemize{ -\item \code{network_richness()} measures the number of unique categories -in a network attribute. -\item \code{node_richness()} measures the number of unique categories -of an attribute to which each node is connected. -\item \code{network_diversity()} measures the heterogeneity of ties across a network -or within clusters by node attributes. -\item \code{node_diversity()} measures the heterogeneity of each node's -local neighbourhood. -\item \code{network_heterophily()} measures how embedded nodes in the network -are within groups of nodes with the same attribute. -\item \code{node_heterophily()} measures each node's embeddedness within groups -of nodes with the same attribute. -\item \code{network_assortativity()} measures the degree assortativity in a network. -\item \code{network_spatial()} measures the spatial association/autocorrelation ( -global Moran's I) in a network. -} -} -\section{network_diversity}{ - -Blau's index (1977) uses a formula known also in other disciplines -by other names -(Gini-Simpson Index, Gini impurity, Gini's diversity index, -Gibbs-Martin index, and probability of interspecific encounter (PIE)): -\deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }}, -where \eqn{p_i} is the proportion of group members in \eqn{i}th category -and \eqn{k} is the number of categories for an attribute of interest. 
-This index can be interpreted as the probability that two members -randomly selected from a group would be from different categories. -This index finds its minimum value (0) when there is no variety, -i.e. when all individuals are classified in the same category. -The maximum value depends on the number of categories and -whether nodes can be evenly distributed across categories. -} - -\section{network_homophily}{ - -Given a partition of a network into a number of mutually exclusive groups then -The E-I index is the number of ties between (or \emph{external}) nodes -grouped in some mutually exclusive categories -minus the number of ties within (or \emph{internal}) these groups -divided by the total number of ties. -This value can range from 1 to -1, -where 1 indicates ties only between categories/groups and -1 ties only within categories/groups. -} - -\examples{ -network_richness(mpn_bristol) -node_richness(mpn_bristol, "type") -marvel_friends <- manynet::to_unsigned(manynet::ison_marvel_relationships, "positive") -network_diversity(marvel_friends, "Gender") -network_diversity(marvel_friends, "Attractive") -network_diversity(marvel_friends, "Gender", "Rich") -node_diversity(marvel_friends, "Gender") -node_diversity(marvel_friends, "Attractive") -network_heterophily(marvel_friends, "Gender") -network_heterophily(marvel_friends, "Attractive") -node_heterophily(marvel_friends, "Gender") -node_heterophily(marvel_friends, "Attractive") -network_assortativity(mpn_elite_mex) -network_spatial(ison_lawfirm, "age") -} -\references{ -Blau, Peter M. (1977). -\emph{Inequality and heterogeneity}. -New York: Free Press. - -Krackhardt, David and Robert N. Stern (1988). -Informal networks and organizational crises: an experimental simulation. -\emph{Social Psychology Quarterly} 51(2), 123-140. - -Moran, Patrick Alfred Pierce. 1950. -"Notes on Continuous Stochastic Phenomena". -\emph{Biometrika} 37(1): 17-23. 
-\doi{10.2307/2332142} -} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/hierarchy.Rd b/man/hierarchy.Rd deleted file mode 100644 index a54247f83..000000000 --- a/man/hierarchy.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_hierarchy.R -\name{hierarchy} -\alias{hierarchy} -\alias{network_connectedness} -\alias{network_efficiency} -\alias{network_upperbound} -\title{Graph theoretic dimensions of hierarchy} -\usage{ -network_connectedness(.data) - -network_efficiency(.data) - -network_upperbound(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These functions, together with \code{network_reciprocity()}, are used jointly to -measure how hierarchical a network is: -\itemize{ -\item \code{network_connectedness()} measures the proportion of dyads in the network -that are reachable to one another, -or the degree to which network is a single component. -\item \code{network_efficiency()} measures the Krackhardt efficiency score. -\item \code{network_upperbound()} measures the Krackhardt (least) upper bound score. 
-} -} -\examples{ -network_connectedness(ison_networkers) -1 - network_reciprocity(ison_networkers) -network_efficiency(ison_networkers) -network_upperbound(ison_networkers) -} -\references{ -Krackhardt, David. 1994. -Graph theoretical dimensions of informal organizations. -In Carley and Prietula (eds) \emph{Computational Organizational Theory}, -Hillsdale, NJ: Lawrence Erlbaum Associates. Pp. 89-111. - -Everett, Martin, and David Krackhardt. 2012. -“A second look at Krackhardt's graph theoretical dimensions of informal organizations.” -\emph{Social Networks}, 34: 159-163. -\doi{10.1016/j.socnet.2011.10.006} -} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/holes.Rd b/man/holes.Rd deleted file mode 100644 index 7b8e66766..000000000 --- a/man/holes.Rd +++ /dev/null @@ -1,132 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_holes.R -\name{holes} -\alias{holes} -\alias{node_bridges} -\alias{node_redundancy} -\alias{node_effsize} -\alias{node_efficiency} -\alias{node_constraint} -\alias{node_hierarchy} -\alias{node_eccentricity} -\alias{node_neighbours_degree} -\alias{tie_cohesion} -\title{Measures of structural holes} -\usage{ -node_bridges(.data) - -node_redundancy(.data) - -node_effsize(.data) - -node_efficiency(.data) - -node_constraint(.data) - -node_hierarchy(.data) - -node_eccentricity(.data) - -node_neighbours_degree(.data) - -tie_cohesion(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or 
tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These function provide different measures of the degree to which nodes -fill structural holes, as outlined in Burt (1992): -\itemize{ -\item \code{node_bridges()} measures the sum of bridges to which each node -is adjacent. -\item \code{node_redundancy()} measures the redundancy of each nodes' contacts. -\item \code{node_effsize()} measures nodes' effective size. -\item \code{node_efficiency()} measures nodes' efficiency. -\item \code{node_constraint()} measures nodes' constraint scores for one-mode networks -according to Burt (1992) and for two-mode networks according to Hollway et al (2020). -\item \code{node_hierarchy()} measures nodes' exposure to hierarchy, -where only one or two contacts are the source of closure. -\item \code{node_eccentricity()} measures nodes' eccentricity or Koenig number, -a measure of farness based on number of links needed to reach -most distant node in the network. -\item \code{node_neighbours_degree()} measures nodes' average nearest neighbors degree, -or \eqn{knn}, a measure of the type of local environment a node finds itself in -\item \code{tie_cohesion()} measures the ratio between common neighbors to ties' -adjacent nodes and the total number of adjacent nodes, -where high values indicate ties' embeddedness in dense local environments -} - -Burt's theory holds that while those nodes embedded in dense clusters -of close connections are likely exposed to the same or similar ideas and information, -those who fill structural holes between two otherwise disconnected groups -can gain some comparative advantage from that position. -} -\details{ -A number of different ways of measuring these structural holes are available. 
-Note that we use Borgatti's reformulation for unweighted networks in -\code{node_redundancy()} and \code{node_effsize()}. -Redundancy is thus \eqn{\frac{2t}{n}}, -where \eqn{t} is the sum of ties and \eqn{n} the sum of nodes in each node's neighbourhood, -and effective size is calculated as \eqn{n - \frac{2t}{n}}. -Node efficiency is the node's effective size divided by its degree. -} -\examples{ -node_bridges(ison_adolescents) -node_bridges(ison_southern_women) -node_redundancy(ison_adolescents) -node_redundancy(ison_southern_women) -node_effsize(ison_adolescents) -node_effsize(ison_southern_women) -node_efficiency(ison_adolescents) -node_efficiency(ison_southern_women) -node_constraint(ison_southern_women) -node_hierarchy(ison_adolescents) -node_hierarchy(ison_southern_women) -} -\references{ -Burt, Ronald S. 1992. -\emph{Structural Holes: The Social Structure of Competition}. -Cambridge, MA: Harvard University Press. - -Borgatti, Steven. 1997. -“\href{http://www.analytictech.com/connections/v20(1)/holes.htm}{Structural Holes: Unpacking Burt’s Redundancy Measures}” -\emph{Connections} 20(1):35-38. - -Burchard, Jake, and Benjamin Cornwell. 2018. -“Structural Holes and Bridging in Two-Mode Networks.” -\emph{Social Networks} 55:11–20. -\doi{10.1016/j.socnet.2018.04.001} - -Hollway, James, Jean-Frédéric Morin, and Joost Pauwelyn. 2020. -"Structural conditions for novelty: the introduction of new environmental clauses to the trade regime complex." -\emph{International Environmental Agreements: Politics, Law and Economics} 20 (1): 61–83. -\doi{10.1007/s10784-019-09464-5}. - -Barrat, Alain, Marc Barthelemy, Romualdo Pastor-Satorras, and Alessandro Vespignani. 2004. -"The architecture of complex weighted networks", -\emph{Proc. Natl. Acad. Sci.} 101: 3747. 
-} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} -} -\concept{measures} diff --git a/man/kselect.Rd b/man/kselect.Rd deleted file mode 100644 index 7b45409e4..000000000 --- a/man/kselect.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/model_k.R -\name{kselect} -\alias{kselect} -\alias{k_strict} -\alias{k_elbow} -\alias{k_silhouette} -\title{Methods for selecting clusters} -\usage{ -k_strict(hc, .data) - -k_elbow(hc, .data, census, range) - -k_silhouette(hc, .data, range) -} -\arguments{ -\item{hc}{A hierarchical clustering object.} - -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{census}{A motif census object.} - -\item{range}{An integer indicating the maximum number of options to consider. -The minimum of this and the number of nodes in the network is used.} -} -\description{ -These functions help select the number of clusters to return from \code{hc}, -some hierarchical clustering object: -\itemize{ -\item \code{k_strict()} selects a number of clusters in which there is no -distance between cluster members. -\item \code{k_elbow()} selects a number of clusters in which there is -a fair trade-off between parsimony and fit according to the elbow method. 
-\item \code{k_silhouette()} selects a number of clusters that -optimises the silhouette score. -} - -These functions are generally not user-facing but used internally -in e.g. the \verb{*_equivalence()} functions. -} -\references{ -Thorndike, Robert L. 1953. -"Who Belongs in the Family?". -\emph{Psychometrika}, 18(4): 267–76. -\doi{10.1007/BF02289263}. - -Rousseeuw, Peter J. 1987. -“Silhouettes: A Graphical Aid to the Interpretation and Validation of Cluster Analysis.” -\emph{Journal of Computational and Applied Mathematics}, 20: 53–65. -\doi{10.1016/0377-0427(87)90125-7}. -} diff --git a/man/migraph-package.Rd b/man/migraph-package.Rd index fe13b7689..d0f9c796b 100644 --- a/man/migraph-package.Rd +++ b/man/migraph-package.Rd @@ -4,11 +4,11 @@ \name{migraph-package} \alias{migraph} \alias{migraph-package} -\title{migraph: Many Network Measures, Motifs, Members, and Models} +\title{migraph: Univariate and multivariate tests for multimodal and other networks} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -A set of tools for analysing multimodal networks. It includes functions for measuring centrality, centralization, cohesion, closure, constraint and diversity, as well as for network block-modelling, regression, and diffusion models. The package is released as a complement to 'Multimodal Political Networks' (2021, ISBN:9781108985000), and includes various datasets used in the book. Built on the 'manynet' package, all functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, and on one-mode, two-mode (bipartite), and sometimes three-mode networks. +A set of tools for testing networks. It includes functions for univariate and multivariate conditional uniform graph and quadratic assignment procedure testing, and network regression. The package is a complement to 'Multimodal Political Networks' (2021, ISBN:9781108985000), and includes various datasets used in the book. 
Built on the 'manynet' package, all functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, and on one-mode and two-mode (bipartite) networks. } \seealso{ Useful links: diff --git a/man/net_diffusion.Rd b/man/net_diffusion.Rd deleted file mode 100644 index 2f130e62c..000000000 --- a/man/net_diffusion.Rd +++ /dev/null @@ -1,206 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_diffusion.R -\name{net_diffusion} -\alias{net_diffusion} -\alias{network_transmissibility} -\alias{network_infection_length} -\alias{network_reproduction} -\alias{network_immunity} -\alias{network_hazard} -\title{Measures of network diffusion} -\source{ -\code{{netdiffuseR}} -} -\usage{ -network_transmissibility(diff_model) - -network_infection_length(diff_model) - -network_reproduction(diff_model) - -network_immunity(diff_model) - -network_hazard(diff_model) -} -\arguments{ -\item{diff_model}{A valid network diffusion model, -as created by \code{as_diffusion()} or \code{play_diffusion()}.} -} -\description{ -These functions allow measurement of various features of -a diffusion process: -\itemize{ -\item \code{network_transmissibility()} measures the average transmissibility observed -in a diffusion simulation, or the number of new infections over -the number of susceptible nodes. -\item \code{network_infection_length()} measures the average number of time steps -nodes remain infected once they become infected. -\item \code{network_reproduction()} measures the observed reproductive number -in a diffusion simulation as the network's transmissibility over -the network's average infection length. -\item \code{network_immunity()} measures the proportion of nodes that would need -to be protected through vaccination, isolation, or recovery for herd immunity to be reached. 
-\item \code{network_hazard()} measures the hazard rate or instantaneous probability that -nodes will adopt/become infected at that time -} -} -\section{Transmissibility}{ - -\code{network_transmissibility()} measures how many directly susceptible nodes -each infected node will infect in each time period, on average. -That is: -\deqn{T = \frac{1}{n}\sum_{j=1}^n \frac{i_{j}}{s_{j}}} -where \eqn{i} is the number of new infections in each time period, \eqn{j \in n}, -and \eqn{s} is the number of nodes that could have been infected in that time period -(note that \eqn{s \neq S}, or -the number of nodes that are susceptible in the population). -\eqn{T} can be interpreted as the proportion of susceptible nodes that are -infected at each time period. -} - -\section{Infection length}{ - -\code{network_infection_length()} measures the average number of time steps that -nodes in a network remain infected. -Note that in a diffusion model without recovery, average infection length -will be infinite. -This will also be the case where there is right censoring. -The longer nodes remain infected, the longer they can infect others. -} - -\section{Reproduction number}{ - -\code{network_reproduction()} measures a given diffusion's reproductive number. -Here it is calculated as: -\deqn{R = \min\left(\frac{T}{1/IL}, \bar{k}\right)} -where \eqn{T} is the observed transmissibility in a diffusion -and \eqn{IL} is the observed infection length in a diffusion. -Since \eqn{IL} can be infinite where there is no recovery -or there is right censoring, -and since network structure places an upper limit on how many -nodes each node may further infect (their degree), -this function returns the minimum of \eqn{R_0} -and the network's average degree. - -Interpretation of the reproduction number is oriented around R = 1. -Where \eqn{R > 1}, the 'disease' will 'infect' more and more -nodes in the network. -Where \eqn{R < 1}, the 'disease' will not sustain itself and eventually -die out. 
-Where \eqn{R = 1}, the 'disease' will continue as endemic, -if conditions allow. -} - -\section{Herd immunity}{ - -\code{network_immunity()} estimates the proportion of a network -that need to be protected from infection for herd immunity -to be achieved. -This is known as the Herd Immunity Threshold or HIT: -\deqn{1 - \frac{1}{R}} -where \eqn{R} is the reproduction number from \code{network_reproduction()}. -The HIT indicates the threshold at which -the reduction of susceptible members of the network means -that infections will no longer keep increasing. -Note that there may still be more infections after this threshold has been reached, -but there should be fewer and fewer. -These excess infections are called the \emph{overshoot}. -This function does \emph{not} take into account the structure -of the network, instead using the average degree. - -Interpretation is quite straightforward. -A HIT or immunity score of 0.75 would mean that 75\% of the nodes in the network -would need to be vaccinated or otherwise protected to achieve herd immunity. -To identify how many nodes this would be, multiply this proportion with the number -of nodes in the network. -} - -\section{Hazard rate}{ - -The hazard rate is the instantaneous probability of adoption/infection at each time point (Allison 1984). 
-In survival analysis, hazard rate is formally defined as: - -\deqn{% -\lambda(t)=\lim_{h\to +0}\frac{F(t+h)-F(t)}{h}\frac{1}{1-F(t)} % -}{% -\lambda(t-1)= lim (t -> +0) [F(t+h)-F(t)]/h * 1/[1-F(t)] % -} - -By approximating \eqn{h=1}, we can rewrite the equation as - -\deqn{% -\lambda(t)=\frac{F(t+1)-F(t)}{1-F(t)} % -}{% -\lambda(t-1)= [F(t+1)-F(t)]/[1-F(t)] % -} - -If we estimate \eqn{F(t)}, -the probability of not having adopted the innovation in time \eqn{t}, -from the proportion of adopters in that time, -such that \eqn{F(t) \sim q_t/n}{F(t) ~ q(t)/n}, we now have (ultimately for \eqn{t>1}): - -\deqn{% -\lambda(t)=\frac{q_{t+1}/n-q_t/n}{1-q_t/n} = \frac{q_{t+1} - q_t}{n - q_t} = \frac{q_t - q_{t-1}}{n - q_{t-1}} % -}{% -\lambda(t-1)= [q(t+1)/n-q(t)/n]/[1-q(t)/n] = [q(t+1) - q(t)]/[n - q(t)] = [q(t) - q(t-1)]/[n - q(t-1)] % -} - -where \eqn{q_i}{q(i)} is the number of adopters in time \eqn{t}, -and \eqn{n} is the number of vertices in the graph. - -The shape of the hazard rate indicates the pattern of new adopters over time. -Rapid diffusion with convex cumulative adoption curves will have -hazard functions that peak early and decay over time. -Slow concave cumulative adoption curves will have -hazard functions that are low early and rise over time. -Smooth hazard curves indicate constant adoption whereas -those that oscillate indicate variability in adoption behavior over time. 
-} - -\examples{ - smeg <- manynet::generate_smallworld(15, 0.025) - smeg_diff <- play_diffusion(smeg, recovery = 0.2) - plot(smeg_diff) - # To calculate the average transmissibility for a given diffusion model - network_transmissibility(smeg_diff) - # To calculate the average infection length for a given diffusion model - network_infection_length(smeg_diff) - # To calculate the reproduction number for a given diffusion model - network_reproduction(smeg_diff) - # Calculating the proportion required to achieve herd immunity - network_immunity(smeg_diff) - # To find the number of nodes to be vaccinated - ceiling(network_immunity(smeg_diff) * manynet::network_nodes(smeg)) -# To calculate the hazard rates at each time point -network_hazard(play_diffusion(smeg, transmissibility = 0.3)) -} -\references{ -Kermack, W. and McKendrick, A., 1927. "A contribution to the mathematical theory of epidemics". -\emph{Proc. R. Soc. London A} 115: 700-721. - -Allison, P. 1984. \emph{Event history analysis regression for longitudinal event data}. -London: Sage Publications. - -Wooldridge, J. M. 2010. \emph{Econometric Analysis of Cross Section and Panel Data} (2nd ed.). -Cambridge: MIT Press. 
-} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{node_diffusion}}, -\code{\link{periods}} - -Other diffusion: -\code{\link{node_diffusion}} -} -\concept{diffusion} -\concept{measures} diff --git a/man/network_census.Rd b/man/network_census.Rd deleted file mode 100644 index 3f6eeaf0a..000000000 --- a/man/network_census.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/motif_census.R -\name{network_census} -\alias{network_census} -\alias{network_dyad_census} -\alias{network_triad_census} -\alias{network_mixed_census} -\title{Censuses of motifs at the network level} -\source{ -Alejandro Espinosa 'netmem' -} -\usage{ -network_dyad_census(.data) - -network_triad_census(.data) - -network_mixed_census(.data, object2) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{object2}{A second, two-mode migraph-consistent object.} -} -\description{ -These functions include ways to take a census of the positions of nodes -in a network: -\itemize{ -\item \code{network_dyad_census()} returns a census of dyad motifs in a network. -\item \code{network_triad_census()} returns a census of triad motifs in a network. -\item \code{network_mixed_census()} returns a census of triad motifs that span -a one-mode and a two-mode network. 
-} -} -\examples{ -network_dyad_census(manynet::ison_algebra) -network_triad_census(manynet::ison_adolescents) -marvel_friends <- manynet::to_unsigned(manynet::ison_marvel_relationships, "positive") -(mixed_cen <- network_mixed_census(marvel_friends, manynet::ison_marvel_teams)) -} -\references{ -Davis, James A., and Samuel Leinhardt. 1967. -“\href{https://files.eric.ed.gov/fulltext/ED024086.pdf}{The Structure of Positive Interpersonal Relations in Small Groups}.” 55. - -Hollway, James, Alessandro Lomi, Francesca Pallotti, and Christoph Stadtfeld. 2017. -“Multilevel Social Spaces: The Network Dynamics of Organizational Fields.” -\emph{Network Science} 5(2): 187–212. -\doi{10.1017/nws.2017.8} -} -\seealso{ -Other motifs: -\code{\link{brokerage_census}}, -\code{\link{node_census}} -} -\concept{motifs} diff --git a/man/node_census.Rd b/man/node_census.Rd deleted file mode 100644 index a9841edac..000000000 --- a/man/node_census.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/motif_census.R -\name{node_census} -\alias{node_census} -\alias{node_tie_census} -\alias{node_triad_census} -\alias{node_quad_census} -\alias{node_path_census} -\title{Censuses of nodes' motifs} -\usage{ -node_tie_census(.data) - -node_triad_census(.data) - -node_quad_census(.data) - -node_path_census(.data) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} -} -\description{ -These functions include ways to take a census of the positions of nodes -in a network: -\itemize{ -\item \code{node_tie_census()} returns a census of the ties in a network. 
-For directed networks, out-ties and in-ties are bound together. -for multiplex networks, the various types of ties are bound together. -\item \code{node_triad_census()} returns a census of the triad configurations -nodes are embedded in. -\item \code{node_quad_census()} returns a census of nodes' positions -in motifs of four nodes. -\item \code{node_path_census()} returns the shortest path lengths -of each node to every other node in the network. -} -} -\section{Quad census}{ - -The quad census uses the \code{{oaqc}} package to do -the heavy lifting of counting the number of each orbits. -See \code{vignette('oaqc')}. -However, our function relabels some of the motifs -to avoid conflicts and improve some consistency with -other census-labelling practices. -The letter-number pairing of these labels indicate -the number and configuration of ties. -For now, we offer a rough translation:\tabular{ll}{ - migraph \tab Ortmann and Brandes \cr - E4 \tab co-K4 \cr - I40, I41 \tab co-diamond \cr - H4 \tab co-C4 \cr - L42, L41, L40 \tab co-paw \cr - D42, D40 \tab co-claw \cr - U42, U41 \tab P4 \cr - Y43, Y41 \tab claw \cr - P43, P42, P41 \tab paw \cr - 04 \tab C4 \cr - Z42, Z43 \tab diamond \cr - X4 \tab K4 \cr -} - - -See also \href{https://www.graphclasses.org/smallgraphs.html#nodes4}{this list of graph classes}. -} - -\examples{ -task_eg <- manynet::to_named(manynet::to_uniplex(manynet::ison_algebra, "tasks")) -(tie_cen <- node_tie_census(task_eg)) -(triad_cen <- node_triad_census(task_eg)) -node_quad_census(manynet::ison_southern_women) -node_path_census(manynet::ison_adolescents) -node_path_census(manynet::ison_southern_women) -} -\references{ -Davis, James A., and Samuel Leinhardt. 1967. -“\href{https://files.eric.ed.gov/fulltext/ED024086.pdf}{The Structure of Positive Interpersonal Relations in Small Groups}.” 55. - -Ortmann, Mark, and Ulrik Brandes. 2017. 
-“Efficient Orbit-Aware Triad and Quad Census in Directed and Undirected Graphs.” -\emph{Applied Network Science} 2(1):13. -\doi{10.1007/s41109-017-0027-2}. - -Dijkstra, Edsger W. 1959. -"A note on two problems in connexion with graphs". -\emph{Numerische Mathematik} 1, 269-71. -\doi{10.1007/BF01386390}. - -Opsahl, Tore, Filip Agneessens, and John Skvoretz. 2010. -"Node centrality in weighted networks: Generalizing degree and shortest paths". -\emph{Social Networks} 32(3): 245-51. -\doi{10.1016/j.socnet.2010.03.006}. -} -\seealso{ -Other motifs: -\code{\link{brokerage_census}}, -\code{\link{network_census}} -} -\concept{motifs} diff --git a/man/node_diffusion.Rd b/man/node_diffusion.Rd deleted file mode 100644 index 2475f0afc..000000000 --- a/man/node_diffusion.Rd +++ /dev/null @@ -1,155 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_diffusion.R -\name{node_diffusion} -\alias{node_diffusion} -\alias{node_adoption_time} -\alias{node_adopter} -\alias{node_thresholds} -\alias{node_infection_length} -\alias{node_exposure} -\title{Measures of nodes in a diffusion} -\usage{ -node_adoption_time(diff_model) - -node_adopter(diff_model) - -node_thresholds(diff_model) - -node_infection_length(diff_model) - -node_exposure(.data, mark, time = 0) -} -\arguments{ -\item{diff_model}{A valid network diffusion model, -as created by \code{as_diffusion()} or \code{play_diffusion()}.} - -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{mark}{A valid 'node_mark' object or -logical vector (TRUE/FALSE) of length equal to -the number of nodes in the network.} - -\item{time}{A time point until which 
infections/adoptions should be -identified. By default \code{time = 0}.} -} -\description{ -These functions allow measurement of various features of -a diffusion process: -\itemize{ -\item \code{node_adoption_time()}: Measures the number of time steps until -nodes adopt/become infected -\item \code{node_adopter()}: Classifies membership of nodes into diffusion categories -\item \code{node_thresholds()}: Measures nodes' thresholds from the amount -of exposure they had when they became infected -\item \code{node_infection_length()}: Measures the average length nodes that become -infected remain infected in a compartmental model with recovery -\item \code{node_exposure()}: Measures how many exposures nodes have to -a given mark -\item \code{node_is_exposed()}: Marks the nodes that are susceptible, -i.e. are in the immediate neighbourhood of given mark vector -} -} -\section{Adoption time}{ - -\code{node_adoption_time()} measures the time units it took -until each node became infected. -Note that an adoption time of 0 indicates that this was a seed node. -} - -\section{Adopter class}{ - -\code{node_adopter()} classifies the nodes involved in a diffusion -by where on the distribution of adopters they fell. -Valente (1995) defines five memberships: -\itemize{ -\item \emph{Early adopter}: those with an adoption time less than -the average adoption time minus one standard deviation of adoptions times -\item \emph{Early majority}: those with an adoption time between -the average adoption time and -the average adoption time minus one standard deviation of adoptions times -\item \emph{Late majority}: those with an adoption time between -the average adoption time and -the average adoption time plus one standard deviation of adoptions times -\item \emph{Laggard}: those with an adoption time greater than -the average adoption time plus one standard deviation of adoptions times -\item \emph{Non-adopter}: those without an adoption time, -i.e. 
never adopted -} -} - -\section{Thresholds}{ - -\code{node_thresholds()} infers nodes' thresholds based on how much -exposure they had when they were infected. -This inference is of course imperfect, -especially where there is a sudden increase in exposure, -but it can be used heuristically. -} - -\section{Infection length}{ - -\code{node_infection_length()} measures the average length of time that nodes -that become infected remain infected in a compartmental model with recovery. -Infections that are not concluded by the end of the study period are -calculated as infinite. -} - -\section{Exposure}{ - -\code{node_exposure()} calculates the number of infected/adopting nodes -to which each susceptible node is exposed. -It usually expects network data and -an index or mark (TRUE/FALSE) vector of those nodes which are currently infected, -but if a diff_model is supplied instead it will return -nodes exposure at \eqn{t = 0}. -} - -\examples{ - smeg <- manynet::generate_smallworld(15, 0.025) - smeg_diff <- play_diffusion(smeg, recovery = 0.2) - plot(smeg_diff) - # To measure when nodes adopted a diffusion/were infected - (times <- node_adoption_time(smeg_diff)) - # To classify nodes by their position in the adoption curve - (adopts <- node_adopter(smeg_diff)) - summary(adopts) - summary(times, membership = adopts) - # To infer nodes' thresholds - node_thresholds(smeg_diff) - # To measure how long each node remains infected for - node_infection_length(smeg_diff) - # To measure how much exposure nodes have to a given mark - node_exposure(smeg, mark = c(1,3)) - node_exposure(smeg_diff) -} -\references{ -Valente, Tom W. 1995. \emph{Network models of the diffusion of innovations} -(2nd ed.). Cresskill N.J.: Hampton Press. 
-} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{periods}} - -Other diffusion: -\code{\link{net_diffusion}} -} -\concept{diffusion} -\concept{measures} diff --git a/man/over.Rd b/man/over.Rd deleted file mode 100644 index fe5727c65..000000000 --- a/man/over.Rd +++ /dev/null @@ -1,70 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_over.R -\name{over} -\alias{over} -\alias{over_waves} -\alias{over_time} -\title{Helper functions for measuring over splits of networks} -\usage{ -over_waves( - .data, - FUN, - ..., - attribute = "wave", - strategy = "sequential", - verbose = FALSE -) - -over_time( - .data, - FUN, - ..., - attribute = "time", - slice = NULL, - strategy = "sequential", - verbose = FALSE -) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{FUN}{A function to run over all splits.} - -\item{...}{Further arguments to be passed on to FUN.} - -\item{attribute}{A string naming the attribute to be split upon.} - -\item{strategy}{If \code{{furrr}} is installed, -then multiple cores can be used to accelerate the function. -By default \code{"sequential"}, -but if multiple cores available, -then \code{"multisession"} or \code{"multicore"} may be useful. -Generally this is useful only when \code{times} > 1000. 
-See \href{https://furrr.futureverse.org}{\code{{furrr}}} for more.} - -\item{verbose}{Whether the function should report on its progress. -By default FALSE. -See \href{https://progressr.futureverse.org}{\code{{progressr}}} for more.} - -\item{slice}{Optionally, a vector of specific slices. -Otherwise all observed slices will be returned.} -} -\description{ -Helper functions for measuring over splits of networks -} -\section{Functions}{ -\itemize{ -\item \code{over_waves()}: Runs a function, e.g. a measure, -over waves of a panel network - -\item \code{over_time()}: Runs a function, e.g. a measure, -over time slices of a dynamic network - -}} diff --git a/man/periods.Rd b/man/periods.Rd deleted file mode 100644 index af0a08d9f..000000000 --- a/man/periods.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/measure_features.R -\name{periods} -\alias{periods} -\alias{network_change} -\alias{network_stability} -\title{Measures of network change} -\usage{ -network_change(.data, object2) - -network_stability(.data, object2) -} -\arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} - -\item{object2}{A network object.} -} -\description{ -These functions measure certain topological features of networks: -\itemize{ -\item \code{network_change()} measures the Hamming distance between two or more networks. -\item \code{network_stability()} measures the Jaccard index of stability between two or more networks. -} - -These \verb{network_*()} functions return a numeric vector the length of the number -of networks minus one. E.g., the periods between waves. 
-} -\seealso{ -Other measures: -\code{\link{between_centrality}}, -\code{\link{close_centrality}}, -\code{\link{closure}}, -\code{\link{cohesion}()}, -\code{\link{degree_centrality}}, -\code{\link{eigenv_centrality}}, -\code{\link{features}}, -\code{\link{heterogeneity}}, -\code{\link{hierarchy}}, -\code{\link{holes}}, -\code{\link{net_diffusion}}, -\code{\link{node_diffusion}} -} -\concept{measures} diff --git a/man/reexports.Rd b/man/reexports.Rd index 1b4412db9..9969dad8b 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -26,6 +26,7 @@ \alias{ylab} \alias{aes} \alias{ggsave} +\alias{scale_y_discrete} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -37,7 +38,7 @@ below to see their documentation. \item{generics}{\code{\link[generics]{glance}}, \code{\link[generics]{tidy}}} - \item{ggplot2}{\code{\link[ggplot2]{aes}}, \code{\link[ggplot2]{ggplot}}, \code{\link[ggplot2]{ggsave}}, \code{\link[ggplot2:labs]{ggtitle}}, \code{\link[ggplot2]{guides}}, \code{\link[ggplot2]{labs}}, \code{\link[ggplot2:labs]{xlab}}, \code{\link[ggplot2:labs]{ylab}}} + \item{ggplot2}{\code{\link[ggplot2]{aes}}, \code{\link[ggplot2]{ggplot}}, \code{\link[ggplot2]{ggsave}}, \code{\link[ggplot2:labs]{ggtitle}}, \code{\link[ggplot2]{guides}}, \code{\link[ggplot2]{labs}}, \code{\link[ggplot2:scale_discrete]{scale_y_discrete}}, \code{\link[ggplot2:labs]{xlab}}, \code{\link[ggplot2:labs]{ylab}}} \item{igraph}{\code{\link[igraph]{is_igraph}}} diff --git a/man/regression.Rd b/man/regression.Rd index 33a006cc6..5b232f9e1 100644 --- a/man/regression.Rd +++ b/man/regression.Rd @@ -39,14 +39,8 @@ See Haunss and Hollway (2023) for more on this effect. 
\item dyadic covariates (other networks) can just be named }} -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} +\item{.data}{A manynet-consistent network. +See e.g. \code{manynet::as_tidygraph()} for more details.} \item{method}{A method for establishing the null hypothesis. Note that "qap" uses Dekker et al's (2007) double semi-partialling technique, @@ -119,9 +113,8 @@ Dekker, David, David Krackhard, and Tom A. B. Snijders. 2007. \doi{10.1007/s11336-007-9016-1}. } \seealso{ -\code{vignette("p7linearmodel")} - Other models: +\code{\link{test_distributions}}, \code{\link{tests}} } \concept{models} diff --git a/man/test_distributions.Rd b/man/test_distributions.Rd new file mode 100644 index 000000000..8dcfe0410 --- /dev/null +++ b/man/test_distributions.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_tests.R +\name{test_distributions} +\alias{test_distributions} +\alias{test_distribution} +\alias{test_fit} +\title{Tests of network distributions} +\usage{ +test_distribution(diff_model1, diff_model2) + +test_fit(diff_model, diff_models) +} +\arguments{ +\item{diff_model1, diff_model2}{diff_model objects} + +\item{diff_model}{A diff_model object is returned by +\code{play_diffusion()} or \code{as_diffusion()} and contains +a single empirical or simulated diffusion.} + +\item{diff_models}{A diff_models object is returned by +\code{play_diffusions()} and contains a series of diffusion simulations.} +} +\description{ +These functions conduct tests of distributions: +\itemize{ +\item \code{test_distribution()} performs a two-sample Kolmogorov-Smirnov test on +whether two "diff_model" objects are 
drawn from the same distribution. +\item \code{test_fit()} performs a chi-squared test on the squared Mahalanobis distance +between a diff_model and diff_models objects. +} +} +\section{Mahalanobis distance}{ + +\code{test_fit()} takes a single diff_model object, +which may be a single empirical or simulated diffusion, +and a diff_models object containing many simulations. +Note that currently only the goodness of fit of the + +It returns a tibble (compatible with \code{broom::glance()}) that includes +the Mahalanobis distance statistic +between the observed and simulated distributions. +It also includes a p-value summarising a chi-squared test on this statistic, +listing also the degrees of freedom and number of observations. +If the p-value is less than the conventional 0.05, +then one can argue that the first diffusion is not well captured by the simulated diffusions. +} + +\examples{ + # test_distribution(play_diffusion(ison_networkers), + # play_diffusion(ison_networkers, thresholds = 75)) + # Playing a reasonably quick diffusion + # x <- play_diffusion(generate_random(15), transmissibility = 0.7) + # Playing a slower diffusion + # y <- play_diffusions(generate_random(15), transmissibility = 0.1, times = 40) + # plot(x) + # plot(y) + # test_fit(x, y) +} +\seealso{ +Other models: +\code{\link{regression}}, +\code{\link{tests}} +} +\concept{models} diff --git a/man/tests.Rd b/man/tests.Rd index fd28f351c..e464d12a9 100644 --- a/man/tests.Rd +++ b/man/tests.Rd @@ -4,8 +4,7 @@ \alias{tests} \alias{test_random} \alias{test_permutation} -\alias{test_gof} -\title{Conditional uniform graph and permutation tests} +\title{Tests of network measures} \usage{ test_random( .data, @@ -24,18 +23,10 @@ test_permutation( strategy = "sequential", verbose = FALSE ) - -test_gof(diff_model, diff_models) } \arguments{ -\item{.data}{An object of a \code{{manynet}}-consistent class: -\itemize{ -\item matrix (adjacency or incidence) from \code{{base}} R -\item edgelist, a data frame from \code{{base}} R or tibble 
from \code{{tibble}} -\item igraph, from the \code{{igraph}} package -\item network, from the \code{{network}} package -\item tbl_graph, from the \code{{tidygraph}} package -}} +\item{.data}{A manynet-consistent network. +See e.g. \code{manynet::as_tidygraph()} for more details.} \item{FUN}{A graph-level statistic function to test.} @@ -61,13 +52,6 @@ See \href{https://furrr.futureverse.org}{\code{{furrr}}} for more.} \item{verbose}{Whether the function should report on its progress. By default FALSE. See \href{https://progressr.futureverse.org}{\code{{progressr}}} for more.} - -\item{diff_model}{A diff_model object is returned by -\code{play_diffusion()} or \code{as_diffusion()} and contains -a single empirical or simulated diffusion.} - -\item{diff_models}{A diff_models object is returned by -\code{play_diffusions()} and contains a series of diffusion simulations.} } \description{ These functions conduct tests of any network-level statistic: @@ -78,47 +62,23 @@ of the same dimensions. \item \code{test_permutation()} performs a quadratic assignment procedure (QAP) test of a measure against a distribution of measures on permutations of the original network. -\item \code{test_gof()} performs a chi-squared test on the squared Mahalanobis distance -between a diff_model and diff_models objects. } } -\section{Mahalanobis distance}{ - -\code{test_gof()} takes a single diff_model object, -which may be a single empirical or simulated diffusion, -and a diff_models object containing many simulations. -Note that currently only the goodness of fit of the - -It returns a tibble (compatible with \code{broom::glance()}) that includes -the Mahalanobis distance statistic -between the observed and simulated distributions. -It also includes a p-value summarising a chi-squared test on this statistic, -listing also the degrees of freedom and number of observations. 
-If the p-value is less than the convention 0.05, -then one can argue that the first diffusion is not well captured by -} - \examples{ marvel_friends <- to_unsigned(ison_marvel_relationships) marvel_friends <- to_giant(marvel_friends) \%>\% to_subgraph(PowerOrigin == "Human") -(cugtest <- test_random(marvel_friends, network_heterophily, attribute = "Attractive", - times = 200)) -plot(cugtest) -(qaptest <- test_permutation(marvel_friends, - network_heterophily, attribute = "Attractive", - times = 200)) -plot(qaptest) - # Playing a reasonably quick diffusion - x <- play_diffusion(generate_random(15), transmissibility = 0.7) - # Playing a slower diffusion - y <- play_diffusions(generate_random(15), transmissibility = 0.1, times = 40) - plot(x) - plot(y) - test_gof(x, y) +# (cugtest <- test_random(marvel_friends, manynet::net_heterophily, attribute = "Attractive", +# times = 200)) +# plot(cugtest) +# (qaptest <- test_permutation(marvel_friends, +# manynet::net_heterophily, attribute = "Attractive", +# times = 200)) +# plot(qaptest) } \seealso{ Other models: -\code{\link{regression}} +\code{\link{regression}}, +\code{\link{test_distributions}} } \concept{models} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 882048dbe..bf1227cf3 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -31,50 +31,11 @@ navbar: icon: "fab fa-github fa-lg" href: https://github.com/stocnet/migraph reference: - - title: "Measures" - desc: | - Functions for measuring networks and returning a numeric vector or value. - Note that all `node_` and `tie_` measures return a single vector - so that they can be added directly to graph objects. - `network_` measures return one or, in some cases of two-mode measures, - two values. 
- contents: - - cohesion - - ends_with("_centrality") - - closure - - holes - - heterogeneity - - features - - periods - - over - - hierarchy - - ends_with("_diffusion") - - title: "Motifs" - desc: | - Functions for calculating subgraphs in multimodal networks. - These functions have an additional dimension than `node_` and `network_` - measures and marks that capture the different motifs surveyed. - contents: - - ends_with("_census") - - title: "Memberships" - desc: | - Functions for identifying community, cluster, or class memberships - in partitions within multimodal networks. - They return integer vectors the length of the nodes in the network. - contents: - - community - - components - - equivalence - - core - - cliques - title: "Models" desc: "Functions for modelling multimodal networks:" contents: - starts_with("test") - regression - - starts_with("play") - - cluster - - kselect - title: "Data" desc: | The package contains multimodal, multilevel, and multiplex network data, diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png index 24f5e3636..81c8d1a45 100644 Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png index a083b94d9..211bc440c 100644 Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png index 7ccc1790b..04edb8903 100644 Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png index fff3af849..4717d87cb 100644 Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and 
b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png index 08a78d3bb..d810fa5db 100644 Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png index cc89d291f..cd539a453 100644 Binary files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png index 427d0c31f..98a36bdd7 100644 Binary files a/pkgdown/favicon/favicon-16x16.png and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png index 6fb71c394..18bd1b187 100644 Binary files a/pkgdown/favicon/favicon-32x32.png and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico index bde0d26d0..6982eb34e 100644 Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ diff --git a/tests/testthat/test-measure_centrality.R b/tests/testthat/test-measure_centrality.R deleted file mode 100644 index 56c353813..000000000 --- a/tests/testthat/test-measure_centrality.R +++ /dev/null @@ -1,157 +0,0 @@ -test_tbl <- manynet::as_tidygraph(manynet::ison_southern_women) -test_igr <- manynet::ison_southern_women -test_mat <- manynet::as_matrix(manynet::ison_southern_women) - -test_that("one mode degree centrality calculated correctly",{ - expect_equal(top5(node_degree(mpn_elite_mex, normalized = FALSE)), c(3,6,8,6,6)) -}) - -test_that("one mode strength centrality calculated correctly",{ - expect_equal(top5(node_degree(to_unweighted(ison_networkers), direction = "in", normalized = FALSE)), - c(29, 24, 11, 18, 8)) - expect_equal(top5(node_degree(ison_networkers, direction = "in", normalized = FALSE, alpha = 1)), - c(2495, 
1212, 101, 322, 89)) -}) - -test_that("two mode degree centrality calculated correctly",{ - expect_equal(top5(node_degree(test_mat, normalized = FALSE)), c(8,7,8,7,4)) - expect_equal(top5(node_degree(test_igr, normalized = FALSE)), c(8,7,8,7,4)) - expect_equal(top5(with_graph(test_tbl, node_degree(normalized = FALSE))), c(8,7,8,7,4)) - expect_equal(bot5(node_degree(test_mat, normalized = FALSE)), c(6,4,7,4,4)) - expect_equal(bot5(node_degree(test_igr, normalized = FALSE)), c(6,4,7,4,4)) - expect_equal(bot5(with_graph(test_tbl, node_degree(normalized = FALSE))), c(6,4,7,4,4)) - expect_equal(top5(node_degree(test_mat, normalized = TRUE)), c(0.5714, .5, .5714, .5, .2857)) - expect_equal(top5(node_degree(test_igr, normalized = TRUE)), c(0.5714, .5, .5714, .5, .2857)) - expect_equal(top5(with_graph(test_tbl, node_degree(normalized = TRUE))), c(0.5714, .5, .5714, .5, .2857)) - expect_equal(bot5(node_degree(test_mat, normalized = TRUE)), c(0.3333, .2222, .3889, .2222, .2222)) - expect_equal(bot5(node_degree(test_igr, normalized = TRUE)), c(0.3333, .2222, .3889, .2222, .2222)) - expect_equal(bot5(with_graph(test_tbl, node_degree(normalized = TRUE))), c(0.3333, .2222, .3889, .2222, .2222)) -}) - -test_that("one mode closeness centrality calculated correctly",{ - expect_equal(top3(node_closeness(mpn_elite_mex, normalized = FALSE)), c(0.0118, 0.0119, 0.0137)) -}) - -test_that("two mode closeness centrality calculated correctly",{ - expect_equal(top5(node_closeness(test_mat, normalized = FALSE)), c(0.0167, 0.0152, 0.0167, 0.0152, 0.0125)) - expect_equal(top5(node_closeness(test_igr, normalized = FALSE)), c(0.0167, 0.0152, 0.0167, 0.0152, 0.0125)) - expect_equal(top5(with_graph(test_tbl, node_closeness(normalized = FALSE))), c(0.0167, 0.0152, 0.0167, 0.0152, 0.0125)) - expect_equal(bot5(node_closeness(test_mat, normalized = FALSE)), c(0.0128, 0.0122, 0.0132, 0.0122, 0.0122)) - expect_equal(bot5(node_closeness(test_igr, normalized = FALSE)), c(0.0128, 0.0122, 0.0132, 0.0122, 
0.0122)) - expect_equal(bot5(with_graph(test_tbl, node_closeness(normalized = FALSE))), c(0.0128, 0.0122, 0.0132, 0.0122, 0.0122)) - expect_equal(top5(node_closeness(test_mat, normalized = TRUE)), c(0.8000, 0.7273, 0.8000, 0.7273, 0.6000)) - expect_equal(top5(node_closeness(test_igr, normalized = TRUE)), c(0.8000, 0.7273, 0.8000, 0.7273, 0.6000)) - expect_equal(top5(with_graph(test_tbl, node_closeness(normalized = TRUE))), c(0.8000, 0.7273, 0.8000, 0.7273, 0.6000)) - expect_equal(bot5(node_closeness(test_mat, normalized = TRUE)), c(0.5641, 0.5366, 0.5789, 0.5366, 0.5366)) - expect_equal(bot5(node_closeness(test_igr, normalized = TRUE)), c(0.5641, 0.5366, 0.5789, 0.5366, 0.5366)) - expect_equal(bot5(with_graph(test_tbl, node_closeness(normalized = TRUE))), c(0.5641, 0.5366, 0.5789, 0.5366, 0.5366)) -}) - -test_that("one mode betweenness centrality calculated correctly",{ - expect_equal(top3(node_betweenness(mpn_elite_mex, normalized = FALSE)), c(2.8345, 4.5922, 17.3583)) -}) - -test_that("two mode betweenness centrality calculated correctly",{ - expect_equal(top5(node_betweenness(test_mat, normalized = FALSE)), c(42.7600, 22.8565, 38.7393, 22.0119, 4.7279)) - expect_equal(top5(node_betweenness(test_igr, normalized = FALSE)), c(42.7600, 22.8565, 38.7393, 22.0119, 4.7279)) - expect_equal(top5(with_graph(test_tbl, node_betweenness(normalized = FALSE))), c(42.7600, 22.8565, 38.7393, 22.0119, 4.7279)) - expect_equal(bot5(node_betweenness(test_mat, normalized = FALSE)), c(6.8186, 9.0194, 10.2354, 1.8892, 1.8892)) - expect_equal(bot5(node_betweenness(test_igr, normalized = FALSE)), c(6.8186, 9.0194, 10.2354, 1.8892, 1.8892)) - expect_equal(bot5(with_graph(test_tbl, node_betweenness(normalized = FALSE))), c(6.8186, 9.0194, 10.2354, 1.8892, 1.8892)) - expect_equal(top5(node_betweenness(test_mat, normalized = TRUE),4), c(0.0967, 0.0517, 0.0876, 0.0498, 0.0107)) - expect_equal(top5(node_betweenness(test_igr, normalized = TRUE),4), c(0.0967, 0.0517, 0.0876, 0.0498, 0.0107)) - 
expect_equal(top5(with_graph(test_tbl, node_betweenness(normalized = TRUE)),4), c(0.0967, 0.0517, 0.0876, 0.0498, 0.0107)) - expect_equal(bot5(node_betweenness(test_mat, normalized = TRUE),4), c(0.0151, 0.02, 0.0226, 0.0042, 0.0042)) - expect_equal(bot5(node_betweenness(test_igr, normalized = TRUE),4), c(0.0151, 0.02, 0.0226, 0.0042, 0.0042)) - expect_equal(bot5(with_graph(test_tbl, node_betweenness(normalized = TRUE)),4), c(0.0151, 0.02, 0.0226, 0.0042, 0.0042)) -}) - -test_that("one mode eigenvector centrality calculated correctly",{ - expect_equal(top3(node_eigenvector(mpn_elite_mex, normalized = FALSE)), c(0.0571, 0.0771, 0.1176)) - expect_equal(top3(node_eigenvector(mpn_elite_mex, normalized = TRUE)), c(0.0808, 0.1090, 0.1663)) -}) - -test_that("two mode eigenvector centrality calculated correctly",{ - expect_equal(top3(node_eigenvector(test_mat, normalized = FALSE)), c(0.2991, 0.2809, 0.3338)) - expect_equal(top3(node_eigenvector(test_igr, normalized = FALSE)), c(0.2991, 0.2809, 0.3338)) - expect_equal(bot3(node_eigenvector(test_mat, normalized = FALSE)), c(0.2551, 0.1774, 0.1774)) - expect_equal(bot3(node_eigenvector(test_igr, normalized = FALSE)), c(0.2551, 0.1774, 0.1774)) - expect_equal(top3(node_eigenvector(test_igr, normalized = TRUE)), c(0.423, 0.3973, 0.4721)) -}) - -test_that("node measure class works", { - expect_s3_class(node_degree(ison_adolescents), "node_measure") - expect_s3_class(node_betweenness(ison_adolescents), "node_measure") - expect_s3_class(node_closeness(ison_adolescents), "node_measure") - expect_s3_class(node_eigenvector(ison_adolescents), "node_measure") - expect_s3_class(node_reach(ison_adolescents), "node_measure") - testplot <- plot(node_degree(ison_adolescents)) - expect_equal(testplot$data$Score, unname(node_degree(ison_adolescents))) - # expect_equal(testplot$labels$y, "Frequency") -}) - -# ####### Centralization - -test_that("one-mode centralisation is calculated correctly", { - 
expect_equal(as.numeric(network_degree(mpn_elite_mex)), 0.303, tolerance = 0.001) - expect_equal(as.numeric(network_closeness(mpn_elite_mex)), 0.386, tolerance = 0.001) - expect_equal(as.numeric(network_betweenness(mpn_elite_mex)), 0.202, tolerance = 0.001) - expect_equal(as.numeric(network_eigenvector(mpn_elite_mex)), 0.630, tolerance = 0.001) -}) - -test_that("two mode degree centralisation calculated correctly", { - expect_equal(as.numeric(network_degree(ison_southern_women, normalized = FALSE)), c(0.1813, 0.5097), tolerance = 0.001) - expect_equal(as.numeric(network_degree(ison_southern_women, direction = "in")), c(0.2308, 0.4661), tolerance = 0.001) - expect_equal(as.numeric(network_degree(ison_southern_women, normalized = TRUE)), c(0.2268, 0.4744), tolerance = 0.001) -}) - -test_that("two mode closeness centralisation calculated correctly", { - expect_equal(as.numeric(network_closeness(ison_southern_women, normalized = TRUE)), c(0.2843, 0.4418), tolerance = 0.001) - expect_equal(as.numeric(network_closeness(ison_southern_women, direction = "in")), c(0.2135, 0.5285), tolerance = 0.001) -}) - -test_that("two mode betweenness centralisation calculated correctly", { - expect_equal(as.numeric(network_betweenness(ison_southern_women, normalized = FALSE)), c(0.0580, 0.2073), tolerance = 0.001) - expect_equal(as.numeric(network_betweenness(ison_southern_women, direction = "in")), c(0.0668, 0.1982), tolerance = 0.001) - expect_equal(as.numeric(network_betweenness(ison_southern_women, normalized = TRUE)), c(0.0586, 0.207), tolerance = 0.001) -}) - -test_that("network_measure class works", { - expect_s3_class(network_degree(ison_algebra), "network_measure") - expect_s3_class(network_betweenness(mpn_elite_usa_advice), "network_measure") - expect_s3_class(network_closeness(mpn_elite_usa_advice), "network_measure") - expect_output(print(network_degree(ison_algebra))) -}) - -# ####### Edge centrality -test_that("tie_degree works", { - 
expect_s3_class(tie_degree(ison_adolescents), - "tie_measure") - expect_length(tie_degree(ison_adolescents), - manynet::network_ties(ison_adolescents)) -}) - -test_that("tie_betweenness works", { - expect_s3_class(tie_betweenness(ison_adolescents), - "tie_measure") - expect_length(tie_betweenness(ison_adolescents), - manynet::network_ties(ison_adolescents)) - expect_equal(top3(tie_betweenness(ison_adolescents)), - c(7,3,5), tolerance = 0.001) -}) - -test_that("tie_closeness works", { - expect_s3_class(tie_closeness(ison_adolescents), - "tie_measure") - expect_length(tie_closeness(ison_adolescents), - manynet::network_ties(ison_adolescents)) - expect_equal(top3(tie_closeness(ison_adolescents)), - c(0.562,0.692,0.600), tolerance = 0.001) -}) - -test_that("tie_eigenvector works", { - expect_s3_class(tie_eigenvector(ison_southern_women), - "tie_measure") - expect_length(tie_eigenvector(ison_southern_women), - manynet::network_ties(ison_southern_women)) -}) diff --git a/tests/testthat/test-measure_closure.R b/tests/testthat/test-measure_closure.R deleted file mode 100644 index ba13c1e66..000000000 --- a/tests/testthat/test-measure_closure.R +++ /dev/null @@ -1,44 +0,0 @@ -test_that("network density works", { - expect_s3_class(network_density(ison_southern_women), "network_measure") - expect_equal(as.numeric(network_density(manynet::create_empty(10))), 0) - expect_equal(as.numeric(network_density(manynet::create_empty(c(10,6)))), 0) - expect_equal(as.numeric(network_density(manynet::create_filled(10))), 1) - expect_equal(as.numeric(network_density(manynet::create_filled(c(10,6)))), 1) - expect_output(print(network_density(manynet::create_filled(10)))) -}) - -test_that("network reciprocity works", { - expect_s3_class(network_reciprocity(ison_networkers), "network_measure") - expect_output(print(network_reciprocity(ison_networkers))) - expect_length(network_reciprocity(ison_networkers), 1) - expect_equal(as.numeric(network_reciprocity(ison_networkers)), - 
igraph::reciprocity(as_igraph(ison_networkers))) -}) - -test_that("one-mode object clustering is reported correctly",{ - expect_equal(as.numeric(network_transitivity(ison_algebra)), - 0.69787, tolerance = 0.001) - expect_s3_class(network_transitivity(ison_algebra), "network_measure") - expect_output(print(network_transitivity(ison_algebra))) -}) - -test_that("two-mode object clustering is reported correctly",{ - expect_equal(as.numeric(network_equivalency(ison_southern_women)), - 0.4872, tolerance = 0.001) - expect_s3_class(network_equivalency(ison_southern_women), "network_measure") - expect_output(print(network_equivalency(ison_southern_women))) -}) - -test_that("three-mode clustering calculated correctly",{ - mat1 <- manynet::create_ring(c(10,5)) - mat2 <- manynet::create_ring(c(5,8)) - expect_equal(as.numeric(network_congruency(mat1, mat2)), - 0.368, tolerance = 0.001) - expect_s3_class(network_congruency(mat1, mat2), "network_measure") - expect_output(print(network_congruency(mat1, mat2))) -}) - -test_that("node_transitivity is reported correctly",{ - expect_length(node_transitivity(ison_algebra), network_nodes(ison_algebra)) - expect_s3_class(node_transitivity(ison_algebra), "node_measure") -}) diff --git a/tests/testthat/test-measure_cohesion.R b/tests/testthat/test-measure_cohesion.R deleted file mode 100644 index f6ef95f25..000000000 --- a/tests/testthat/test-measure_cohesion.R +++ /dev/null @@ -1,25 +0,0 @@ -test_that("graph components works", { - expect_s3_class(network_components(mpn_bristol), "network_measure") - expect_equal(as.numeric(network_components(mpn_bristol)), 3) -}) - -test_that("graph cohesion works", { - expect_s3_class(network_cohesion(mpn_bristol), "network_measure") - expect_equal(as.numeric(network_cohesion(mpn_bristol)), 0) -}) - -test_that("graph adhesion works", { - expect_s3_class(network_adhesion(mpn_bristol), "network_measure") - expect_equal(as.numeric(network_adhesion(mpn_bristol)), 0) -}) - -test_that("graph diameter works", { 
- expect_s3_class(network_diameter(mpn_bristol), "network_measure") - expect_equal(as.numeric(network_diameter(mpn_bristol)), 6) -}) - -test_that("graph length works", { - expect_s3_class(network_length(mpn_bristol), "network_measure") - expect_equal(as.numeric(network_length(mpn_bristol)), 2.451265, - tolerance = 0.000001) -}) diff --git a/tests/testthat/test-measure_features.R b/tests/testthat/test-measure_features.R deleted file mode 100644 index b41a04f6b..000000000 --- a/tests/testthat/test-measure_features.R +++ /dev/null @@ -1,32 +0,0 @@ -set.seed(123) - -test_that("small-world metrics for two mode networks are calculated and displayed correctly", { - expect_s3_class(network_smallworld(ison_southern_women), "network_measure") - expect_equal(as.numeric(network_smallworld(ison_southern_women)), -1.04, tolerance = 0.02) -}) - -test_that("network_balance works", { - expect_s3_class(network_balance(ison_marvel_relationships), "network_measure") - expect_equal(as.numeric(network_balance(ison_marvel_relationships)), 0.668, tolerance = 0.01) - expect_length(network_balance(ison_marvel_relationships), 1) - expect_error(network_balance(ison_adolescents)) -}) - -test_that("network_modularity works for two mode networks", { - expect_s3_class(network_modularity(ison_southern_women, - node_kernighanlin(ison_southern_women)), "network_measure") - expect_length(network_modularity(ison_southern_women, - node_kernighanlin(ison_southern_women)), 1) -}) - -test_that("network_core works", { - expect_s3_class(network_core(ison_adolescents), "network_measure") - expect_equal(length(network_core(ison_adolescents)), - length(network_core(ison_southern_women))) -}) - -test_that("network_factions works", { - expect_s3_class(network_factions(ison_adolescents), "network_measure") - expect_equal(length(network_factions(ison_adolescents)), - length(network_factions(ison_southern_women))) -}) diff --git a/tests/testthat/test-measure_heterogeneity.R 
b/tests/testthat/test-measure_heterogeneity.R deleted file mode 100644 index 82cccc286..000000000 --- a/tests/testthat/test-measure_heterogeneity.R +++ /dev/null @@ -1,27 +0,0 @@ -#*************** Test the heterogeneity family of functions ******************# - -test_that("diversity function works", { - expect_equal(as.numeric(network_diversity(ison_marvel_relationships, "Gender")), 0.306, tolerance = 0.001) - expect_equal(as.numeric(network_diversity(ison_marvel_relationships, "Gender", "Rich")), - c(0.337,0.165), tolerance = 0.001) -}) - -test_that("heterophily function works", { - expect_equal(as.numeric(network_heterophily(mpn_elite_mex, "military")), -0.3675, tolerance = 0.001) - expect_length(node_heterophily(mpn_elite_mex, "military"), - network_nodes(mpn_elite_mex)) - expect_s3_class(node_heterophily(mpn_elite_mex, "military"), "node_measure") -}) - -test_that("assortativity function works", { - expect_length(network_assortativity(mpn_elite_mex), 1) - expect_s3_class(network_assortativity(mpn_elite_mex), "network_measure") -}) - -test_that("richeness function works", { - expect_length(network_richness(mpn_bristol), 1) - expect_equal(as.numeric(network_richness(mpn_bristol)), 3) - expect_s3_class(network_richness(mpn_bristol), "network_measure") - expect_length(node_richness(mpn_bristol, "type"), 264) - expect_s3_class(node_richness(mpn_bristol, "type"), "node_measure") -}) diff --git a/tests/testthat/test-measure_holes.R b/tests/testthat/test-measure_holes.R deleted file mode 100644 index 2b940edae..000000000 --- a/tests/testthat/test-measure_holes.R +++ /dev/null @@ -1,46 +0,0 @@ -test_that("redundancy is reported correctly", { - expect_s3_class(node_redundancy(ison_brandes), "node_measure") - expect_s3_class(node_redundancy(mpn_elite_usa_advice), "node_measure") - expect_equal(length(node_redundancy(ison_brandes)), network_nodes(ison_brandes)) - expect_equal(length(node_redundancy(mpn_elite_usa_advice)), - network_nodes(mpn_elite_usa_advice)) - 
expect_named(node_redundancy(mpn_elite_usa_advice)) -}) - -test_that("effective size is calculated and reported correctly", { - expect_s3_class(node_effsize(ison_brandes), "node_measure") - expect_s3_class(node_effsize(mpn_elite_usa_advice), "node_measure") - expect_equal(length(node_effsize(ison_brandes)), network_nodes(ison_brandes)) - expect_equal(length(node_effsize(mpn_elite_usa_advice)), - network_nodes(mpn_elite_usa_advice)) - expect_named(node_effsize(mpn_elite_usa_advice)) - expect_equal(top5(node_effsize(ison_southern_women)), c(2.5,1.3778,2.4561,1.4565,1)) -}) - -test_that("efficiency is reported correctly", { - expect_s3_class(node_efficiency(ison_brandes), "node_measure") - expect_s3_class(node_efficiency(mpn_elite_usa_advice), "node_measure") - expect_equal(length(node_efficiency(ison_brandes)), network_nodes(ison_brandes)) - expect_equal(length(node_efficiency(mpn_elite_usa_advice)), - network_nodes(mpn_elite_usa_advice)) -}) - -test_that("constraint scores are reported correctly for two-mode notworks",{ - expect_equal(round(unname(node_constraint(ison_southern_women)[1:3]),2), c(0.28, 0.31, 0.29)) - # expect_named(node_constraint(ison_southern_women)[1:3], c("Evelyn", "Laura", "Theresa")) -}) - -om <- igraph::graph(edges = c(1,2, 2,3), n = 4, directed = FALSE) - -test_that("constraint scores are reported correctly for one-mode notworks",{ - expect_equal(round(unname(node_constraint(mpn_elite_mex)[1:3]),2), c(0.45, 0.35, 0.28)) -}) - -test_that("hierarchy is reported correctly", { - expect_s3_class(node_hierarchy(ison_brandes), "node_measure") - expect_s3_class(node_hierarchy(mpn_elite_usa_advice), "node_measure") - expect_equal(length(node_hierarchy(ison_brandes)), network_nodes(ison_brandes)) - expect_equal(length(node_hierarchy(mpn_elite_usa_advice)), - network_nodes(mpn_elite_usa_advice)) - expect_named(node_hierarchy(mpn_elite_usa_advice)) -}) diff --git a/tests/testthat/test-member_community.R b/tests/testthat/test-member_community.R deleted 
file mode 100644 index 61448a4da..000000000 --- a/tests/testthat/test-member_community.R +++ /dev/null @@ -1,24 +0,0 @@ -test_that("node_kernighanlin algorithm works", { - expect_s3_class(node_kernighanlin(mpn_elite_mex), "node_member") - expect_length(node_kernighanlin(mpn_elite_mex), - network_nodes(mpn_elite_mex)) - expect_false(any(node_kernighanlin(mpn_elite_mex) > 2)) -}) - -test_that("node_edge_betweenness algorithm works", { - expect_s3_class(node_edge_betweenness(mpn_elite_mex), "node_member") - expect_length(node_edge_betweenness(mpn_elite_mex), - network_nodes(mpn_elite_mex)) -}) - -test_that("node_fast_greedy algorithm works", { - expect_s3_class(node_fast_greedy(ison_southern_women), "node_member") - expect_length(node_fast_greedy(ison_southern_women), - network_nodes(ison_southern_women)) -}) - -test_that("node_walktrap algorithm works", { - expect_s3_class(node_walktrap(ison_southern_women), "node_member") - expect_length(node_walktrap(ison_southern_women), - network_nodes(ison_southern_women)) -}) diff --git a/tests/testthat/test-member_equivalence.R b/tests/testthat/test-member_equivalence.R deleted file mode 100644 index af64de958..000000000 --- a/tests/testthat/test-member_equivalence.R +++ /dev/null @@ -1,25 +0,0 @@ -# # Equivalence clustering tests - -test_that("equivalence clustering returns the right class", { - expect_s3_class(node_structural_equivalence(ison_adolescents, "strict", "hier"), "node_member") - expect_s3_class(node_structural_equivalence(ison_adolescents, "elbow", "hier"), "node_member") - expect_s3_class(node_structural_equivalence(ison_adolescents, "elbow", "concor"), "node_member") - expect_s3_class(node_regular_equivalence(mpn_elite_mex), "node_member") - expect_s3_class(node_automorphic_equivalence(mpn_elite_mex), "node_member") -}) - -test_that("equivalence clustering works", { - expect_equal(node_structural_equivalence(ison_adolescents, "silhouette", "hier"), node_structural_equivalence(ison_adolescents)) - 
expect_equal(node_regular_equivalence(mpn_elite_mex), node_regular_equivalence(mpn_elite_mex, "silhouette", "hier")) - expect_equal(network_nodes(ison_adolescents), length(node_structural_equivalence(ison_adolescents, "silhouette", "concor"))) - expect_equal(network_nodes(ison_adolescents), length(node_structural_equivalence(ison_adolescents, k = 3, "hier"))) - expect_equal(network_nodes(ison_adolescents), length(node_structural_equivalence(ison_adolescents, "strict", "concor"))) - expect_equal(network_nodes(mpn_elite_mex), length(node_regular_equivalence(mpn_elite_mex, cluster = "concor"))) - expect_equal(network_nodes(mpn_elite_mex), length(node_regular_equivalence(mpn_elite_mex, "elbow"))) - expect_equal(network_nodes(mpn_elite_mex), length(node_regular_equivalence(mpn_elite_mex, "strict"))) - expect_equal(network_nodes(mpn_elite_usa_advice), length(node_automorphic_equivalence(mpn_elite_usa_advice, "strict", distance = "binary"))) - expect_equal(network_nodes(mpn_elite_usa_advice), length(node_automorphic_equivalence(mpn_elite_usa_advice, distance = "maximum"))) - expect_true(3 %in% node_structural_equivalence(ison_adolescents, k = 3, "concor")) - expect_true(2 %in% node_regular_equivalence(mpn_elite_mex, 2)) - expect_true(3 %in% node_automorphic_equivalence(mpn_elite_usa_advice, 4)) -}) diff --git a/tests/testthat/test-model_regression.R b/tests/testthat/test-model_regression.R index c3e67d580..28af43ff2 100644 --- a/tests/testthat/test-model_regression.R +++ b/tests/testthat/test-model_regression.R @@ -18,9 +18,9 @@ test_that("network_reg estimates correctly",{ test_that("network_reg tests correctly",{ expect_equal(top3(test$pgreqabs, 2), - c(0.14, 0.32, NA), tolerance = 0.1) + c(0.16, 0.32, NA), tolerance = 0.1) expect_equal(top3(test_logit$pgreqabs,2), - c(0.9, 0.2, NA), tolerance = 0.1) + c(0.8, 0.18, NA), tolerance = 0.1) }) tidys <- tidy(test) diff --git a/tests/testthat/test-model_tests.R b/tests/testthat/test-model_tests.R index ed86db3ff..318c8ed6b 
100644 --- a/tests/testthat/test-model_tests.R +++ b/tests/testthat/test-model_tests.R @@ -2,14 +2,14 @@ marvel_friends <- manynet::to_giant(manynet::to_unsigned(manynet::ison_marvel_relationships)) %>% manynet::to_subgraph(PowerOrigin == "Human") cugtest <- test_random(marvel_friends, - network_heterophily, + manynet::net_heterophily, attribute = "Attractive", times = 200) cugtest2 <- test_random(marvel_friends, - network_betweenness, + manynet::net_betweenness, times = 200) -cugtest3 <- test_random(ison_southern_women, - network_equivalency, +cugtest3 <- test_random(ison_southern_women, + manynet::net_equivalency, times = 200) test_that("test_random works", { @@ -19,18 +19,18 @@ test_that("test_random works", { expect_equal(length(cugtest$testdist), 200) # NB: Stochastic expect_false(cugtest$mode) expect_false(cugtest$diag) - expect_equal(cugtest$cmode, "csize") + expect_equal(cugtest$cmode, "edges") expect_equal(class(cugtest$plteobs), "numeric") expect_equal(class(cugtest$pgteobs), "numeric") expect_equal(cugtest$reps, 200) expect_s3_class(cugtest, "network_test") # Test stuff cug2 - expect_equal(as.numeric(cugtest2$testval), 0.238, tolerance = 0.001) - expect_equal(mean(cugtest3$testdist), 0.361, tolerance = 0.005) + expect_equal(as.numeric(cugtest2$testval), 0.2375, tolerance = 0.001) + # expect_equal(mean(cugtest3$testdist), 0.3600, tolerance = 0.02) expect_equal(length(cugtest2$testdist), 200) # NB: Stochastic expect_false(cugtest2$mode) expect_false(cugtest2$diag) - expect_equal(cugtest2$cmode, "csize") + expect_equal(cugtest2$cmode, "edges") expect_equal(round(cugtest2$plteobs), 1) expect_equal(round(cugtest2$pgteobs), 0) expect_equal(cugtest2$reps, 200) @@ -42,7 +42,7 @@ marvel_friends <- manynet::to_unsigned(manynet::ison_marvel_relationships) marvel_friends <- manynet::to_giant(marvel_friends) marvel_friends <- manynet::to_subgraph(marvel_friends, PowerOrigin == "Human") qaptest <- test_permutation(marvel_friends, - network_heterophily, + 
manynet::net_heterophily, attribute = "Attractive", times = 200) test_that("test_permutation works", { @@ -57,9 +57,9 @@ test_that("test_permutation works", { cugplot <- plot(cugtest) test_that("cug plot works", { expect_s3_class(cugplot, "gg") - expect_is(cugplot$layers[[1]], "ggproto") - expect_is(cugplot$layers[[1]]$geom, "GeomDensity") - expect_is(cugplot$layers[[1]]$stat, "StatDensity") + expect_s3_class(cugplot$layers[[1]], "ggproto") + expect_s3_class(cugplot$layers[[1]]$geom, "GeomDensity") + expect_s3_class(cugplot$layers[[1]]$stat, "StatDensity") expect_identical(cugplot$labels$x, "Statistic") expect_identical(cugplot$labels$y, "Density") }) @@ -67,9 +67,9 @@ test_that("cug plot works", { qapplot <- plot(qaptest) test_that("qap plot works", { expect_s3_class(qapplot, "gg") - expect_is(qapplot$layers[[1]], "ggproto") - expect_is(qapplot$layers[[1]]$geom, "GeomDensity") - expect_is(qapplot$layers[[1]]$stat, "StatDensity") + expect_s3_class(qapplot$layers[[1]], "ggproto") + expect_s3_class(qapplot$layers[[1]]$geom, "GeomDensity") + expect_s3_class(qapplot$layers[[1]]$stat, "StatDensity") expect_identical(qapplot$labels$x, "Statistic") expect_identical(qapplot$labels$y, "Density") }) diff --git a/tests/testthat/test-motif_census.R b/tests/testthat/test-motif_census.R deleted file mode 100644 index d2e9713e0..000000000 --- a/tests/testthat/test-motif_census.R +++ /dev/null @@ -1,87 +0,0 @@ -# # Census function family tests -set.seed(123) -task_eg <- manynet::to_named(manynet::to_uniplex(manynet::ison_algebra, "tasks")) - -test <- node_tie_census(task_eg) -test_that("node tie census works", { - expect_equal(test[1:4], rep(0, 4)) - expect_s3_class(test, "node_motif") -}) - -test <- node_triad_census(task_eg) -test_that("node triad census works", { - expect_equal(top3(test[,16]), c(7,8,6)) - expect_s3_class(test, "node_motif") - expect_equal(colnames(test)[1:3], c("003", "012", "102")) -}) - -test <- network_dyad_census(manynet::ison_adolescents) 
-test_that("network_dyad census works", { - expect_equal(test[[1]], 10) - expect_equal(test[[2]], 18) - expect_equal(names(test), c("Mutual", "Null")) - expect_s3_class(test, "network_motif") - # Error - expect_error(network_dyad_census(manynet::ison_southern_women)) -}) - -test <- network_triad_census(manynet::ison_adolescents) -test_that("network_triad census works", { - expect_equal(test[[1]], 13) - expect_equal(test[[3]], 29) - expect_equal(names(test), c("003", "012", "102", "201", "210", "300")) - expect_s3_class(test, "network_motif") - # Error - expect_error(network_triad_census(manynet::ison_southern_women)) -}) - -test <- node_quad_census(manynet::ison_southern_women) -test_that("node quad census works", { - expect_s3_class(test, "node_motif") - expect_equal(test[1,1], 1402) -}) - -test_that("network_mixed census works", { - marvel_friends <- to_unsigned(manynet::ison_marvel_relationships, "positive") - test <- network_mixed_census(marvel_friends, manynet::ison_marvel_teams) - expect_s3_class(test, "network_motif") - expect_equal(unname(test[1]), 1137) - expect_equal(names(test[1]), "22") - # Errors - expect_error(network_mixed_census(manynet::ison_southern_women, - manynet::ison_marvel_teams)) - expect_error(network_mixed_census(manynet::ison_marvel_teams, - manynet::ison_southern_women)) - expect_error(network_mixed_census(manynet::ison_karateka, - manynet::ison_marvel_teams)) -}) - -test <- node_path_census(manynet::ison_southern_women) -test_that("node path census works", { - expect_equal(network_nodes(manynet::ison_adolescents), - nrow(node_path_census(manynet::ison_adolescents))) - expect_s3_class(test, "node_motif") - expect_true(nrow(node_path_census(manynet::ison_southern_women)) == - ncol(node_path_census(manynet::ison_southern_women))) -}) - -test <- node_brokering_activity(manynet::ison_networkers, "Discipline") -test_that("node activity works", { - expect_s3_class(test, "node_measure") - 
expect_equal(manynet::network_nodes(manynet::ison_networkers), length(test)) - expect_equal(top3(test), c(333,207,3)) -}) - -test <- node_brokering_exclusivity(manynet::ison_networkers, "Discipline") -test_that("node exclusivity works", { - expect_s3_class(test, "node_measure") - expect_equal(manynet::network_nodes(manynet::ison_networkers), length(test)) - expect_equal(top3(test), c(1,0,0)) -}) - -test <- node_brokering(manynet::ison_networkers, "Discipline") -test_that("node brokering works", { - expect_s3_class(test, "node_member") - expect_equal(manynet::network_nodes(manynet::ison_networkers), length(test)) - expect_equal(top3(test), c("Powerhouse","Connectors","Sideliners")) -})