############################################################## # # A bit of social network analysis using R # # Author: Tom Snijders, University of Oxford # Using ideas from a script by Michal Bojanowski # May 9, 2012 # ############################################################## # load package igraph library(igraph) #============================================================================ # Network data #============================================================================ # how can we construct graphs ?graph # for example g1 <- graph(c(0,1, 0,2, 1,4, 1,5, 2,1, 2,4, 3,4, 4,1, 4,2, 5,1, 5,3)) # note that vertex labels start with 0 g1 summary(g1) plot(g1) # Now for a real network # read some data sets from Emmanuel Lazega # see http://www.stats.ox.ac.uk/~snijders/siena/Lazega_lawyers_data.htm el.adv <- as.matrix(read.table("ELadv.dat")) el.fri <- as.matrix(read.table("ELfriend.dat")) el.wor <- as.matrix(read.table("ELwork.dat")) el.attr <- as.matrix(read.table("ELattr.dat")) adv <- graph.adjacency(el.adv) plot(adv) summary(adv) friend <- graph.adjacency(el.fri) summary(friend) work <- graph.adjacency(el.wor) summary(work) # set attributes ?set.vertex.attribute adv <- set.vertex.attribute(adv, "seniority", value = el.attr[,1]) adv <- set.vertex.attribute(adv, "female", value = el.attr[,3]) adv <- set.vertex.attribute(adv, "office", value = el.attr[,4]) summary(adv) #============================================================================ # Network descriptives #============================================================================ # simple network descriptions vcount(friend) # number of nodes ecount(friend) # number of edges graph.density(friend) # density (very sparse) degree(friend) # vector of degrees table(degree(friend)) # degree distribution hist(degree(friend)) # histogram degree distribution # The functions table and hist are general in R # (not specific to igraph) #============================================================================ # extracting relational information #============================================================================ # extracting edgelists, adjacency matrices get.adjacency(g1) get.edgelist(g1) # adjacency list # # a list with a component for every node: # a vector of labels of its neighbors get.adjlist(g1) #============================================================================ # attributes #============================================================================ summary(adv) get.vertex.attribute(adv, "office") # but we want to have it available adv.off <- get.vertex.attribute(adv, "office") # if we put it between parentheses, we also see it when created: (adv.off <- get.vertex.attribute(adv, "office")) table(adv.off) # compute plotting positions locate <- layout.fruchterman.reingold(adv) dim(locate) head(locate) adv <- set.graph.attribute(adv, "layout", locate) plot(adv) # the saved layout is used (much faster) # subgraph of only the associates adv.ass <- subgraph(adv, 1:36) plot(adv.ass) layout.ass <- layout.mds(adv.ass) adv.ass <- set.graph.attribute(adv.ass, "layout", layout.ass) plot(adv.ass) # still we do not see a lot # try it for the small network layout.g1 <- layout.mds(g1) g1 <- set.graph.attribute(g1, "layout", layout.g1) plot(g1) # nicer than the default plot # V() can be used to access Vertex attributes # # V(g) is a "vertex sequence" of network 'g'. Attributes can be retrieved and # assigned using the '$' operator # # Square brackets can be used to select specific nodes with logical statements # set vertex labels V(g1)$label <- c("a", "b", "b", "c", "b", "d") V(g1)$label plot(g1) # operations V(g1)$color <- "red" # red for all nodes plot(g1) # take the node(s) for which node attribute 'label' is equal to "b" # and set the color atribute for it to "green" V(g1)[ label == "b" ]$color <- "green" plot(g1) # E() can be used to access Edge attributes in a very similar way #============================================================================ # network connectivity #============================================================================ # clusters = components k <- clusters(friend) k # 'k' is a list with: # membership = vector assigning nodes to components # component size = number of nodes in each component # number of components # thus we have 3 components in the friend network: # one giant component and two isolated nodes # are all connections both ways? ?clusters clusters(friend,"strong") #============================================================================ # selecting subgraphs #============================================================================ # 1. select subgraph by dropping elements (nodes/ties) # deleting vertices ?delete.vertices ?degree # how can we find the isolates degree(friend, mode="total")==0 sum(degree(friend, mode="total")==0) # drop them from the network friend.no_iso <- delete.vertices(friend, degree(friend, mode="total")==0) summary(friend.no_iso) # 2. Using 'subgraph' function ?subgraph # this was used above Hartford <- which(V(adv)$office==2) Hartford adv.Hartford <- subgraph(adv, Hartford) summary(adv.Hartford) # This is smaller, better for plotting # first the default plot(adv.Hartford) # then the mds layout.hart <- layout.mds(adv.Hartford) adv.Hartford <- set.graph.attribute(adv.Hartford, "layout", layout.hart) plot(adv.Hartford) # But now we lost the original labels! # They are still in Hartford, but with labels starting at 1 # and we wish to start at 1. H.labels <- as.character(Hartford+1) plot(adv.Hartford, vertex.label=H.labels) #============================================================================ # more SNA #============================================================================ # reachability # lengths of shortest paths plot(g1) m1 <- get.adjacency(g1) m1 m1^2 # this gives squares elementwise, so not what we want m1 %*% m1 # proper matrix multiplication: numbers of paths of length 2 m1 %*% m1 %*% m1 # matrix power of 3: numbers of paths of lentgh 3 shortest.paths(g1) # matrix of lengths of shortest paths average.path.length(g1) # average path length # 'get.shortest.paths' returns vertices on a shortest path between specified # pair of vertices # vertices on the shortest path between 0 and 18 # default does not take directionality into account l <- get.shortest.paths(adv.Hartford, 0, 18) l # a list with one component # now the longest path get.diameter(adv.Hartford) # really? get.shortest.paths(adv.Hartford, 2, 10) # apparently for diameter the default is directed; therefore get.shortest.paths(adv.Hartford, 2, 10, mode="out") ?get.diameter get.diameter(adv.Hartford, directed = FALSE) ### Centrality # vector of betweenness centrality scores b <- betweenness(adv) str(b) # eigenvector centralities ec <- evcent(adv) str(ec) ec <- ec$vector ec # betweenness mapped to node size b1 <- betweenness(adv.Hartford) plot( adv.Hartford, vertex.color="white", vertex.label.cex=.8, vertex.size= b1 * 20 / max(b1) + 10) # or with vertex labels indicating betweenness numerically plot( adv.Hartford, vertex.color="white", vertex.label.cex=.8, vertex.size= b1 * 20 / max(b1) + 10, vertex.label = round(b1) ) ### degree assortativity degs <- degree(adv) # edge list with numeric ids (start from 0 so +1) el <- get.edgelist(adv, names=FALSE) + 1 head(el) deglist <- apply(el, 2, function(x) degs[x] ) head(deglist) plot(jitter(deglist), main="Degree mixing", xlab="Sender", ylab="Receiver") ?degree ### community detection # grouping nodes into communities by majority voting in the # neighborhoods com <- spinglass.community(adv.Hartford) com com <- com$membership plot( adv.Hartford, vertex.color=com+1, vertex.size=5, vertex.label=NA) ### triad census ?triad.census triad.census(adv) dyad.census(adv) ### cliques largest.cliques(adv)