##############################################################
#
# A bit of social network analysis using R
#
# Author: Tom Snijders, University of Oxford
# Using ideas from a script by  Michal Bojanowski 
# May 9, 2012
#
##############################################################


# load package igraph
library(igraph)


#============================================================================ 
# Network data
#============================================================================ 


# how can we construct graphs
?graph
# for example
g1       <- graph(c(0,1, 0,2, 1,4, 1,5, 2,1, 2,4, 3,4, 4,1, 4,2, 
                    5,1, 5,3))
# note that vertex labels start with 0

g1
summary(g1)
plot(g1)

# Now for a real network
# read some data sets from Emmanuel Lazega
# see http://www.stats.ox.ac.uk/~snijders/siena/Lazega_lawyers_data.htm
el.adv   <- as.matrix(read.table("ELadv.dat"))
el.fri   <- as.matrix(read.table("ELfriend.dat"))
el.wor   <- as.matrix(read.table("ELwork.dat"))
el.attr  <- as.matrix(read.table("ELattr.dat"))

adv      <- graph.adjacency(el.adv)
plot(adv)
summary(adv)

friend   <- graph.adjacency(el.fri)
summary(friend)

work     <- graph.adjacency(el.wor)
summary(work)

# set attributes
?set.vertex.attribute
adv <-   set.vertex.attribute(adv, "seniority", value = el.attr[,1])
adv <-   set.vertex.attribute(adv, "female", value = el.attr[,3])
adv <-   set.vertex.attribute(adv, "office", value = el.attr[,4])

summary(adv)


#============================================================================ 
# Network descriptives
#============================================================================ 


# simple network descriptions

vcount(friend)        # number of nodes
ecount(friend)        # number of edges
graph.density(friend) # density (very sparse)

degree(friend)        # vector of degrees
table(degree(friend)) # degree distribution
hist(degree(friend))  # histogram degree distribution

# The functions table and hist are general in R 
# (not specific to igraph)


#============================================================================ 
# extracting relational information
#============================================================================


# extracting edgelists, adjacency matrices
get.adjacency(g1)
get.edgelist(g1)

# adjacency list
#
# a list with a component for every node: 
# a vector of labels of its neighbors
get.adjlist(g1)


#============================================================================ 
#  attributes
#============================================================================ 


summary(adv)
get.vertex.attribute(adv, "office")
# but we want to have it available
adv.off <- get.vertex.attribute(adv, "office")
# if we put it between parentheses, we also see it when created:
(adv.off <- get.vertex.attribute(adv, "office"))
table(adv.off)

# compute plotting positions
locate <- layout.fruchterman.reingold(adv)
dim(locate)
head(locate)

adv <- set.graph.attribute(adv, "layout", locate)
plot(adv) # the saved layout is used (much faster)

# subgraph of only the associates
adv.ass <- subgraph(adv, 1:36)
plot(adv.ass)

layout.ass <- layout.mds(adv.ass)
adv.ass <- set.graph.attribute(adv.ass, "layout", layout.ass)
plot(adv.ass)
# still we do not see a lot

# try it for the small network
layout.g1 <- layout.mds(g1)
g1 <- set.graph.attribute(g1, "layout", layout.g1)
plot(g1)
# nicer than the default plot

# V() can be used to access Vertex attributes
#
# V(g) is a "vertex sequence" of network 'g'. Attributes can be retrieved and
# assigned using the '$' operator
#
# Square brackets can be used to select specific nodes with logical statements

# set vertex labels
V(g1)$label <- c("a", "b", "b", "c", "b", "d")
V(g1)$label
plot(g1)

# operations
V(g1)$color <- "red"   # red for all nodes
plot(g1)
# take the node(s) for which node attribute 'label' is equal to "b" 
# and set the color atribute for it to "green"
V(g1)[ label == "b" ]$color <- "green"
plot(g1)

# E() can be used to access Edge attributes in a very similar way


#============================================================================ 
# network connectivity
#============================================================================ 


# clusters = components

k <- clusters(friend)
k
# 'k' is a list with:
# membership  =  vector assigning nodes to components
# component size = number of nodes in each component
# number of components  

# thus we have 3 components in the friend network:
# one giant component and two isolated nodes

# are all connections both ways?
?clusters
clusters(friend,"strong")


#============================================================================ 
# selecting subgraphs
#============================================================================ 


# 1. select subgraph by dropping elements (nodes/ties) 

# deleting vertices 
?delete.vertices
?degree
# how can we find the isolates
degree(friend, mode="total")==0
sum(degree(friend, mode="total")==0)
# drop them from the network
friend.no_iso <- delete.vertices(friend, degree(friend, mode="total")==0)
summary(friend.no_iso)

# 2. Using 'subgraph' function

?subgraph
# this was used above
Hartford <- which(V(adv)$office==2)
Hartford
adv.Hartford <- subgraph(adv, Hartford)
summary(adv.Hartford)

# This is smaller, better for plotting
# first the default
plot(adv.Hartford)
# then the mds
layout.hart <- layout.mds(adv.Hartford)
adv.Hartford <- set.graph.attribute(adv.Hartford, "layout", layout.hart)
plot(adv.Hartford)

# But now we lost the original labels!
# They are still in Hartford, but with labels starting at 1
# and we wish to start at 1.

H.labels <- as.character(Hartford+1)
plot(adv.Hartford, vertex.label=H.labels)


#============================================================================ 
# more SNA
#============================================================================ 


# reachability
# lengths of shortest paths
plot(g1)
m1 <- get.adjacency(g1)
m1
m1^2     # this gives squares elementwise, so not what we want
m1 %*% m1 # proper matrix multiplication: numbers of paths of length 2 
m1 %*% m1 %*% m1 # matrix power of 3: numbers of paths of lentgh 3

shortest.paths(g1) # matrix of lengths of shortest paths
average.path.length(g1)  # average path length

# 'get.shortest.paths' returns vertices on a shortest path between specified
# pair of vertices

# vertices on the shortest path between 0 and 18
# default does not take directionality into account
l <- get.shortest.paths(adv.Hartford, 0, 18)
l           # a list with one component

# now the longest path
get.diameter(adv.Hartford)
# really?
get.shortest.paths(adv.Hartford, 2, 10)
# apparently for diameter the default is directed; therefore
get.shortest.paths(adv.Hartford, 2, 10, mode="out")
?get.diameter
get.diameter(adv.Hartford, directed = FALSE)

### Centrality

# vector of betweenness centrality scores
b <- betweenness(adv)
str(b)

# eigenvector centralities
ec <- evcent(adv)
str(ec)
ec <- ec$vector
ec

# betweenness mapped to node size
b1 <- betweenness(adv.Hartford)
plot( adv.Hartford,
  vertex.color="white",
  vertex.label.cex=.8,
  vertex.size= b1 * 20 / max(b1) + 10)

# or with vertex labels indicating betweenness numerically
plot( adv.Hartford,
  vertex.color="white",
  vertex.label.cex=.8,
  vertex.size= b1 * 20 / max(b1) + 10,
  vertex.label = round(b1) )



### degree assortativity

degs <- degree(adv)
# edge list with numeric ids (start from 0 so +1)
el <- get.edgelist(adv, names=FALSE) + 1
head(el)
deglist <- apply(el, 2,    function(x) degs[x]    )
head(deglist)

plot(jitter(deglist), 
  main="Degree mixing",
  xlab="Sender", ylab="Receiver")

?degree

### community detection

# grouping nodes into communities by majority voting in the
# neighborhoods
com <- spinglass.community(adv.Hartford)
com
com <- com$membership
plot( adv.Hartford, 
  vertex.color=com+1,
  vertex.size=5, vertex.label=NA)

### triad census
?triad.census
triad.census(adv)
dyad.census(adv)

### cliques
largest.cliques(adv)