### R code for migration-based adjacency matrices
### Example using 2009-2011 data from the Office for National Statistics
### Date:   08/10/2014
### Author: Peter Dutey-Magni
### Email:  p.dutey-magni@soton.ac.uk

###################################################
### Downloading and unzipping
### between-district migration files
###################################################

# Setting local working directory
setwd("C:/local/")
install.packages(c("maptools", "spdep", "sqldf", "RCurl"))
library(maptools)
library(sqldf)
library(spdep)
library(RCurl)

# 2009 data
temp <- tempfile()
download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--part-1.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2009 - part 1 of 2.csv")
mig09 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)

download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--part-2.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2009 - part 2 of 2.csv")
mig09 <- rbind(mig09, read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T))

# 2010 data
download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2010-part-1.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2010 - part 1 of 2.csv")
mig10 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)

download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2010-part-2.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2010 - part 2 of 2.csv")
mig10 <- rbind(mig10, read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T))

# 2011 data
download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2011-part-1.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2011 - part 1 of 2.csv")
mig11 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)

download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2011-part-2.zip", temp, mode = "wb")
migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2011 - part 2 of 2.csv")
mig11 <- rbind(mig11, read.table(file = migtemp, sep = ",", colClasses  =  c(rep("character", 2), rep("integer", 2), "numeric"), header = T))

# District population data
download.file("http://www.nomisweb.co.uk/api/v01/dataset/nm_144_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=TYPE464", temp, mode = "wb")
ks1010ew <- read.csv(file = temp, stringsAsFactors=F)
ks1010ew <- ks1010ew[ , c("geography.code", "geography", "Variable..All.usual.residents..measures..Value")]
names(ks1010ew) <- c("LAD13CD", "LAD13NM", "pop")

closeAllConnections()
rm(temp, migtemp)

# Pooling years together; calculating the annual mean migation flows
migall <- aggregate(formula  =  Flow ~ OutLA + InLA + Age + Sex,
                 data     =  rbind(mig11, mig10, mig09),
                 FUN      =  sum)
migall$Flow <- migall$Flow / 3

# Creating a new combined Origin district/Destination district migration code
migall$Comb <- as.factor(paste(migall$OutLA, migall$InLA))

# Removing cross-border migrations to/from Scotland and Northern Ireland
migall <- migall[grep("^[EW]", migall$InLA), ]
migall <- migall[grep("^[EW]", migall$OutLA), ]


###################################################
### Amending disctrict ONS geography codes
### to reflect changes incurred in 2012
###################################################

# District boundary changes came into force on 1 April 2013; see
# SI 2013 No. 596 The East Hertfordshire and Stevenage (Boundary Change) Order 2013;
# SI 2013 No. 595 The Gateshead and Northumberland (Boundary Change) Order 2013.
# For consistency, we restore 2012 ONS geography codes used in the migration datasets.
ks1010ew$LAD12CD <- ks1010ew$LAD13CD
ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E07000242'] <- 'E07000097'
ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E07000243'] <- 'E07000101'
ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E08000037'] <- 'E08000020'
ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E06000057'] <- 'E06000048'

# Note: Any versions of the Nomis census table after 13 November 2015 may reflect
# ulterior boundary changes and therefore codes. The commands below check that
# local authority district codes match in both data sources.
# Boundary changes can be reviewed on the ONS Code History Database on the Geoportal:
# https://geoportal.statistics.gov.uk/geoportal/rest/find/document?searchText="code history database"

# The result of this overall test must be TRUE
all(sort(unique(migall$InLA)) == sort(unique(ks1010ew$LAD12CD)))

# one-by-one test in case the overall test is not TRUE
data.frame("migall.code" = sort(unique(migall$InLA)),
           "pop.code" = sort(unique(ks1010ew$LAD12CD)),
           "match"    = sort(unique(migall$InLA)) == sort(unique(ks1010ew$LAD12CD)))


###################################################
### Plot: Examining the age profile
### of migrations in 2009, 2010 and 2011
###################################################

c09 <- NULL
for (i in 0:112){
c09[i+1] <- sum(mig09$Flow[mig09$Age == i])/1000}
c10 <- NULL
for (i in 0:112){
c10[i+1] <- sum(mig10$Flow[mig10$Age == i])/1000}
c11 <- NULL
for (i in 0:112){
c11[i+1] <- sum(mig11$Flow[mig11$Age == i])/1000}

col <- c('plum2', 'violetred2', 'purple3')
cex <- .8
pch <- 16
lwd <- 2

# Intensity of annual intra-national migrations by age in England and Wales 2009-2011
plot(0:112, c09, pch = pch, cex = cex, col = col[1], ylim = c(0, 160),xlab = "Age",
     ylab = "Intra-national migallrants per annum (in thousands)", type  =  'l' , lwd = lwd)
abline(h = seq(0,200,10), col = "grey80")
abline(v = seq(0,120,5), col = "grey80")
points(0:112, c10, pch = pch, cex = cex, col = col[2], type = 'l', lwd = lwd)
points(0:112, c11, pch = pch, cex = cex, col = col[3], type =  'l', lwd = lwd)
legend("topright", legend = c("2009", "2010", "2011"), fill = col, bg = "white")


###################################################
### Producing the adjacency matrix
###################################################


# Adding census population estimates to the origin-destination data

mig <- sqldf("select d.*, c.pop as popOutLA
              from (select a.*, b.pop as popInLA
                    from migall a left join ks1010ew b
                    on a.InLA = b.LAD12CD) d
              left join ks1010ew c
              on d.OutLA = c.LAD12CD")

ODadjacency <- function(data = mig, popdata = ks1010ew[ , c("LAD12CD", "pop")],
                         minage = 0, maxage = 120,
                         nbneighbours = 3, ranked = T){
  # Computes an origin destination matrix of size 348 x 348 in which each cell
  # indicates either the rank (ranked = T) or the standardised weight (ranked =F)
  # of the pair of district, where the maximum number of pairs is set by
  # parameter nbneighbours.

  library(spdep)
  # Restricting the data to predefined age bounds
  # (for instance to exclude student/young people)
  data <- data[data$Age >= minage & data$Age <= maxage, ]
  data <- aggregate(formula = Flow ~ OutLA + InLA + Comb + popOutLA + popInLA,
                    data = data,
                    FUN = sum)
  # Calculating flow intensity (relative to the population of the origin district)
  data$rate <- data$Flow/data$popOutLA
  # Ordering by district of destination (alphabetically),
  # and by flow intensity (descending order) respectively
  data <- data[order(data$InLA, -data$rate), ]

  # By district of destination: giving weight 5 to the district
  # of origin of the biggest flow of immigrants, 4 to the
  # second biggest, ... 1 to the fifth biggest, and 0 to all the other district
  data$rankneigh <- unlist(tapply(X=data$rate, INDEX = data$InLA, FUN = function(x){
      if(length(x) > nbneighbours){
          return(c(nbneighbours:1, rep(0, length(x)-nbneighbours)))
      } else {
          return(c(nbneighbours:1)[1:length(x)])
      }}), use.names = F)

  #Taking all pairwise combinations of LADs to prepare the neighbourhood matrix
  LADs <- popdata[,1]
  LADs <- LADs[order(LADs)]
  LADs <- expand.grid(LADs, LADs, stringsAsFactors = F)
  names(LADs) <- c("OutLA", "InLA")
  LADs <- LADs[order(LADs[,1], LADs[,2]), ]

  # Producing the final matrix
  LADs <- merge(LADs, data[,c("OutLA", "InLA", "rankneigh")], by = c("OutLA", "InLA"), all.x = T )
  # removing the missing ranks (occuring either because no migration or data issue,
  # we assume their rank is zero because these is typically small LAs)
  LADs$rankneigh[is.na(LADs$rankneigh)] <- 0

  # Generating a dichotomous matrix
  LADs$binw <- 0
  LADs$binw[LADs$rankneigh>0] <- 1

  # Returning the required matrix
  if(ranked == F){
      message("Returning a binary adjacency matrix")
      matbin <- matrix(LADs$binw, length(unique(popdata[,1])), byrow = F)
      row.names(matbin) <- as.character(unique(LADs$InLA))
      return(mat2listw(matbin, style = "W"))
  } else {
      message("Returning a ranked adjacency matrix")
      matweights <- matrix(LADs$rankneigh, 348, 348, byrow = F)
      dimnames(matweights) <- list(as.character(unique(LADs$InLA)),
                                   as.character( unique(LADs$OutLA)))
      row.names(matweights) <- as.character(unique(LADs$InLA))
      return(mat2listw(matweights, style = "W"))
  }
}

test <- ODadjacency(minage = 0, nbneighbours = 3, ranked = T)
test25 <- ODadjacency(minage = 25, nbneighbours = 10, ranked = T)


###################################################
### Mapping the resulting adjacency structures
###################################################

# Downloading district boundary shapefile from the ONS geoportal

bin <- getBinaryURL("https://geoportal.statistics.gov.uk/Docs/Boundaries/Local_authority_district_(GB)_2013_Boundaries_(Generalised_Clipped).zip", ssl.verifypeer=FALSE)
con <- file("LADboundary.zip", open = "wb")
writeBin(bin, con)
close(con)
rm(bin, con)
unzip("LADboundary.zip")

# Loading shapefile
shp <- readShapePoly("LAD_DEC_2013_GB_BGC.shp")
# Removing Scotland
shp <- shp[grep("^[EW]",shp@data$LAD13CD), ]
# Recovering 2012 ONS codes
shp@data$LAD12CD <- as.character(shp@data$LAD13CD)
shp@data$LAD12CD[shp@data$LAD13CD == 'E07000242'] <- 'E07000097'
shp@data$LAD12CD[shp@data$LAD13CD == 'E07000243'] <- 'E07000101'
shp@data$LAD12CD[shp@data$LAD13CD == 'E08000037'] <- 'E08000020'
shp@data$LAD12CD[shp@data$LAD13CD == 'E06000057'] <- 'E06000048'

# Must ensure districts in the shapefile are alphabetically ordered by ONS code
shp <- shp[order(shp@data$LAD12CD), ]

pdf(file = "checkcontiguitymat.pdf" )
par(omi = c(0,0,0,0))
plot(shp,border = 'grey40', lwd = 0.2)
plot(test, coordinates(shp), lwd = 0.1, add = T, col = 2, pch = ".", cex = .000000000001 )
dev.off()