### R code for migration-based adjacency matrices ### Example using 2009-2011 data from the Office for National Statistics ### Date: 08/10/2014 ### Author: Peter Dutey-Magni ### Email: p.dutey-magni@soton.ac.uk ################################################### ### Downloading and unzipping ### between-district migration files ################################################### # Setting local working directory setwd("C:/local/") install.packages(c("maptools", "spdep", "sqldf", "RCurl")) library(maptools) library(sqldf) library(spdep) library(RCurl) # 2009 data temp <- tempfile() download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--part-1.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2009 - part 1 of 2.csv") mig09 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T) download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--part-2.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2009 - part 2 of 2.csv") mig09 <- rbind(mig09, read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)) # 2010 data download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2010-part-1.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2010 - part 1 of 2.csv") mig10 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T) download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2010-part-2.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2010 - part 2 of 2.csv") mig10 <- rbind(mig10, read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)) # 2011 data download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2011-part-1.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2011 - part 1 of 2.csv") mig11 <- read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T) download.file("http://www.ons.gov.uk/ons/rel/migration1/internal-migration-by-local-authorities-in-england-and-wales/research-series--years-ending-june-2009-to-june-2011/rft-detailed-estimates--june-2011-part-2.zip", temp, mode = "wb") migtemp <- unz(temp, filename = "Internal migration detailed estimates file - research series year ending June 2011 - part 2 of 2.csv") mig11 <- rbind(mig11, read.table(file = migtemp, sep = ",", colClasses = c(rep("character", 2), rep("integer", 2), "numeric"), header = T)) # District population data download.file("http://www.nomisweb.co.uk/api/v01/dataset/nm_144_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=TYPE464", temp, mode = "wb") ks1010ew <- read.csv(file = temp, stringsAsFactors=F) ks1010ew <- ks1010ew[ , c("geography.code", "geography", "Variable..All.usual.residents..measures..Value")] names(ks1010ew) <- c("LAD13CD", "LAD13NM", "pop") closeAllConnections() rm(temp, migtemp) # Pooling years together; calculating the annual mean migation flows migall <- aggregate(formula = Flow ~ OutLA + InLA + Age + Sex, data = rbind(mig11, mig10, mig09), FUN = sum) migall$Flow <- migall$Flow / 3 # Creating a new combined Origin district/Destination district migration code migall$Comb <- as.factor(paste(migall$OutLA, migall$InLA)) # Removing cross-border migrations to/from Scotland and Northern Ireland migall <- migall[grep("^[EW]", migall$InLA), ] migall <- migall[grep("^[EW]", migall$OutLA), ] ################################################### ### Amending disctrict ONS geography codes ### to reflect changes incurred in 2012 ################################################### # District boundary changes came into force on 1 April 2013; see # SI 2013 No. 596 The East Hertfordshire and Stevenage (Boundary Change) Order 2013; # SI 2013 No. 595 The Gateshead and Northumberland (Boundary Change) Order 2013. # For consistency, we restore 2012 ONS geography codes used in the migration datasets. ks1010ew$LAD12CD <- ks1010ew$LAD13CD ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E07000242'] <- 'E07000097' ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E07000243'] <- 'E07000101' ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E08000037'] <- 'E08000020' ks1010ew$LAD12CD[ks1010ew$LAD13CD == 'E06000057'] <- 'E06000048' # Note: Any versions of the Nomis census table after 13 November 2015 may reflect # ulterior boundary changes and therefore codes. The commands below check that # local authority district codes match in both data sources. # Boundary changes can be reviewed on the ONS Code History Database on the Geoportal: # https://geoportal.statistics.gov.uk/geoportal/rest/find/document?searchText="code history database" # The result of this overall test must be TRUE all(sort(unique(migall$InLA)) == sort(unique(ks1010ew$LAD12CD))) # one-by-one test in case the overall test is not TRUE data.frame("migall.code" = sort(unique(migall$InLA)), "pop.code" = sort(unique(ks1010ew$LAD12CD)), "match" = sort(unique(migall$InLA)) == sort(unique(ks1010ew$LAD12CD))) ################################################### ### Plot: Examining the age profile ### of migrations in 2009, 2010 and 2011 ################################################### c09 <- NULL for (i in 0:112){ c09[i+1] <- sum(mig09$Flow[mig09$Age == i])/1000} c10 <- NULL for (i in 0:112){ c10[i+1] <- sum(mig10$Flow[mig10$Age == i])/1000} c11 <- NULL for (i in 0:112){ c11[i+1] <- sum(mig11$Flow[mig11$Age == i])/1000} col <- c('plum2', 'violetred2', 'purple3') cex <- .8 pch <- 16 lwd <- 2 # Intensity of annual intra-national migrations by age in England and Wales 2009-2011 plot(0:112, c09, pch = pch, cex = cex, col = col[1], ylim = c(0, 160),xlab = "Age", ylab = "Intra-national migallrants per annum (in thousands)", type = 'l' , lwd = lwd) abline(h = seq(0,200,10), col = "grey80") abline(v = seq(0,120,5), col = "grey80") points(0:112, c10, pch = pch, cex = cex, col = col[2], type = 'l', lwd = lwd) points(0:112, c11, pch = pch, cex = cex, col = col[3], type = 'l', lwd = lwd) legend("topright", legend = c("2009", "2010", "2011"), fill = col, bg = "white") ################################################### ### Producing the adjacency matrix ################################################### # Adding census population estimates to the origin-destination data mig <- sqldf("select d.*, c.pop as popOutLA from (select a.*, b.pop as popInLA from migall a left join ks1010ew b on a.InLA = b.LAD12CD) d left join ks1010ew c on d.OutLA = c.LAD12CD") ODadjacency <- function(data = mig, popdata = ks1010ew[ , c("LAD12CD", "pop")], minage = 0, maxage = 120, nbneighbours = 3, ranked = T){ # Computes an origin destination matrix of size 348 x 348 in which each cell # indicates either the rank (ranked = T) or the standardised weight (ranked =F) # of the pair of district, where the maximum number of pairs is set by # parameter nbneighbours. library(spdep) # Restricting the data to predefined age bounds # (for instance to exclude student/young people) data <- data[data$Age >= minage & data$Age <= maxage, ] data <- aggregate(formula = Flow ~ OutLA + InLA + Comb + popOutLA + popInLA, data = data, FUN = sum) # Calculating flow intensity (relative to the population of the origin district) data$rate <- data$Flow/data$popOutLA # Ordering by district of destination (alphabetically), # and by flow intensity (descending order) respectively data <- data[order(data$InLA, -data$rate), ] # By district of destination: giving weight 5 to the district # of origin of the biggest flow of immigrants, 4 to the # second biggest, ... 1 to the fifth biggest, and 0 to all the other district data$rankneigh <- unlist(tapply(X=data$rate, INDEX = data$InLA, FUN = function(x){ if(length(x) > nbneighbours){ return(c(nbneighbours:1, rep(0, length(x)-nbneighbours))) } else { return(c(nbneighbours:1)[1:length(x)]) }}), use.names = F) #Taking all pairwise combinations of LADs to prepare the neighbourhood matrix LADs <- popdata[,1] LADs <- LADs[order(LADs)] LADs <- expand.grid(LADs, LADs, stringsAsFactors = F) names(LADs) <- c("OutLA", "InLA") LADs <- LADs[order(LADs[,1], LADs[,2]), ] # Producing the final matrix LADs <- merge(LADs, data[,c("OutLA", "InLA", "rankneigh")], by = c("OutLA", "InLA"), all.x = T ) # removing the missing ranks (occuring either because no migration or data issue, # we assume their rank is zero because these is typically small LAs) LADs$rankneigh[is.na(LADs$rankneigh)] <- 0 # Generating a dichotomous matrix LADs$binw <- 0 LADs$binw[LADs$rankneigh>0] <- 1 # Returning the required matrix if(ranked == F){ message("Returning a binary adjacency matrix") matbin <- matrix(LADs$binw, length(unique(popdata[,1])), byrow = F) row.names(matbin) <- as.character(unique(LADs$InLA)) return(mat2listw(matbin, style = "W")) } else { message("Returning a ranked adjacency matrix") matweights <- matrix(LADs$rankneigh, 348, 348, byrow = F) dimnames(matweights) <- list(as.character(unique(LADs$InLA)), as.character( unique(LADs$OutLA))) row.names(matweights) <- as.character(unique(LADs$InLA)) return(mat2listw(matweights, style = "W")) } } test <- ODadjacency(minage = 0, nbneighbours = 3, ranked = T) test25 <- ODadjacency(minage = 25, nbneighbours = 10, ranked = T) ################################################### ### Mapping the resulting adjacency structures ################################################### # Downloading district boundary shapefile from the ONS geoportal bin <- getBinaryURL("https://geoportal.statistics.gov.uk/Docs/Boundaries/Local_authority_district_(GB)_2013_Boundaries_(Generalised_Clipped).zip", ssl.verifypeer=FALSE) con <- file("LADboundary.zip", open = "wb") writeBin(bin, con) close(con) rm(bin, con) unzip("LADboundary.zip") # Loading shapefile shp <- readShapePoly("LAD_DEC_2013_GB_BGC.shp") # Removing Scotland shp <- shp[grep("^[EW]",shp@data$LAD13CD), ] # Recovering 2012 ONS codes shp@data$LAD12CD <- as.character(shp@data$LAD13CD) shp@data$LAD12CD[shp@data$LAD13CD == 'E07000242'] <- 'E07000097' shp@data$LAD12CD[shp@data$LAD13CD == 'E07000243'] <- 'E07000101' shp@data$LAD12CD[shp@data$LAD13CD == 'E08000037'] <- 'E08000020' shp@data$LAD12CD[shp@data$LAD13CD == 'E06000057'] <- 'E06000048' # Must ensure districts in the shapefile are alphabetically ordered by ONS code shp <- shp[order(shp@data$LAD12CD), ] pdf(file = "checkcontiguitymat.pdf" ) par(omi = c(0,0,0,0)) plot(shp,border = 'grey40', lwd = 0.2) plot(test, coordinates(shp), lwd = 0.1, add = T, col = 2, pch = ".", cex = .000000000001 ) dev.off()